tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

raw_hash_set_probe_benchmark.cc (17008B)


      1 // Copyright 2018 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // Generates probe length statistics for many combinations of key types and key
     16 // distributions, all using the default hash function for swisstable.
     17 
     18 #include <memory>
     19 #include <regex>  // NOLINT
     20 #include <vector>
     21 
     22 #include "absl/base/no_destructor.h"
     23 #include "absl/container/flat_hash_map.h"
     24 #include "absl/container/internal/hash_function_defaults.h"
     25 #include "absl/container/internal/hashtable_debug.h"
     26 #include "absl/container/internal/raw_hash_set.h"
     27 #include "absl/random/distributions.h"
     28 #include "absl/random/random.h"
     29 #include "absl/strings/str_cat.h"
     30 #include "absl/strings/str_format.h"
     31 #include "absl/strings/string_view.h"
     32 #include "absl/strings/strip.h"
     33 #include "absl/types/optional.h"
     34 
     35 namespace {
     36 
     37 enum class OutputStyle { kRegular, kBenchmark };
     38 
     39 // The --benchmark command line flag.
     40 // This is populated from main().
     41 // When run in "benchmark" mode, we have different output. This allows
     42 // A/B comparisons with tools like `benchy`.
     43 absl::string_view benchmarks;
     44 
     45 OutputStyle output() {
     46  return !benchmarks.empty() ? OutputStyle::kBenchmark : OutputStyle::kRegular;
     47 }
     48 
     49 template <class T>
     50 struct Policy {
     51  using slot_type = T;
     52  using key_type = T;
     53  using init_type = T;
     54 
     55  template <class allocator_type, class Arg>
     56  static void construct(allocator_type* alloc, slot_type* slot,
     57                        const Arg& arg) {
     58    std::allocator_traits<allocator_type>::construct(*alloc, slot, arg);
     59  }
     60 
     61  template <class allocator_type>
     62  static void destroy(allocator_type* alloc, slot_type* slot) {
     63    std::allocator_traits<allocator_type>::destroy(*alloc, slot);
     64  }
     65 
     66  static slot_type& element(slot_type* slot) { return *slot; }
     67 
     68  template <class F, class... Args>
     69  static auto apply(F&& f, const slot_type& arg)
     70      -> decltype(std::forward<F>(f)(arg, arg)) {
     71    return std::forward<F>(f)(arg, arg);
     72  }
     73 
     74  template <class Hash>
     75  static constexpr auto get_hash_slot_fn() {
     76    return nullptr;
     77  }
     78 };
     79 
     80 absl::BitGen& GlobalBitGen() {
     81  static absl::NoDestructor<absl::BitGen> value;
     82  return *value;
     83 }
     84 
     85 // Keeps a pool of allocations and randomly gives one out.
     86 // This introduces more randomization to the addresses given to swisstable and
     87 // should help smooth out this factor from probe length calculation.
     88 template <class T>
     89 class RandomizedAllocator {
     90 public:
     91  using value_type = T;
     92 
     93  RandomizedAllocator() = default;
     94  template <typename U>
     95  RandomizedAllocator(RandomizedAllocator<U>) {}  // NOLINT
     96 
     97  static T* allocate(size_t n) {
     98    auto& pointers = GetPointers(n);
     99    // Fill the pool
    100    while (pointers.size() < kRandomPool) {
    101      pointers.push_back(std::allocator<T>{}.allocate(n));
    102    }
    103 
    104    // Choose a random one.
    105    size_t i = absl::Uniform<size_t>(GlobalBitGen(), 0, pointers.size());
    106    T* result = pointers[i];
    107    pointers[i] = pointers.back();
    108    pointers.pop_back();
    109    return result;
    110  }
    111 
    112  static void deallocate(T* p, size_t n) {
    113    // Just put it back on the pool. No need to release the memory.
    114    GetPointers(n).push_back(p);
    115  }
    116 
    117 private:
    118  // We keep at least kRandomPool allocations for each size.
    119  static constexpr size_t kRandomPool = 20;
    120 
    121  static std::vector<T*>& GetPointers(size_t n) {
    122    static absl::NoDestructor<absl::flat_hash_map<size_t, std::vector<T*>>> m;
    123    return (*m)[n];
    124  }
    125 };
    126 
    127 template <class T>
    128 struct DefaultHash {
    129  using type = absl::container_internal::hash_default_hash<T>;
    130 };
    131 
    132 template <class T>
    133 using DefaultHashT = typename DefaultHash<T>::type;
    134 
    135 template <class T>
    136 struct Table : absl::container_internal::raw_hash_set<
    137                   Policy<T>, DefaultHashT<T>,
    138                   absl::container_internal::hash_default_eq<T>,
    139                   RandomizedAllocator<T>> {};
    140 
    141 struct LoadSizes {
    142  size_t min_load;
    143  size_t max_load;
    144 };
    145 
    146 LoadSizes GetMinMaxLoadSizes() {
    147  static const auto sizes = [] {
    148    Table<int> t;
    149 
    150    // First, fill enough to have a good distribution.
    151    constexpr size_t kMinSize = 10000;
    152    while (t.size() < kMinSize) t.insert(t.size());
    153 
    154    const auto reach_min_load_factor = [&] {
    155      const double lf = t.load_factor();
    156      while (lf <= t.load_factor()) t.insert(t.size());
    157    };
    158 
    159    // Then, insert until we reach min load factor.
    160    reach_min_load_factor();
    161    const size_t min_load_size = t.size();
    162 
    163    // Keep going until we hit min load factor again, then go back one.
    164    t.insert(t.size());
    165    reach_min_load_factor();
    166 
    167    return LoadSizes{min_load_size, t.size() - 1};
    168  }();
    169  return sizes;
    170 }
    171 
    172 struct Ratios {
    173  double min_load;
    174  double avg_load;
    175  double max_load;
    176 };
    177 
    178 // See absl/container/internal/hashtable_debug.h for details on
    179 // probe length calculation.
    180 template <class ElemFn>
    181 Ratios CollectMeanProbeLengths() {
    182  const auto min_max_sizes = GetMinMaxLoadSizes();
    183 
    184  ElemFn elem;
    185  using Key = decltype(elem());
    186  Table<Key> t;
    187 
    188  Ratios result;
    189  while (t.size() < min_max_sizes.min_load) t.insert(elem());
    190  result.min_load =
    191      absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
    192 
    193  while (t.size() < (min_max_sizes.min_load + min_max_sizes.max_load) / 2)
    194    t.insert(elem());
    195  result.avg_load =
    196      absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
    197 
    198  while (t.size() < min_max_sizes.max_load) t.insert(elem());
    199  result.max_load =
    200      absl::container_internal::GetHashtableDebugProbeSummary(t).mean;
    201 
    202  return result;
    203 }
    204 
    205 template <int Align>
    206 uintptr_t PointerForAlignment() {
    207  alignas(Align) static constexpr uintptr_t kInitPointer = 0;
    208  return reinterpret_cast<uintptr_t>(&kInitPointer);
    209 }
    210 
    211 // This incomplete type is used for testing hash of pointers of different
    212 // alignments.
    213 // NOTE: We are generating invalid pointer values on the fly with
    214 // reinterpret_cast. There are not "safely derived" pointers so using them is
    215 // technically UB. It is unlikely to be a problem, though.
    216 template <int Align>
    217 struct Ptr;
    218 
    219 template <int Align>
    220 Ptr<Align>* MakePtr(uintptr_t v) {
    221  if (sizeof(v) == 8) {
    222    constexpr int kCopyBits = 16;
    223    // Ensure high bits are all the same.
    224    v = static_cast<uintptr_t>(static_cast<intptr_t>(v << kCopyBits) >>
    225                               kCopyBits);
    226  }
    227  return reinterpret_cast<Ptr<Align>*>(v);
    228 }
    229 
    230 struct IntIdentity {
    231  uint64_t i;
    232  friend bool operator==(IntIdentity a, IntIdentity b) { return a.i == b.i; }
    233  IntIdentity operator++(int) { return IntIdentity{i++}; }
    234 };
    235 
    236 template <int Align>
    237 struct PtrIdentity {
    238  explicit PtrIdentity(uintptr_t val = PointerForAlignment<Align>()) : i(val) {}
    239  uintptr_t i;
    240  friend bool operator==(PtrIdentity a, PtrIdentity b) { return a.i == b.i; }
    241  PtrIdentity operator++(int) {
    242    PtrIdentity p(i);
    243    i += Align;
    244    return p;
    245  }
    246 };
    247 
    248 constexpr char kStringFormat[] = "/path/to/file/name-%07d-of-9999999.txt";
    249 
    250 template <bool small>
    251 struct String {
    252  std::string value;
    253  static std::string Make(uint32_t v) {
    254    return {small ? absl::StrCat(v) : absl::StrFormat(kStringFormat, v)};
    255  }
    256 };
    257 
    258 template <>
    259 struct DefaultHash<IntIdentity> {
    260  struct type {
    261    size_t operator()(IntIdentity t) const { return t.i; }
    262  };
    263 };
    264 
    265 template <int Align>
    266 struct DefaultHash<PtrIdentity<Align>> {
    267  struct type {
    268    size_t operator()(PtrIdentity<Align> t) const { return t.i; }
    269  };
    270 };
    271 
    272 template <class T>
    273 struct Sequential {
    274  T operator()() const { return current++; }
    275  mutable T current{};
    276 };
    277 
    278 template <int Align>
    279 struct Sequential<Ptr<Align>*> {
    280  Ptr<Align>* operator()() const {
    281    auto* result = MakePtr<Align>(current);
    282    current += Align;
    283    return result;
    284  }
    285  mutable uintptr_t current = PointerForAlignment<Align>();
    286 };
    287 
    288 
    289 template <bool small>
    290 struct Sequential<String<small>> {
    291  std::string operator()() const { return String<small>::Make(current++); }
    292  mutable uint32_t current = 0;
    293 };
    294 
    295 template <class T, class U>
    296 struct Sequential<std::pair<T, U>> {
    297  mutable Sequential<T> tseq;
    298  mutable Sequential<U> useq;
    299 
    300  using RealT = decltype(tseq());
    301  using RealU = decltype(useq());
    302 
    303  mutable std::vector<RealT> ts;
    304  mutable std::vector<RealU> us;
    305  mutable size_t ti = 0, ui = 0;
    306 
    307  std::pair<RealT, RealU> operator()() const {
    308    std::pair<RealT, RealU> value{get_t(), get_u()};
    309    if (ti == 0) {
    310      ti = ui + 1;
    311      ui = 0;
    312    } else {
    313      --ti;
    314      ++ui;
    315    }
    316    return value;
    317  }
    318 
    319  RealT get_t() const {
    320    while (ti >= ts.size()) ts.push_back(tseq());
    321    return ts[ti];
    322  }
    323 
    324  RealU get_u() const {
    325    while (ui >= us.size()) us.push_back(useq());
    326    return us[ui];
    327  }
    328 };
    329 
    330 template <class T, int percent_skip>
    331 struct AlmostSequential {
    332  mutable Sequential<T> current;
    333 
    334  auto operator()() const -> decltype(current()) {
    335    while (absl::Uniform(GlobalBitGen(), 0.0, 1.0) <= percent_skip / 100.)
    336      current();
    337    return current();
    338  }
    339 };
    340 
    341 struct Uniform {
    342  template <typename T>
    343  T operator()(T) const {
    344    return absl::Uniform<T>(absl::IntervalClosed, GlobalBitGen(), T{0}, ~T{0});
    345  }
    346 };
    347 
    348 struct Gaussian {
    349  template <typename T>
    350  T operator()(T) const {
    351    double d;
    352    do {
    353      d = absl::Gaussian<double>(GlobalBitGen(), 1e6, 1e4);
    354    } while (d <= 0 || d > std::numeric_limits<T>::max() / 2);
    355    return static_cast<T>(d);
    356  }
    357 };
    358 
    359 struct Zipf {
    360  template <typename T>
    361  T operator()(T) const {
    362    return absl::Zipf<T>(GlobalBitGen(), std::numeric_limits<T>::max(), 1.6);
    363  }
    364 };
    365 
    366 template <class T, class Dist>
    367 struct Random {
    368  T operator()() const { return Dist{}(T{}); }
    369 };
    370 
    371 template <class Dist, int Align>
    372 struct Random<Ptr<Align>*, Dist> {
    373  Ptr<Align>* operator()() const {
    374    return MakePtr<Align>(Random<uintptr_t, Dist>{}() * Align);
    375  }
    376 };
    377 
    378 template <class Dist>
    379 struct Random<IntIdentity, Dist> {
    380  IntIdentity operator()() const {
    381    return IntIdentity{Random<uint64_t, Dist>{}()};
    382  }
    383 };
    384 
    385 template <class Dist, int Align>
    386 struct Random<PtrIdentity<Align>, Dist> {
    387  PtrIdentity<Align> operator()() const {
    388    return PtrIdentity<Align>{Random<uintptr_t, Dist>{}() * Align};
    389  }
    390 };
    391 
    392 template <class Dist, bool small>
    393 struct Random<String<small>, Dist> {
    394  std::string operator()() const {
    395    return String<small>::Make(Random<uint32_t, Dist>{}());
    396  }
    397 };
    398 
    399 template <class T, class U, class Dist>
    400 struct Random<std::pair<T, U>, Dist> {
    401  auto operator()() const
    402      -> decltype(std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}())) {
    403    return std::make_pair(Random<T, Dist>{}(), Random<U, Dist>{}());
    404  }
    405 };
    406 
    407 template <typename>
    408 std::string Name();
    409 
    410 std::string Name(uint32_t*) { return "u32"; }
    411 std::string Name(uint64_t*) { return "u64"; }
    412 std::string Name(IntIdentity*) { return "IntIdentity"; }
    413 
    414 template <int Align>
    415 std::string Name(Ptr<Align>**) {
    416  return absl::StrCat("Ptr", Align);
    417 }
    418 
    419 template <int Align>
    420 std::string Name(PtrIdentity<Align>*) {
    421  return absl::StrCat("PtrIdentity", Align);
    422 }
    423 
    424 template <bool small>
    425 std::string Name(String<small>*) {
    426  return small ? "StrS" : "StrL";
    427 }
    428 
    429 template <class T, class U>
    430 std::string Name(std::pair<T, U>*) {
    431  if (output() == OutputStyle::kBenchmark)
    432    return absl::StrCat("P_", Name<T>(), "_", Name<U>());
    433  return absl::StrCat("P<", Name<T>(), ",", Name<U>(), ">");
    434 }
    435 
    436 template <class T>
    437 std::string Name(Sequential<T>*) {
    438  return "Sequential";
    439 }
    440 
    441 template <class T, int P>
    442 std::string Name(AlmostSequential<T, P>*) {
    443  return absl::StrCat("AlmostSeq_", P);
    444 }
    445 
    446 template <class T>
    447 std::string Name(Random<T, Uniform>*) {
    448  return "UnifRand";
    449 }
    450 
    451 template <class T>
    452 std::string Name(Random<T, Gaussian>*) {
    453  return "GausRand";
    454 }
    455 
    456 template <class T>
    457 std::string Name(Random<T, Zipf>*) {
    458  return "ZipfRand";
    459 }
    460 
    461 template <typename T>
    462 std::string Name() {
    463  return Name(static_cast<T*>(nullptr));
    464 }
    465 
    466 constexpr int kNameWidth = 15;
    467 constexpr int kDistWidth = 16;
    468 
    469 bool CanRunBenchmark(absl::string_view name) {
    470  static const absl::NoDestructor<absl::optional<std::regex>> filter([] {
    471    return benchmarks.empty() || benchmarks == "all"
    472               ? absl::nullopt
    473               : absl::make_optional(std::regex(std::string(benchmarks)));
    474  }());
    475  return !filter->has_value() || std::regex_search(std::string(name), **filter);
    476 }
    477 
    478 struct Result {
    479  std::string name;
    480  std::string dist_name;
    481  Ratios ratios;
    482 };
    483 
    484 template <typename T, typename Dist>
    485 void RunForTypeAndDistribution(std::vector<Result>& results) {
    486  std::string name = absl::StrCat(Name<T>(), "/", Name<Dist>());
    487  // We have to check against all three names (min/avg/max) before we run it.
    488  // If any of them is enabled, we run it.
    489  if (!CanRunBenchmark(absl::StrCat(name, "/min")) &&
    490      !CanRunBenchmark(absl::StrCat(name, "/avg")) &&
    491      !CanRunBenchmark(absl::StrCat(name, "/max"))) {
    492    return;
    493  }
    494  results.push_back({Name<T>(), Name<Dist>(), CollectMeanProbeLengths<Dist>()});
    495 }
    496 
    497 template <class T>
    498 void RunForType(std::vector<Result>& results) {
    499  RunForTypeAndDistribution<T, Sequential<T>>(results);
    500  RunForTypeAndDistribution<T, AlmostSequential<T, 20>>(results);
    501  RunForTypeAndDistribution<T, AlmostSequential<T, 50>>(results);
    502  RunForTypeAndDistribution<T, Random<T, Uniform>>(results);
    503 #ifdef NDEBUG
    504  // Disable these in non-opt mode because they take too long.
    505  RunForTypeAndDistribution<T, Random<T, Gaussian>>(results);
    506  RunForTypeAndDistribution<T, Random<T, Zipf>>(results);
    507 #endif  // NDEBUG
    508 }
    509 
    510 }  // namespace
    511 
    512 int main(int argc, char** argv) {
    513  // Parse the benchmark flags. Ignore all of them except the regex pattern.
    514  for (int i = 1; i < argc; ++i) {
    515    absl::string_view arg = argv[i];
    516    const auto next = [&] { return argv[std::min(i + 1, argc - 1)]; };
    517 
    518    if (absl::ConsumePrefix(&arg, "--benchmark_filter")) {
    519      if (arg == "") {
    520        // --benchmark_filter X
    521        benchmarks = next();
    522      } else if (absl::ConsumePrefix(&arg, "=")) {
    523        // --benchmark_filter=X
    524        benchmarks = arg;
    525      }
    526    }
    527 
    528    // Any --benchmark flag turns on the mode.
    529    if (absl::ConsumePrefix(&arg, "--benchmark")) {
    530      if (benchmarks.empty()) benchmarks="all";
    531    }
    532  }
    533 
    534  std::vector<Result> results;
    535  RunForType<uint64_t>(results);
    536  RunForType<IntIdentity>(results);
    537  RunForType<Ptr<8>*>(results);
    538  RunForType<Ptr<16>*>(results);
    539  RunForType<Ptr<32>*>(results);
    540  RunForType<Ptr<64>*>(results);
    541  RunForType<PtrIdentity<8>>(results);
    542  RunForType<PtrIdentity<16>>(results);
    543  RunForType<PtrIdentity<32>>(results);
    544  RunForType<PtrIdentity<64>>(results);
    545  RunForType<std::pair<uint32_t, uint32_t>>(results);
    546  RunForType<String<true>>(results);
    547  RunForType<String<false>>(results);
    548  RunForType<std::pair<uint64_t, String<true>>>(results);
    549  RunForType<std::pair<String<true>, uint64_t>>(results);
    550  RunForType<std::pair<uint64_t, String<false>>>(results);
    551  RunForType<std::pair<String<false>, uint64_t>>(results);
    552 
    553  switch (output()) {
    554    case OutputStyle::kRegular:
    555      absl::PrintF("%-*s%-*s       Min       Avg       Max\n%s\n", kNameWidth,
    556                   "Type", kDistWidth, "Distribution",
    557                   std::string(kNameWidth + kDistWidth + 10 * 3, '-'));
    558      for (const auto& result : results) {
    559        absl::PrintF("%-*s%-*s  %8.4f  %8.4f  %8.4f\n", kNameWidth, result.name,
    560                     kDistWidth, result.dist_name, result.ratios.min_load,
    561                     result.ratios.avg_load, result.ratios.max_load);
    562      }
    563      break;
    564    case OutputStyle::kBenchmark: {
    565      absl::PrintF("{\n");
    566      absl::PrintF("  \"benchmarks\": [\n");
    567      absl::string_view comma;
    568      for (const auto& result : results) {
    569        auto print = [&](absl::string_view stat, double Ratios::*val) {
    570          std::string name =
    571              absl::StrCat(result.name, "/", result.dist_name, "/", stat);
    572          // Check the regex again. We might had have enabled only one of the
    573          // stats for the benchmark.
    574          if (!CanRunBenchmark(name)) return;
    575          absl::PrintF("    %s{\n", comma);
    576          absl::PrintF("      \"cpu_time\": %f,\n", 1e9 * result.ratios.*val);
    577          absl::PrintF("      \"real_time\": %f,\n", 1e9 * result.ratios.*val);
    578          absl::PrintF("      \"iterations\": 1,\n");
    579          absl::PrintF("      \"name\": \"%s\",\n", name);
    580          absl::PrintF("      \"time_unit\": \"ns\"\n");
    581          absl::PrintF("    }\n");
    582          comma = ",";
    583        };
    584        print("min", &Ratios::min_load);
    585        print("avg", &Ratios::avg_load);
    586        print("max", &Ratios::max_load);
    587      }
    588      absl::PrintF("  ],\n");
    589      absl::PrintF("  \"context\": {\n");
    590      absl::PrintF("  }\n");
    591      absl::PrintF("}\n");
    592      break;
    593    }
    594  }
    595 
    596  return 0;
    597 }