hash_benchmark.cc (16421B)
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeindex>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/random/random.h"
#include "absl/strings/cord.h"
#include "absl/strings/cord_test_helpers.h"
#include "absl/strings/string_view.h"
#include "benchmark/benchmark.h"

namespace {

using absl::Hash;

// Core throughput loop shared by all MAKE_BENCHMARK expansions: hashes the
// same `value` repeatedly with hasher `H<T>`. DoNotOptimize on both the input
// and the result keeps the compiler from hoisting or dead-code-eliminating
// the hash computation.
template <template <typename> class H, typename T>
void RunBenchmark(benchmark::State& state, T value) {
  H<T> h;
  for (auto _ : state) {
    benchmark::DoNotOptimize(value);
    benchmark::DoNotOptimize(h(value));
  }
}

}  // namespace

// Alias so the benchmark macros can name the hasher as a single identifier.
template <typename T>
using AbslHash = absl::Hash<T>;

// Abstract base used to measure hashing through a type-erased boundary
// (absl::HashState) instead of a fully inlined, statically typed hasher.
class TypeErasedInterface {
 public:
  virtual ~TypeErasedInterface() = default;

  // Mixes the dynamic type of the wrapper, then delegates the value hashing
  // to the virtual HashValue() via an erased absl::HashState.
  template <typename H>
  friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
    state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
    wrapper.HashValue(absl::HashState::Create(&state));
    return state;
  }

 private:
  virtual void HashValue(absl::HashState state) const = 0;
};

// Hash functor that routes `T` through the type-erased interface above, so
// benchmarks can compare erased vs. non-erased hashing of the same values.
template <typename T>
struct TypeErasedAbslHash {
  class Wrapper : public TypeErasedInterface {
   public:
    explicit Wrapper(const T& value) : value_(value) {}

   private:
    void HashValue(absl::HashState state) const override {
      absl::HashState::combine(std::move(state), value_);
    }

    // Non-owning; the wrapper only lives for the duration of operator().
    const T& value_;
  };

  size_t operator()(const T& value) {
    return absl::Hash<Wrapper>{}(Wrapper(value));
  }
};

// Returns a Cord of `size` 'a's stored in a single flat chunk.
absl::Cord FlatCord(size_t size) {
  absl::Cord result(std::string(size, 'a'));
  result.Flatten();
  return result;
}

// Returns a Cord of `size` 'a's split into ~10 separate chunks, to exercise
// the multi-chunk hashing path.
absl::Cord FragmentedCord(size_t size) {
  const size_t orig_size = size;
  std::vector<std::string> chunks;
  size_t chunk_size = std::max<size_t>(1, size / 10);
  while (size > chunk_size) {
    chunks.push_back(std::string(chunk_size, 'a'));
    size -= chunk_size;
  }
  if (size > 0) {
    chunks.push_back(std::string(size, 'a'));
  }
  absl::Cord result = absl::MakeFragmentedCord(chunks);
  // Silences the unused-variable warning in NDEBUG builds where the assert
  // below compiles away.
  (void) orig_size;
  assert(result.size() == orig_size);
  return result;
}

// Returns the vector {0, 1, ..., count - 1} converted to T.
template <typename T>
std::vector<T> Vector(size_t count) {
  std::vector<T> result;
  for (size_t v = 0; v < count; ++v) {
    result.push_back(v);
  }
  return result;
}

// Bogus type that replicates an unordered_set's bit mixing, but with
// vector-speed iteration. This is intended to measure the overhead of
// unordered hashing without counting the speed of unordered_set iteration.
123 template <typename T> 124 struct FastUnorderedSet { 125 explicit FastUnorderedSet(size_t count) { 126 for (size_t v = 0; v < count; ++v) { 127 values.push_back(v); 128 } 129 } 130 std::vector<T> values; 131 132 template <typename H> 133 friend H AbslHashValue(H h, const FastUnorderedSet& fus) { 134 return H::combine(H::combine_unordered(std::move(h), fus.values.begin(), 135 fus.values.end()), 136 fus.values.size()); 137 } 138 }; 139 140 template <typename T> 141 absl::flat_hash_set<T> FlatHashSet(size_t count) { 142 absl::flat_hash_set<T> result; 143 for (size_t v = 0; v < count; ++v) { 144 result.insert(v); 145 } 146 return result; 147 } 148 149 template <typename T> 150 struct LongCombine { 151 T a[200]{}; 152 template <typename H> 153 friend H AbslHashValue(H state, const LongCombine& v) { 154 // This is testing a single call to `combine` with a lot of arguments to 155 // test the performance of the folding logic. 156 return H::combine( 157 std::move(state), // 158 v.a[0], v.a[1], v.a[2], v.a[3], v.a[4], v.a[5], v.a[6], v.a[7], v.a[8], 159 v.a[9], v.a[10], v.a[11], v.a[12], v.a[13], v.a[14], v.a[15], v.a[16], 160 v.a[17], v.a[18], v.a[19], v.a[20], v.a[21], v.a[22], v.a[23], v.a[24], 161 v.a[25], v.a[26], v.a[27], v.a[28], v.a[29], v.a[30], v.a[31], v.a[32], 162 v.a[33], v.a[34], v.a[35], v.a[36], v.a[37], v.a[38], v.a[39], v.a[40], 163 v.a[41], v.a[42], v.a[43], v.a[44], v.a[45], v.a[46], v.a[47], v.a[48], 164 v.a[49], v.a[50], v.a[51], v.a[52], v.a[53], v.a[54], v.a[55], v.a[56], 165 v.a[57], v.a[58], v.a[59], v.a[60], v.a[61], v.a[62], v.a[63], v.a[64], 166 v.a[65], v.a[66], v.a[67], v.a[68], v.a[69], v.a[70], v.a[71], v.a[72], 167 v.a[73], v.a[74], v.a[75], v.a[76], v.a[77], v.a[78], v.a[79], v.a[80], 168 v.a[81], v.a[82], v.a[83], v.a[84], v.a[85], v.a[86], v.a[87], v.a[88], 169 v.a[89], v.a[90], v.a[91], v.a[92], v.a[93], v.a[94], v.a[95], v.a[96], 170 v.a[97], v.a[98], v.a[99], v.a[100], v.a[101], v.a[102], v.a[103], 171 v.a[104], v.a[105], v.a[106], 
v.a[107], v.a[108], v.a[109], v.a[110], 172 v.a[111], v.a[112], v.a[113], v.a[114], v.a[115], v.a[116], v.a[117], 173 v.a[118], v.a[119], v.a[120], v.a[121], v.a[122], v.a[123], v.a[124], 174 v.a[125], v.a[126], v.a[127], v.a[128], v.a[129], v.a[130], v.a[131], 175 v.a[132], v.a[133], v.a[134], v.a[135], v.a[136], v.a[137], v.a[138], 176 v.a[139], v.a[140], v.a[141], v.a[142], v.a[143], v.a[144], v.a[145], 177 v.a[146], v.a[147], v.a[148], v.a[149], v.a[150], v.a[151], v.a[152], 178 v.a[153], v.a[154], v.a[155], v.a[156], v.a[157], v.a[158], v.a[159], 179 v.a[160], v.a[161], v.a[162], v.a[163], v.a[164], v.a[165], v.a[166], 180 v.a[167], v.a[168], v.a[169], v.a[170], v.a[171], v.a[172], v.a[173], 181 v.a[174], v.a[175], v.a[176], v.a[177], v.a[178], v.a[179], v.a[180], 182 v.a[181], v.a[182], v.a[183], v.a[184], v.a[185], v.a[186], v.a[187], 183 v.a[188], v.a[189], v.a[190], v.a[191], v.a[192], v.a[193], v.a[194], 184 v.a[195], v.a[196], v.a[197], v.a[198], v.a[199]); 185 } 186 }; 187 188 template <typename T> 189 auto MakeLongTuple() { 190 auto t1 = std::tuple<T>(); 191 auto t2 = std::tuple_cat(t1, t1); 192 auto t3 = std::tuple_cat(t2, t2); 193 auto t4 = std::tuple_cat(t3, t3); 194 auto t5 = std::tuple_cat(t4, t4); 195 auto t6 = std::tuple_cat(t5, t5); 196 // Ideally this would be much larger, but some configurations can't handle 197 // making tuples with that many elements. They break inside std::tuple itself. 198 static_assert(std::tuple_size<decltype(t6)>::value == 32, ""); 199 return t6; 200 } 201 202 // Generates a benchmark and a codegen method for the provided types. The 203 // codegen method provides a well known entrypoint for dumping assembly. 204 #define MAKE_BENCHMARK(hash, name, ...) 
\ 205 namespace { \ 206 void BM_##hash##_##name(benchmark::State& state) { \ 207 RunBenchmark<hash>(state, __VA_ARGS__); \ 208 } \ 209 BENCHMARK(BM_##hash##_##name); \ 210 } \ 211 size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg); \ 212 size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \ 213 return hash<decltype(__VA_ARGS__)>{}(arg); \ 214 } \ 215 bool absl_hash_test_odr_use##hash##name = \ 216 (benchmark::DoNotOptimize(&Codegen##hash##name), false) 217 218 MAKE_BENCHMARK(AbslHash, Int32, int32_t{}); 219 MAKE_BENCHMARK(AbslHash, Int64, int64_t{}); 220 MAKE_BENCHMARK(AbslHash, Double, 1.2); 221 MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0); 222 MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{}); 223 MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{}); 224 MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64, 225 std::tuple<int32_t, bool, int64_t>{}); 226 MAKE_BENCHMARK(AbslHash, String_0, std::string()); 227 MAKE_BENCHMARK(AbslHash, String_1, std::string(1, 'a')); 228 MAKE_BENCHMARK(AbslHash, String_2, std::string(2, 'a')); 229 MAKE_BENCHMARK(AbslHash, String_4, std::string(4, 'a')); 230 MAKE_BENCHMARK(AbslHash, String_8, std::string(8, 'a')); 231 MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a')); 232 MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a')); 233 MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a')); 234 MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a')); 235 MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a')); 236 MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord()); 237 MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10)); 238 MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30)); 239 MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90)); 240 MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200)); 241 MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000)); 242 MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200)); 243 MAKE_BENCHMARK(AbslHash, 
Cord_Fragmented_5000, FragmentedCord(5000)); 244 MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10)); 245 MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100)); 246 MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000)); 247 MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10)); 248 MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100)); 249 MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000)); 250 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10)); 251 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100)); 252 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000)); 253 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10)); 254 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100)); 255 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000)); 256 MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000, 257 FastUnorderedSet<int64_t>(1000)); 258 MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000, 259 FastUnorderedSet<double>(1000)); 260 MAKE_BENCHMARK(AbslHash, PairStringString_0, 261 std::make_pair(std::string(), std::string())); 262 MAKE_BENCHMARK(AbslHash, PairStringString_10, 263 std::make_pair(std::string(10, 'a'), std::string(10, 'b'))); 264 MAKE_BENCHMARK(AbslHash, PairStringString_30, 265 std::make_pair(std::string(30, 'a'), std::string(30, 'b'))); 266 MAKE_BENCHMARK(AbslHash, PairStringString_90, 267 std::make_pair(std::string(90, 'a'), std::string(90, 'b'))); 268 MAKE_BENCHMARK(AbslHash, PairStringString_200, 269 std::make_pair(std::string(200, 'a'), std::string(200, 'b'))); 270 MAKE_BENCHMARK(AbslHash, PairStringString_5000, 271 std::make_pair(std::string(5000, 'a'), std::string(5000, 'b'))); 272 MAKE_BENCHMARK(AbslHash, LongTupleInt32, MakeLongTuple<int>()); 273 MAKE_BENCHMARK(AbslHash, LongTupleString, MakeLongTuple<std::string>()); 274 MAKE_BENCHMARK(AbslHash, LongCombineInt32, 
LongCombine<int>()); 275 MAKE_BENCHMARK(AbslHash, LongCombineString, LongCombine<std::string>()); 276 277 MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{}); 278 MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{}); 279 MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32, 280 std::pair<int32_t, int32_t>{}); 281 MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64, 282 std::pair<int64_t, int64_t>{}); 283 MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64, 284 std::tuple<int32_t, bool, int64_t>{}); 285 MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string()); 286 MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a')); 287 MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a')); 288 MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a')); 289 MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a')); 290 MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a')); 291 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10, 292 std::vector<double>(10, 1.1)); 293 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100, 294 std::vector<double>(100, 1.1)); 295 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000, 296 std::vector<double>(1000, 1.1)); 297 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10, 298 FlatHashSet<int64_t>(10)); 299 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100, 300 FlatHashSet<int64_t>(100)); 301 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000, 302 FlatHashSet<int64_t>(1000)); 303 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10, 304 FlatHashSet<double>(10)); 305 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100, 306 FlatHashSet<double>(100)); 307 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000, 308 FlatHashSet<double>(1000)); 309 MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000, 310 FastUnorderedSet<int64_t>(1000)); 311 MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000, 312 FastUnorderedSet<double>(1000)); 313 314 // 
The latency benchmark attempts to model the speed of the hash function in 315 // production. When a hash function is used for hashtable lookups it is rarely 316 // used to hash N items in a tight loop nor on constant sized strings. Instead, 317 // after hashing there is a potential equality test plus a (usually) large 318 // amount of user code. To simulate this effectively we introduce a data 319 // dependency between elements we hash by using the hash of the Nth element as 320 // the selector of the N+1th element to hash. This isolates the hash function 321 // code much like in production. As a bonus we use the hash to generate strings 322 // of size [1,N] (instead of fixed N) to disable perfect branch predictions in 323 // hash function implementations. 324 namespace { 325 // 16kb fits in L1 cache of most CPUs we care about. Keeping memory latency low 326 // will allow us to attribute most time to CPU which means more accurate 327 // measurements. 328 static constexpr size_t kEntropySize = 16 << 10; 329 static char entropy[kEntropySize + 1024]; 330 ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] { 331 absl::BitGen gen; 332 static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, ""); 333 for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) { 334 auto rand = absl::Uniform<uint64_t>(gen); 335 memcpy(&entropy[i], &rand, sizeof(uint64_t)); 336 } 337 return true; 338 }(); 339 } // namespace 340 341 template <class T> 342 struct PodRand { 343 static_assert(std::is_pod<T>::value, ""); 344 static_assert(kEntropySize + sizeof(T) < sizeof(entropy), ""); 345 346 T Get(size_t i) const { 347 T v; 348 memcpy(&v, &entropy[i % kEntropySize], sizeof(T)); 349 return v; 350 } 351 }; 352 353 template <size_t N> 354 struct StringRand { 355 static_assert(kEntropySize + N < sizeof(entropy), ""); 356 357 absl::string_view Get(size_t i) const { 358 // This has a small bias towards small numbers. 
Because max N is ~200 this 359 // is very small and prefer to be very fast instead of absolutely accurate. 360 // Also we pass N = 2^K+1 so that mod reduces to a bitand. 361 size_t s = (i % (N - 1)) + 1; 362 return {&entropy[i % kEntropySize], s}; 363 } 364 }; 365 366 #define MAKE_LATENCY_BENCHMARK(hash, name, ...) \ 367 namespace { \ 368 void BM_latency_##hash##_##name(benchmark::State& state) { \ 369 __VA_ARGS__ r; \ 370 hash<decltype(r.Get(0))> h; \ 371 size_t i = 871401241; \ 372 for (auto _ : state) { \ 373 benchmark::DoNotOptimize(i = h(r.Get(i))); \ 374 } \ 375 } \ 376 BENCHMARK(BM_latency_##hash##_##name); \ 377 } // namespace 378 379 MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>) 380 MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>) 381 MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>) 382 MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>) 383 MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>) 384 MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>)