hash_benchmark.cc (16421B)
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <tuple>
#include <type_traits>
#include <typeindex>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/random/random.h"
#include "absl/strings/cord.h"
#include "absl/strings/cord_test_helpers.h"
#include "absl/strings/string_view.h"
#include "benchmark/benchmark.h"

namespace {

using absl::Hash;

// Core throughput loop shared by all MAKE_BENCHMARK expansions: hashes the
// same `value` repeatedly with hasher `H<T>`. DoNotOptimize on both the input
// and the result keeps the compiler from hoisting or dead-code-eliminating
// the hash computation.
template <template <typename> class H, typename T>
void RunBenchmark(benchmark::State& state, T value) {
  H<T> h;
  for (auto _ : state) {
    benchmark::DoNotOptimize(value);
    benchmark::DoNotOptimize(h(value));
  }
}

}  // namespace

// Alias so the benchmark macros can name the hasher as a single identifier.
template <typename T>
using AbslHash = absl::Hash<T>;

// Abstract base used to measure hashing through a type-erased boundary
// (absl::HashState) instead of a fully inlined, statically typed hasher.
class TypeErasedInterface {
 public:
  virtual ~TypeErasedInterface() = default;

  // Mixes the dynamic type of the wrapper, then delegates the value hashing
  // to the virtual HashValue() via an erased absl::HashState.
  template <typename H>
  friend H AbslHashValue(H state, const TypeErasedInterface& wrapper) {
    state = H::combine(std::move(state), std::type_index(typeid(wrapper)));
    wrapper.HashValue(absl::HashState::Create(&state));
    return state;
  }

 private:
  virtual void HashValue(absl::HashState state) const = 0;
};

// Hash functor that routes `T` through the type-erased interface above, so
// benchmarks can compare erased vs. non-erased hashing of the same values.
template <typename T>
struct TypeErasedAbslHash {
  class Wrapper : public TypeErasedInterface {
   public:
    explicit Wrapper(const T& value) : value_(value) {}

   private:
    void HashValue(absl::HashState state) const override {
      absl::HashState::combine(std::move(state), value_);
    }

    // Non-owning; the wrapper only lives for the duration of operator().
    const T& value_;
  };

  size_t operator()(const T& value) {
    return absl::Hash<Wrapper>{}(Wrapper(value));
  }
};

// Returns a Cord of `size` 'a's stored in a single flat chunk.
absl::Cord FlatCord(size_t size) {
  absl::Cord result(std::string(size, 'a'));
  result.Flatten();
  return result;
}

// Returns a Cord of `size` 'a's split into ~10 separate chunks, to exercise
// the multi-chunk hashing path.
absl::Cord FragmentedCord(size_t size) {
  const size_t orig_size = size;
  std::vector<std::string> chunks;
  size_t chunk_size = std::max<size_t>(1, size / 10);
  while (size > chunk_size) {
    chunks.push_back(std::string(chunk_size, 'a'));
    size -= chunk_size;
  }
  if (size > 0) {
    chunks.push_back(std::string(size, 'a'));
  }
  absl::Cord result = absl::MakeFragmentedCord(chunks);
  // Silences the unused-variable warning in NDEBUG builds where the assert
  // below compiles away.
  (void) orig_size;
  assert(result.size() == orig_size);
  return result;
}

// Returns the vector {0, 1, ..., count - 1} converted to T.
template <typename T>
std::vector<T> Vector(size_t count) {
  std::vector<T> result;
  for (size_t v = 0; v < count; ++v) {
    result.push_back(v);
  }
  return result;
}

// Bogus type that replicates an unordered_set's bit mixing, but with
// vector-speed iteration. This is intended to measure the overhead of
// unordered hashing without counting the speed of unordered_set iteration.
123 template <typename T> 124 struct FastUnorderedSet { 125 explicit FastUnorderedSet(size_t count) { 126 for (size_t v = 0; v < count; ++v) { 127 values.push_back(v); 128 } 129 } 130 std::vector<T> values; 131 132 template <typename H> 133 friend H AbslHashValue(H h, const FastUnorderedSet& fus) { 134 return H::combine(H::combine_unordered(std::move(h), fus.values.begin(), 135 fus.values.end()), 136 fus.values.size()); 137 } 138 }; 139 140 template <typename T> 141 absl::flat_hash_set<T> FlatHashSet(size_t count) { 142 absl::flat_hash_set<T> result; 143 for (size_t v = 0; v < count; ++v) { 144 result.insert(v); 145 } 146 return result; 147 } 148 149 template <typename T> 150 struct LongCombine { 151 T a[200]{}; 152 template <typename H> 153 friend H AbslHashValue(H state, const LongCombine& v) { 154 // This is testing a single call to `combine` with a lot of arguments to 155 // test the performance of the folding logic. 156 return H::combine( 157 std::move(state), // 158 v.a[0], v.a[1], v.a[2], v.a[3], v.a[4], v.a[5], v.a[6], v.a[7], v.a[8], 159 v.a[9], v.a[10], v.a[11], v.a[12], v.a[13], v.a[14], v.a[15], v.a[16], 160 v.a[17], v.a[18], v.a[19], v.a[20], v.a[21], v.a[22], v.a[23], v.a[24], 161 v.a[25], v.a[26], v.a[27], v.a[28], v.a[29], v.a[30], v.a[31], v.a[32], 162 v.a[33], v.a[34], v.a[35], v.a[36], v.a[37], v.a[38], v.a[39], v.a[40], 163 v.a[41], v.a[42], v.a[43], v.a[44], v.a[45], v.a[46], v.a[47], v.a[48], 164 v.a[49], v.a[50], v.a[51], v.a[52], v.a[53], v.a[54], v.a[55], v.a[56], 165 v.a[57], v.a[58], v.a[59], v.a[60], v.a[61], v.a[62], v.a[63], v.a[64], 166 v.a[65], v.a[66], v.a[67], v.a[68], v.a[69], v.a[70], v.a[71], v.a[72], 167 v.a[73], v.a[74], v.a[75], v.a[76], v.a[77], v.a[78], v.a[79], v.a[80], 168 v.a[81], v.a[82], v.a[83], v.a[84], v.a[85], v.a[86], v.a[87], v.a[88], 169 v.a[89], v.a[90], v.a[91], v.a[92], v.a[93], v.a[94], v.a[95], v.a[96], 170 v.a[97], v.a[98], v.a[99], v.a[100], v.a[101], v.a[102], v.a[103], 171 v.a[104], v.a[105], v.a[106], 
v.a[107], v.a[108], v.a[109], v.a[110], 172 v.a[111], v.a[112], v.a[113], v.a[114], v.a[115], v.a[116], v.a[117], 173 v.a[118], v.a[119], v.a[120], v.a[121], v.a[122], v.a[123], v.a[124], 174 v.a[125], v.a[126], v.a[127], v.a[128], v.a[129], v.a[130], v.a[131], 175 v.a[132], v.a[133], v.a[134], v.a[135], v.a[136], v.a[137], v.a[138], 176 v.a[139], v.a[140], v.a[141], v.a[142], v.a[143], v.a[144], v.a[145], 177 v.a[146], v.a[147], v.a[148], v.a[149], v.a[150], v.a[151], v.a[152], 178 v.a[153], v.a[154], v.a[155], v.a[156], v.a[157], v.a[158], v.a[159], 179 v.a[160], v.a[161], v.a[162], v.a[163], v.a[164], v.a[165], v.a[166], 180 v.a[167], v.a[168], v.a[169], v.a[170], v.a[171], v.a[172], v.a[173], 181 v.a[174], v.a[175], v.a[176], v.a[177], v.a[178], v.a[179], v.a[180], 182 v.a[181], v.a[182], v.a[183], v.a[184], v.a[185], v.a[186], v.a[187], 183 v.a[188], v.a[189], v.a[190], v.a[191], v.a[192], v.a[193], v.a[194], 184 v.a[195], v.a[196], v.a[197], v.a[198], v.a[199]); 185 } 186 }; 187 188 template <typename T> 189 auto MakeLongTuple() { 190 auto t1 = std::tuple<T>(); 191 auto t2 = std::tuple_cat(t1, t1); 192 auto t3 = std::tuple_cat(t2, t2); 193 auto t4 = std::tuple_cat(t3, t3); 194 auto t5 = std::tuple_cat(t4, t4); 195 auto t6 = std::tuple_cat(t5, t5); 196 // Ideally this would be much larger, but some configurations can't handle 197 // making tuples with that many elements. They break inside std::tuple itself. 198 static_assert(std::tuple_size<decltype(t6)>::value == 32, ""); 199 return t6; 200 } 201 202 // Generates a benchmark and a codegen method for the provided types. The 203 // codegen method provides a well known entrypoint for dumping assembly. 204 #define MAKE_BENCHMARK(hash, name, ...) 
\ 205 namespace { \ 206 void BM_##hash##_##name(benchmark::State& state) { \ 207 RunBenchmark<hash>(state, __VA_ARGS__); \ 208 } \ 209 BENCHMARK(BM_##hash##_##name); \ 210 } \ 211 size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg); \ 212 size_t Codegen##hash##name(const decltype(__VA_ARGS__)& arg) { \ 213 return hash<decltype(__VA_ARGS__)>{}(arg); \ 214 } \ 215 bool absl_hash_test_odr_use##hash##name = \ 216 (benchmark::DoNotOptimize(&Codegen##hash##name), false) 217 218 MAKE_BENCHMARK(AbslHash, Int32, int32_t{}); 219 MAKE_BENCHMARK(AbslHash, Int64, int64_t{}); 220 MAKE_BENCHMARK(AbslHash, Double, 1.2); 221 MAKE_BENCHMARK(AbslHash, DoubleZero, 0.0); 222 MAKE_BENCHMARK(AbslHash, PairInt32Int32, std::pair<int32_t, int32_t>{}); 223 MAKE_BENCHMARK(AbslHash, PairInt64Int64, std::pair<int64_t, int64_t>{}); 224 MAKE_BENCHMARK(AbslHash, TupleInt32BoolInt64, 225 std::tuple<int32_t, bool, int64_t>{}); 226 MAKE_BENCHMARK(AbslHash, String_0, std::string()); 227 MAKE_BENCHMARK(AbslHash, String_1, std::string(1, 'a')); 228 MAKE_BENCHMARK(AbslHash, String_2, std::string(2, 'a')); 229 MAKE_BENCHMARK(AbslHash, String_4, std::string(4, 'a')); 230 MAKE_BENCHMARK(AbslHash, String_8, std::string(8, 'a')); 231 MAKE_BENCHMARK(AbslHash, String_10, std::string(10, 'a')); 232 MAKE_BENCHMARK(AbslHash, String_30, std::string(30, 'a')); 233 MAKE_BENCHMARK(AbslHash, String_90, std::string(90, 'a')); 234 MAKE_BENCHMARK(AbslHash, String_200, std::string(200, 'a')); 235 MAKE_BENCHMARK(AbslHash, String_5000, std::string(5000, 'a')); 236 MAKE_BENCHMARK(AbslHash, Cord_Flat_0, absl::Cord()); 237 MAKE_BENCHMARK(AbslHash, Cord_Flat_10, FlatCord(10)); 238 MAKE_BENCHMARK(AbslHash, Cord_Flat_30, FlatCord(30)); 239 MAKE_BENCHMARK(AbslHash, Cord_Flat_90, FlatCord(90)); 240 MAKE_BENCHMARK(AbslHash, Cord_Flat_200, FlatCord(200)); 241 MAKE_BENCHMARK(AbslHash, Cord_Flat_5000, FlatCord(5000)); 242 MAKE_BENCHMARK(AbslHash, Cord_Fragmented_200, FragmentedCord(200)); 243 MAKE_BENCHMARK(AbslHash, 
Cord_Fragmented_5000, FragmentedCord(5000)); 244 MAKE_BENCHMARK(AbslHash, VectorInt64_10, Vector<int64_t>(10)); 245 MAKE_BENCHMARK(AbslHash, VectorInt64_100, Vector<int64_t>(100)); 246 MAKE_BENCHMARK(AbslHash, VectorInt64_1000, Vector<int64_t>(1000)); 247 MAKE_BENCHMARK(AbslHash, VectorDouble_10, Vector<double>(10)); 248 MAKE_BENCHMARK(AbslHash, VectorDouble_100, Vector<double>(100)); 249 MAKE_BENCHMARK(AbslHash, VectorDouble_1000, Vector<double>(1000)); 250 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_10, FlatHashSet<int64_t>(10)); 251 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_100, FlatHashSet<int64_t>(100)); 252 MAKE_BENCHMARK(AbslHash, FlatHashSetInt64_1000, FlatHashSet<int64_t>(1000)); 253 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_10, FlatHashSet<double>(10)); 254 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_100, FlatHashSet<double>(100)); 255 MAKE_BENCHMARK(AbslHash, FlatHashSetDouble_1000, FlatHashSet<double>(1000)); 256 MAKE_BENCHMARK(AbslHash, FastUnorderedSetInt64_1000, 257 FastUnorderedSet<int64_t>(1000)); 258 MAKE_BENCHMARK(AbslHash, FastUnorderedSetDouble_1000, 259 FastUnorderedSet<double>(1000)); 260 MAKE_BENCHMARK(AbslHash, PairStringString_0, 261 std::make_pair(std::string(), std::string())); 262 MAKE_BENCHMARK(AbslHash, PairStringString_10, 263 std::make_pair(std::string(10, 'a'), std::string(10, 'b'))); 264 MAKE_BENCHMARK(AbslHash, PairStringString_30, 265 std::make_pair(std::string(30, 'a'), std::string(30, 'b'))); 266 MAKE_BENCHMARK(AbslHash, PairStringString_90, 267 std::make_pair(std::string(90, 'a'), std::string(90, 'b'))); 268 MAKE_BENCHMARK(AbslHash, PairStringString_200, 269 std::make_pair(std::string(200, 'a'), std::string(200, 'b'))); 270 MAKE_BENCHMARK(AbslHash, PairStringString_5000, 271 std::make_pair(std::string(5000, 'a'), std::string(5000, 'b'))); 272 MAKE_BENCHMARK(AbslHash, LongTupleInt32, MakeLongTuple<int>()); 273 MAKE_BENCHMARK(AbslHash, LongTupleString, MakeLongTuple<std::string>()); 274 MAKE_BENCHMARK(AbslHash, LongCombineInt32, 
LongCombine<int>()); 275 MAKE_BENCHMARK(AbslHash, LongCombineString, LongCombine<std::string>()); 276 277 MAKE_BENCHMARK(TypeErasedAbslHash, Int32, int32_t{}); 278 MAKE_BENCHMARK(TypeErasedAbslHash, Int64, int64_t{}); 279 MAKE_BENCHMARK(TypeErasedAbslHash, PairInt32Int32, 280 std::pair<int32_t, int32_t>{}); 281 MAKE_BENCHMARK(TypeErasedAbslHash, PairInt64Int64, 282 std::pair<int64_t, int64_t>{}); 283 MAKE_BENCHMARK(TypeErasedAbslHash, TupleInt32BoolInt64, 284 std::tuple<int32_t, bool, int64_t>{}); 285 MAKE_BENCHMARK(TypeErasedAbslHash, String_0, std::string()); 286 MAKE_BENCHMARK(TypeErasedAbslHash, String_10, std::string(10, 'a')); 287 MAKE_BENCHMARK(TypeErasedAbslHash, String_30, std::string(30, 'a')); 288 MAKE_BENCHMARK(TypeErasedAbslHash, String_90, std::string(90, 'a')); 289 MAKE_BENCHMARK(TypeErasedAbslHash, String_200, std::string(200, 'a')); 290 MAKE_BENCHMARK(TypeErasedAbslHash, String_5000, std::string(5000, 'a')); 291 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_10, 292 std::vector<double>(10, 1.1)); 293 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_100, 294 std::vector<double>(100, 1.1)); 295 MAKE_BENCHMARK(TypeErasedAbslHash, VectorDouble_1000, 296 std::vector<double>(1000, 1.1)); 297 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_10, 298 FlatHashSet<int64_t>(10)); 299 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_100, 300 FlatHashSet<int64_t>(100)); 301 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetInt64_1000, 302 FlatHashSet<int64_t>(1000)); 303 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_10, 304 FlatHashSet<double>(10)); 305 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_100, 306 FlatHashSet<double>(100)); 307 MAKE_BENCHMARK(TypeErasedAbslHash, FlatHashSetDouble_1000, 308 FlatHashSet<double>(1000)); 309 MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetInt64_1000, 310 FastUnorderedSet<int64_t>(1000)); 311 MAKE_BENCHMARK(TypeErasedAbslHash, FastUnorderedSetDouble_1000, 312 FastUnorderedSet<double>(1000)); 313 314 // 
The latency benchmark attempts to model the speed of the hash function in 315 // production. When a hash function is used for hashtable lookups it is rarely 316 // used to hash N items in a tight loop nor on constant sized strings. Instead, 317 // after hashing there is a potential equality test plus a (usually) large 318 // amount of user code. To simulate this effectively we introduce a data 319 // dependency between elements we hash by using the hash of the Nth element as 320 // the selector of the N+1th element to hash. This isolates the hash function 321 // code much like in production. As a bonus we use the hash to generate strings 322 // of size [1,N] (instead of fixed N) to disable perfect branch predictions in 323 // hash function implementations. 324 namespace { 325 // 16kb fits in L1 cache of most CPUs we care about. Keeping memory latency low 326 // will allow us to attribute most time to CPU which means more accurate 327 // measurements. 328 static constexpr size_t kEntropySize = 16 << 10; 329 static char entropy[kEntropySize + 1024]; 330 ABSL_ATTRIBUTE_UNUSED static const bool kInitialized = [] { 331 absl::BitGen gen; 332 static_assert(sizeof(entropy) % sizeof(uint64_t) == 0, ""); 333 for (int i = 0; i != sizeof(entropy); i += sizeof(uint64_t)) { 334 auto rand = absl::Uniform<uint64_t>(gen); 335 memcpy(&entropy[i], &rand, sizeof(uint64_t)); 336 } 337 return true; 338 }(); 339 } // namespace 340 341 template <class T> 342 struct PodRand { 343 static_assert(std::is_pod<T>::value, ""); 344 static_assert(kEntropySize + sizeof(T) < sizeof(entropy), ""); 345 346 T Get(size_t i) const { 347 T v; 348 memcpy(&v, &entropy[i % kEntropySize], sizeof(T)); 349 return v; 350 } 351 }; 352 353 template <size_t N> 354 struct StringRand { 355 static_assert(kEntropySize + N < sizeof(entropy), ""); 356 357 absl::string_view Get(size_t i) const { 358 // This has a small bias towards small numbers. 
Because max N is ~200 this 359 // is very small and prefer to be very fast instead of absolutely accurate. 360 // Also we pass N = 2^K+1 so that mod reduces to a bitand. 361 size_t s = (i % (N - 1)) + 1; 362 return {&entropy[i % kEntropySize], s}; 363 } 364 }; 365 366 #define MAKE_LATENCY_BENCHMARK(hash, name, ...) \ 367 namespace { \ 368 void BM_latency_##hash##_##name(benchmark::State& state) { \ 369 __VA_ARGS__ r; \ 370 hash<decltype(r.Get(0))> h; \ 371 size_t i = 871401241; \ 372 for (auto _ : state) { \ 373 benchmark::DoNotOptimize(i = h(r.Get(i))); \ 374 } \ 375 } \ 376 BENCHMARK(BM_latency_##hash##_##name); \ 377 } // namespace 378 379 MAKE_LATENCY_BENCHMARK(AbslHash, Int32, PodRand<int32_t>) 380 MAKE_LATENCY_BENCHMARK(AbslHash, Int64, PodRand<int64_t>) 381 MAKE_LATENCY_BENCHMARK(AbslHash, String9, StringRand<9>) 382 MAKE_LATENCY_BENCHMARK(AbslHash, String33, StringRand<33>) 383 MAKE_LATENCY_BENCHMARK(AbslHash, String65, StringRand<65>) 384 MAKE_LATENCY_BENCHMARK(AbslHash, String257, StringRand<257>)