hash.h (17676B)
1 // Copyright 2018 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // ----------------------------------------------------------------------------- 16 // File: hash.h 17 // ----------------------------------------------------------------------------- 18 // 19 // This header file defines the Abseil `hash` library and the Abseil hashing 20 // framework. This framework consists of the following: 21 // 22 // * The `absl::Hash` functor, which is used to invoke the hasher within the 23 // Abseil hashing framework. `absl::Hash<T>` supports most basic types and 24 // a number of Abseil types out of the box. 25 // * `AbslHashValue`, an extension point that allows you to extend types to 26 // support Abseil hashing without requiring you to define a hashing 27 // algorithm. 28 // * `HashState`, a type-erased class which implements the manipulation of the 29 // hash state (H) itself; contains member functions `combine()`, 30 // `combine_contiguous()`, and `combine_unordered()`; and which you can use 31 // to contribute to an existing hash state when hashing your types. 32 // 33 // Unlike `std::hash` or other hashing frameworks, the Abseil hashing framework 34 // provides most of its utility by abstracting away the hash algorithm (and its 35 // implementation) entirely. Instead, a type invokes the Abseil hashing 36 // framework by simply combining its state with the state of known, hashable 37 // types. Hashing of that combined state is separately done by `absl::Hash`. 38 // 39 // One should assume that a hash algorithm is chosen randomly at the start of 40 // each process. E.g., `absl::Hash<int>{}(9)` in one process and 41 // `absl::Hash<int>{}(9)` in another process are likely to differ. 42 // 43 // `absl::Hash` may also produce different values from different dynamically 44 // loaded libraries. For this reason, `absl::Hash` values must never cross 45 // boundaries in dynamically loaded libraries (including when used in types like 46 // hash containers.) 47 // 48 // `absl::Hash` is intended to strongly mix input bits with a target of passing 49 // an [Avalanche Test](https://en.wikipedia.org/wiki/Avalanche_effect). 50 // 51 // Example: 52 // 53 // // Suppose we have a class `Circle` for which we want to add hashing: 54 // class Circle { 55 // public: 56 // ... 57 // private: 58 // std::pair<int, int> center_; 59 // int radius_; 60 // }; 61 // 62 // // To add hashing support to `Circle`, we simply need to add a free 63 // // (non-member) function `AbslHashValue()`, and return the combined hash 64 // // state of the existing hash state and the class state. You can add such a 65 // // free function using a friend declaration within the body of the class: 66 // class Circle { 67 // public: 68 // ... 69 // template <typename H> 70 // friend H AbslHashValue(H h, const Circle& c) { 71 // return H::combine(std::move(h), c.center_, c.radius_); 72 // } 73 // ... 74 // }; 75 // 76 // For more information, see Adding Type Support to `absl::Hash` below. 77 // 78 #ifndef ABSL_HASH_HASH_H_ 79 #define ABSL_HASH_HASH_H_ 80 81 #include <cstddef> 82 #include <cstdint> 83 #include <tuple> 84 #include <type_traits> 85 #include <utility> 86 87 #include "absl/base/config.h" 88 #include "absl/functional/function_ref.h" 89 #include "absl/hash/internal/hash.h" 90 #include "absl/meta/type_traits.h" 91 92 namespace absl { 93 ABSL_NAMESPACE_BEGIN 94 95 // ----------------------------------------------------------------------------- 96 // `absl::Hash` 97 // ----------------------------------------------------------------------------- 98 // 99 // `absl::Hash<T>` is a convenient general-purpose hash functor for any type `T` 100 // satisfying any of the following conditions (in order): 101 // 102 // * T is an arithmetic or pointer type 103 // * T defines an overload for `AbslHashValue(H, const T&)` for an arbitrary 104 // hash state `H`. 105 // - T defines a specialization of `std::hash<T>` 106 // 107 // `absl::Hash` intrinsically supports the following types: 108 // 109 // * All integral types (including bool) 110 // * All enum types 111 // * All floating-point types (although hashing them is discouraged) 112 // * All pointer types, including nullptr_t 113 // * std::pair<T1, T2>, if T1 and T2 are hashable 114 // * std::tuple<Ts...>, if all the Ts... are hashable 115 // * std::unique_ptr and std::shared_ptr 116 // * All string-like types including: 117 // * absl::Cord 118 // * std::string (as well as any instance of std::basic_string that 119 // uses one of {char, wchar_t, char16_t, char32_t} and its associated 120 // std::char_traits) 121 // * std::string_view (as well as any instance of std::basic_string_view 122 // that uses one of {char, wchar_t, char16_t, char32_t} and its associated 123 // std::char_traits) 124 // * All the standard sequence containers (provided the elements are hashable) 125 // * All the standard associative containers (provided the elements are 126 // hashable) 127 // * absl types such as the following: 128 // * absl::string_view 129 // * absl::uint128 130 // * absl::Time, absl::Duration, and absl::TimeZone 131 // * absl containers (provided the elements are hashable) such as the 132 // following: 133 // * absl::flat_hash_set, absl::node_hash_set, absl::btree_set 134 // * absl::flat_hash_map, absl::node_hash_map, absl::btree_map 135 // * absl::btree_multiset, absl::btree_multimap 136 // * absl::InlinedVector 137 // * absl::FixedArray 138 // 139 // When absl::Hash is used to hash an unordered container with a custom hash 140 // functor, the elements are hashed using default absl::Hash semantics, not 141 // the custom hash functor. This is consistent with the behavior of 142 // operator==() on unordered containers, which compares elements pairwise with 143 // operator==() rather than the custom equality functor. It is usually a 144 // mistake to use either operator==() or absl::Hash on unordered collections 145 // that use functors incompatible with operator==() equality. 146 // 147 // Note: the list above is not meant to be exhaustive. Additional type support 148 // may be added, in which case the above list will be updated. 149 // 150 // ----------------------------------------------------------------------------- 151 // absl::Hash Invocation Evaluation 152 // ----------------------------------------------------------------------------- 153 // 154 // When invoked, `absl::Hash<T>` searches for supplied hash functions in the 155 // following order: 156 // 157 // * Natively supported types out of the box (see above) 158 // * Types for which an `AbslHashValue()` overload is provided (such as 159 // user-defined types). See "Adding Type Support to `absl::Hash`" below. 160 // * Types which define a `std::hash<T>` specialization 161 // 162 // The fallback to legacy hash functions exists mainly for backwards 163 // compatibility. If you have a choice, prefer defining an `AbslHashValue` 164 // overload instead of specializing any legacy hash functors. 165 // 166 // ----------------------------------------------------------------------------- 167 // The Hash State Concept, and using `HashState` for Type Erasure 168 // ----------------------------------------------------------------------------- 169 // 170 // The `absl::Hash` framework relies on the Concept of a "hash state." Such a 171 // hash state is used in several places: 172 // 173 // * Within existing implementations of `absl::Hash<T>` to store the hashed 174 // state of an object. Note that it is up to the implementation how it stores 175 // such state. A hash table, for example, may mix the state to produce an 176 // integer value; a testing framework may simply hold a vector of that state. 177 // * Within implementations of `AbslHashValue()` used to extend user-defined 178 // types. (See "Adding Type Support to absl::Hash" below.) 179 // * Inside a `HashState`, providing type erasure for the concept of a hash 180 // state, which you can use to extend the `absl::Hash` framework for types 181 // that are otherwise difficult to extend using `AbslHashValue()`. (See the 182 // `HashState` class below.) 183 // 184 // The "hash state" concept contains three member functions for mixing hash 185 // state: 186 // 187 // * `H::combine(state, values...)` 188 // 189 // Combines an arbitrary number of values into a hash state, returning the 190 // updated state. Note that the existing hash state is move-only and must be 191 // passed by value. 192 // 193 // Each of the value types T must be hashable by H. 194 // 195 // NOTE: 196 // 197 // state = H::combine(std::move(state), value1, value2, value3); 198 // 199 // must be guaranteed to produce the same hash expansion as 200 // 201 // state = H::combine(std::move(state), value1); 202 // state = H::combine(std::move(state), value2); 203 // state = H::combine(std::move(state), value3); 204 // 205 // * `H::combine_contiguous(state, data, size)` 206 // 207 // Combines a contiguous array of `size` elements into a hash state, 208 // returning the updated state. Note that the existing hash state is 209 // move-only and must be passed by value. 210 // 211 // NOTE: 212 // 213 // state = H::combine_contiguous(std::move(state), data, size); 214 // 215 // need NOT be guaranteed to produce the same hash expansion as a loop 216 // (it may perform internal optimizations). If you need this guarantee, use a 217 // loop instead. 218 // 219 // * `H::combine_unordered(state, begin, end)` 220 // 221 // Combines a set of elements denoted by an iterator pair into a hash 222 // state, returning the updated state. Note that the existing hash 223 // state is move-only and must be passed by value. 224 // 225 // Unlike the other two methods, the hashing is order-independent. 226 // This can be used to hash unordered collections. 227 // 228 // ----------------------------------------------------------------------------- 229 // Adding Type Support to `absl::Hash` 230 // ----------------------------------------------------------------------------- 231 // 232 // To add support for your user-defined type, add a proper `AbslHashValue()` 233 // overload as a free (non-member) function. The overload will take an 234 // existing hash state and should combine that state with state from the type. 235 // 236 // Example: 237 // 238 // template <typename H> 239 // H AbslHashValue(H state, const MyType& v) { 240 // return H::combine(std::move(state), v.field1, ..., v.fieldN); 241 // } 242 // 243 // where `(field1, ..., fieldN)` are the members you would use on your 244 // `operator==` to define equality. 245 // 246 // Notice that `AbslHashValue` is not a class member, but an ordinary function. 247 // An `AbslHashValue` overload for a type should only be declared in the same 248 // file and namespace as said type. The proper `AbslHashValue` implementation 249 // for a given type will be discovered via ADL. 250 // 251 // Note: unlike `std::hash', `absl::Hash` should never be specialized. It must 252 // only be extended by adding `AbslHashValue()` overloads. 253 // 254 template <typename T> 255 using Hash = absl::hash_internal::Hash<T>; 256 257 // HashOf 258 // 259 // absl::HashOf() is a helper that generates a hash from the values of its 260 // arguments. It dispatches to absl::Hash directly, as follows: 261 // * HashOf(t) == absl::Hash<T>{}(t) 262 // * HashOf(a, b, c) == HashOf(std::make_tuple(a, b, c)) 263 // 264 // HashOf(a1, a2, ...) == HashOf(b1, b2, ...) is guaranteed when 265 // * The argument lists have pairwise identical C++ types 266 // * a1 == b1 && a2 == b2 && ... 267 // 268 // The requirement that the arguments match in both type and value is critical. 269 // It means that `a == b` does not necessarily imply `HashOf(a) == HashOf(b)` if 270 // `a` and `b` have different types. For example, `HashOf(2) != HashOf(2.0)`. 271 template <int&... ExplicitArgumentBarrier, typename... Types> 272 size_t HashOf(const Types&... values) { 273 auto tuple = std::tie(values...); 274 return absl::Hash<decltype(tuple)>{}(tuple); 275 } 276 277 // HashState 278 // 279 // A type erased version of the hash state concept, for use in user-defined 280 // `AbslHashValue` implementations that can't use templates (such as PImpl 281 // classes, virtual functions, etc.). The type erasure adds overhead so it 282 // should be avoided unless necessary. 283 // 284 // Note: This wrapper will only erase calls to 285 // combine_contiguous(H, const unsigned char*, size_t) 286 // RunCombineUnordered(H, CombinerF) 287 // 288 // All other calls will be handled internally and will not invoke overloads 289 // provided by the wrapped class. 290 // 291 // Users of this class should still define a template `AbslHashValue` function, 292 // but can use `absl::HashState::Create(&state)` to erase the type of the hash 293 // state and dispatch to their private hashing logic. 294 // 295 // This state can be used like any other hash state. In particular, you can call 296 // `HashState::combine()` and `HashState::combine_contiguous()` on it. 297 // 298 // Example: 299 // 300 // class Interface { 301 // public: 302 // template <typename H> 303 // friend H AbslHashValue(H state, const Interface& value) { 304 // state = H::combine(std::move(state), std::type_index(typeid(*this))); 305 // value.HashValue(absl::HashState::Create(&state)); 306 // return state; 307 // } 308 // private: 309 // virtual void HashValue(absl::HashState state) const = 0; 310 // }; 311 // 312 // class Impl : Interface { 313 // private: 314 // void HashValue(absl::HashState state) const override { 315 // absl::HashState::combine(std::move(state), v1_, v2_); 316 // } 317 // int v1_; 318 // std::string v2_; 319 // }; 320 class HashState : public hash_internal::HashStateBase<HashState> { 321 public: 322 // HashState::Create() 323 // 324 // Create a new `HashState` instance that wraps `state`. All calls to 325 // `combine()` and `combine_contiguous()` on the new instance will be 326 // redirected to the original `state` object. The `state` object must outlive 327 // the `HashState` instance. `T` must be a subclass of `HashStateBase<T>` - 328 // users should not define their own HashState types. 329 template < 330 typename T, 331 absl::enable_if_t< 332 std::is_base_of<hash_internal::HashStateBase<T>, T>::value, int> = 0> 333 static HashState Create(T* state) { 334 HashState s; 335 s.Init(state); 336 return s; 337 } 338 339 HashState(const HashState&) = delete; 340 HashState& operator=(const HashState&) = delete; 341 HashState(HashState&&) = default; 342 HashState& operator=(HashState&&) = default; 343 344 // HashState::combine() 345 // 346 // Combines an arbitrary number of values into a hash state, returning the 347 // updated state. 348 using HashState::HashStateBase::combine; 349 350 // HashState::combine_contiguous() 351 // 352 // Combines a contiguous array of `size` elements into a hash state, returning 353 // the updated state. 354 static HashState combine_contiguous(HashState hash_state, 355 const unsigned char* first, size_t size) { 356 hash_state.combine_contiguous_(hash_state.state_, first, size); 357 return hash_state; 358 } 359 using HashState::HashStateBase::combine_contiguous; 360 361 private: 362 HashState() = default; 363 364 friend class HashState::HashStateBase; 365 friend struct hash_internal::CombineRaw; 366 367 template <typename T> 368 static void CombineContiguousImpl(void* p, const unsigned char* first, 369 size_t size) { 370 T& state = *static_cast<T*>(p); 371 state = T::combine_contiguous(std::move(state), first, size); 372 } 373 374 static HashState combine_raw(HashState hash_state, uint64_t value) { 375 hash_state.combine_raw_(hash_state.state_, value); 376 return hash_state; 377 } 378 379 template <typename T> 380 static void CombineRawImpl(void* p, uint64_t value) { 381 T& state = *static_cast<T*>(p); 382 state = hash_internal::CombineRaw()(std::move(state), value); 383 } 384 385 template <typename T> 386 void Init(T* state) { 387 state_ = state; 388 combine_contiguous_ = &CombineContiguousImpl<T>; 389 combine_raw_ = &CombineRawImpl<T>; 390 run_combine_unordered_ = &RunCombineUnorderedImpl<T>; 391 } 392 393 template <typename HS> 394 struct CombineUnorderedInvoker { 395 template <typename T, typename ConsumerT> 396 void operator()(T inner_state, ConsumerT inner_cb) { 397 f(HashState::Create(&inner_state), 398 [&](HashState& inner_erased) { inner_cb(inner_erased.Real<T>()); }); 399 } 400 401 absl::FunctionRef<void(HS, absl::FunctionRef<void(HS&)>)> f; 402 }; 403 404 template <typename T> 405 static HashState RunCombineUnorderedImpl( 406 HashState state, 407 absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)> 408 f) { 409 // Note that this implementation assumes that inner_state and outer_state 410 // are the same type. This isn't true in the SpyHash case, but SpyHash 411 // types are move-convertible to each other, so this still works. 412 T& real_state = state.Real<T>(); 413 real_state = T::RunCombineUnordered( 414 std::move(real_state), CombineUnorderedInvoker<HashState>{f}); 415 return state; 416 } 417 418 template <typename CombinerT> 419 static HashState RunCombineUnordered(HashState state, CombinerT combiner) { 420 auto* run = state.run_combine_unordered_; 421 return run(std::move(state), std::ref(combiner)); 422 } 423 424 // Do not erase an already erased state. 425 void Init(HashState* state) { 426 state_ = state->state_; 427 combine_contiguous_ = state->combine_contiguous_; 428 combine_raw_ = state->combine_raw_; 429 run_combine_unordered_ = state->run_combine_unordered_; 430 } 431 432 template <typename T> 433 T& Real() { 434 return *static_cast<T*>(state_); 435 } 436 437 void* state_; 438 void (*combine_contiguous_)(void*, const unsigned char*, size_t); 439 void (*combine_raw_)(void*, uint64_t); 440 HashState (*run_combine_unordered_)( 441 HashState state, 442 absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>); 443 }; 444 445 ABSL_NAMESPACE_END 446 } // namespace absl 447 448 #endif // ABSL_HASH_HASH_H_