hash.h (58025B)
1 // Copyright 2018 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // ----------------------------------------------------------------------------- 16 // File: hash.h 17 // ----------------------------------------------------------------------------- 18 // 19 #ifndef ABSL_HASH_INTERNAL_HASH_H_ 20 #define ABSL_HASH_INTERNAL_HASH_H_ 21 22 #ifdef __APPLE__ 23 #include <Availability.h> 24 #include <TargetConditionals.h> 25 #endif 26 27 #include "absl/base/config.h" 28 29 // For feature testing and determining which headers can be included. 
#if ABSL_INTERNAL_CPLUSPLUS_LANG >= 202002L
#include <version>
#else
#include <ciso646>
#endif

#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <deque>
#include <forward_list>
#include <functional>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/optimization.h"
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/hash/internal/city.h"
#include "absl/hash/internal/low_level_hash.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include "absl/utility/utility.h"

#if defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L
#include <filesystem>  // NOLINT
#endif

#ifdef ABSL_HAVE_STD_STRING_VIEW
#include <string_view>
#endif

#ifdef __ARM_ACLE
#include <arm_acle.h>
#endif

namespace absl {
ABSL_NAMESPACE_BEGIN

class HashState;

namespace hash_internal {

// Internal detail: Large buffers are hashed in smaller chunks. This function
// returns the size of these chunks.
constexpr size_t PiecewiseChunkSize() { return 1024; }

// PiecewiseCombiner
//
// PiecewiseCombiner is an internal-only helper class for hashing a piecewise
// buffer of `char` or `unsigned char` as though it were contiguous. This class
// provides two methods:
//
//   H add_buffer(state, data, size)
//   H finalize(state)
//
// `add_buffer` can be called zero or more times, followed by a single call to
// `finalize`. This will produce the same hash expansion as concatenating each
// buffer piece into a single contiguous buffer, and passing this to
// `H::combine_contiguous`.
//
// Example usage:
//   PiecewiseCombiner combiner;
//   for (const auto& piece : pieces) {
//     state = combiner.add_buffer(std::move(state), piece.data, piece.size);
//   }
//   return combiner.finalize(std::move(state));
class PiecewiseCombiner {
 public:
  PiecewiseCombiner() : position_(0) {}
  PiecewiseCombiner(const PiecewiseCombiner&) = delete;
  PiecewiseCombiner& operator=(const PiecewiseCombiner&) = delete;

  // PiecewiseCombiner::add_buffer()
  //
  // Appends the given range of bytes to the sequence to be hashed, which may
  // modify the provided hash state.
  template <typename H>
  H add_buffer(H state, const unsigned char* data, size_t size);
  template <typename H>
  H add_buffer(H state, const char* data, size_t size) {
    return add_buffer(std::move(state),
                      reinterpret_cast<const unsigned char*>(data), size);
  }

  // PiecewiseCombiner::finalize()
  //
  // Finishes combining the hash sequence, which may modify the provided
  // hash state.
  //
  // Once finalize() is called, add_buffer() may no longer be called. The
  // resulting hash state will be the same as if the pieces passed to
  // add_buffer() were concatenated into a single flat buffer, and then provided
  // to H::combine_contiguous().
  template <typename H>
  H finalize(H state);

 private:
  // Staging area for bytes not yet flushed; filled up to PiecewiseChunkSize()
  // before each chunk is handed to the hash state.
  unsigned char buf_[PiecewiseChunkSize()];
  // Number of valid bytes currently buffered in `buf_`.
  size_t position_;
};

// is_hashable()
//
// Trait class which returns true if T is hashable by the absl::Hash framework.
// Used for the AbslHashValue implementations for composite types below.
template <typename T>
struct is_hashable;

// HashStateBase
//
// An internal implementation detail that contains common implementation details
// for all of the "hash state objects" objects generated by Abseil. This is not
// a public API; users should not create classes that inherit from this.
//
// A hash state object is the template argument `H` passed to `AbslHashValue`.
// It represents an intermediate state in the computation of an unspecified hash
// algorithm. `HashStateBase` provides a CRTP style base class for hash state
// implementations. Developers adding type support for `absl::Hash` should not
// rely on any parts of the state object other than the following member
// functions:
//
//   * HashStateBase::combine()
//   * HashStateBase::combine_contiguous()
//   * HashStateBase::combine_unordered()
//
// A derived hash state class of type `H` must provide a public member function
// with a signature similar to the following:
//
//    `static H combine_contiguous(H state, const unsigned char*, size_t)`.
//
// It must also provide a private template method named RunCombineUnordered.
//
// A "consumer" is a 1-arg functor returning void. Its argument is a reference
// to an inner hash state object, and it may be called multiple times. When
// called, the functor consumes the entropy from the provided state object,
// and resets that object to its empty state.
//
// A "combiner" is a stateless 2-arg functor returning void. Its arguments are
// an inner hash state object and an ElementStateConsumer functor. A combiner
// uses the provided inner hash state object to hash each element of the
// container, passing the inner hash state object to the consumer after hashing
// each element.
//
// Given these definitions, a derived hash state class of type H
// must provide a private template method with a signature similar to the
// following:
//
//    `template <typename CombinerT>`
//    `static H RunCombineUnordered(H outer_state, CombinerT combiner)`
//
// This function is responsible for constructing the inner state object and
// providing a consumer to the combiner. It uses side effects of the consumer
// and combiner to mix the state of each element in an order-independent manner,
// and uses this to return an updated value of `outer_state`.
//
// This inside-out approach generates efficient object code in the normal case,
// but allows us to use stack storage to implement the absl::HashState type
// erasure mechanism (avoiding heap allocations while hashing).
//
// `HashStateBase` will provide a complete implementation for a hash state
// object in terms of these two methods.
//
// Example:
//
//   // Use CRTP to define your derived class.
//   struct MyHashState : HashStateBase<MyHashState> {
//       static H combine_contiguous(H state, const unsigned char*, size_t);
//       using MyHashState::HashStateBase::combine;
//       using MyHashState::HashStateBase::combine_contiguous;
//       using MyHashState::HashStateBase::combine_unordered;
//     private:
//       template <typename CombinerT>
//       static H RunCombineUnordered(H state, CombinerT combiner);
//   };
template <typename H>
class HashStateBase {
 public:
  // HashStateBase::combine()
  //
  // Combines an arbitrary number of values into a hash state, returning the
  // updated state.
  //
  // Each of the value types `T` must be separately hashable by the Abseil
  // hashing framework.
  //
  // NOTE:
  //
  //   state = H::combine(std::move(state), value1, value2, value3);
  //
  // is guaranteed to produce the same hash expansion as:
  //
  //   state = H::combine(std::move(state), value1);
  //   state = H::combine(std::move(state), value2);
  //   state = H::combine(std::move(state), value3);
  template <typename T, typename... Ts>
  static H combine(H state, const T& value, const Ts&... values);
  // Base case of the variadic expansion above: no values left to mix in.
  static H combine(H state) { return state; }

  // HashStateBase::combine_contiguous()
  //
  // Combines a contiguous array of `size` elements into a hash state, returning
  // the updated state.
  //
  // NOTE:
  //
  //   state = H::combine_contiguous(std::move(state), data, size);
  //
  // is NOT guaranteed to produce the same hash expansion as a for-loop (it may
  // perform internal optimizations). If you need this guarantee, use the
  // for-loop instead.
  template <typename T>
  static H combine_contiguous(H state, const T* data, size_t size);

  // Mixes the elements in [begin, end) into the state in an order-independent
  // way, by delegating to the derived class's RunCombineUnordered.
  template <typename I>
  static H combine_unordered(H state, I begin, I end);

  using AbslInternalPiecewiseCombiner = PiecewiseCombiner;

  template <typename T>
  using is_hashable = absl::hash_internal::is_hashable<T>;

 private:
  // Common implementation of the iteration step of a "combiner", as described
  // above.
  template <typename I>
  struct CombineUnorderedCallback {
    I begin;
    I end;

    template <typename InnerH, typename ElementStateConsumer>
    void operator()(InnerH inner_state, ElementStateConsumer cb) {
      for (; begin != end; ++begin) {
        inner_state = H::combine(std::move(inner_state), *begin);
        cb(inner_state);
      }
    }
  };
};

// is_uniquely_represented
//
// `is_uniquely_represented<T>` is a trait class that indicates whether `T`
// is uniquely represented.
//
// A type is "uniquely represented" if two equal values of that type are
// guaranteed to have the same bytes in their underlying storage. In other
// words, if `a == b`, then `memcmp(&a, &b, sizeof(T))` is guaranteed to be
// zero. This property cannot be detected automatically, so this trait is false
// by default, but can be specialized by types that wish to assert that they are
// uniquely represented. This makes them eligible for certain optimizations.
//
// If you have any doubt whatsoever, do not specialize this template.
// The default is completely safe, and merely disables some optimizations
// that will not matter for most types. Specializing this template,
// on the other hand, can be very hazardous.
//
// To be uniquely represented, a type must not have multiple ways of
// representing the same value; for example, float and double are not
// uniquely represented, because they have distinct representations for
// +0 and -0. Furthermore, the type's byte representation must consist
// solely of user-controlled data, with no padding bits and no compiler-
// controlled data such as vptrs or sanitizer metadata. This is usually
// very difficult to guarantee, because in most cases the compiler can
// insert data and padding bits at its own discretion.
//
// If you specialize this template for a type `T`, you must do so in the file
// that defines that type (or in this file). If you define that specialization
// anywhere else, `is_uniquely_represented<T>` could have different meanings
// in different places.
//
// The Enable parameter is meaningless; it is provided as a convenience,
// to support certain SFINAE techniques when defining specializations.
template <typename T, typename Enable = void>
struct is_uniquely_represented : std::false_type {};

// is_uniquely_represented<unsigned char>
//
// unsigned char is a synonym for "byte", so it is guaranteed to be
// uniquely represented.
template <>
struct is_uniquely_represented<unsigned char> : std::true_type {};

// is_uniquely_represented for non-standard integral types
//
// Integral types other than bool should be uniquely represented on any
// platform that this will plausibly be ported to.
template <typename Integral>
struct is_uniquely_represented<
    Integral, typename std::enable_if<std::is_integral<Integral>::value>::type>
    : std::true_type {};

// is_uniquely_represented<bool>
//
// bool is excluded from the integral specialization above: only one of its
// bits is the value; the remaining object-representation bits need not be
// identical for equal values, so it is not uniquely represented.
template <>
struct is_uniquely_represented<bool> : std::false_type {};

#ifdef ABSL_HAVE_INTRINSIC_INT128
// Specialize the trait for GNU extension types.
template <>
struct is_uniquely_represented<__int128> : std::true_type {};
template <>
struct is_uniquely_represented<unsigned __int128> : std::true_type {};
#endif  // ABSL_HAVE_INTRINSIC_INT128

// FitsIn64Bits<T> is true when T's object representation occupies at most
// 8 bytes, i.e. it can be loaded whole into a single uint64_t.
template <typename T>
struct FitsIn64Bits : std::integral_constant<bool, sizeof(T) <= 8> {};

// Functor that mixes a raw 64-bit word directly into the hash state via the
// state's combine_raw() entry point, bypassing per-type AbslHashValue
// dispatch.
struct CombineRaw {
  template <typename H>
  H operator()(H state, uint64_t value) const {
    return H::combine_raw(std::move(state), value);
  }
};

// hash_bytes()
//
// Convenience function that combines `hash_state` with the byte representation
// of `value`.
// Fast path: values that fit in 64 bits are loaded as a single word and fed
// through combine_raw(), avoiding the general contiguous-byte path.
template <typename H, typename T,
          absl::enable_if_t<FitsIn64Bits<T>::value, int> = 0>
H hash_bytes(H hash_state, const T& value) {
  const unsigned char* start = reinterpret_cast<const unsigned char*>(&value);
  uint64_t v;
  if constexpr (sizeof(T) == 1) {
    v = *start;
  } else if constexpr (sizeof(T) == 2) {
    v = absl::base_internal::UnalignedLoad16(start);
  } else if constexpr (sizeof(T) == 4) {
    v = absl::base_internal::UnalignedLoad32(start);
  } else {
    static_assert(sizeof(T) == 8);
    v = absl::base_internal::UnalignedLoad64(start);
  }
  return CombineRaw()(std::move(hash_state), v);
}
// General path: values wider than 64 bits are hashed as a contiguous byte
// range.
template <typename H, typename T,
          absl::enable_if_t<!FitsIn64Bits<T>::value, int> = 0>
H hash_bytes(H hash_state, const T& value) {
  const unsigned char* start = reinterpret_cast<const unsigned char*>(&value);
  return H::combine_contiguous(std::move(hash_state), start, sizeof(value));
}

// -----------------------------------------------------------------------------
// AbslHashValue for Basic Types
// -----------------------------------------------------------------------------

// Note: Default `AbslHashValue` implementations live in `hash_internal`. This
// allows us to block lexical scope lookup when doing an unqualified call to
// `AbslHashValue` below. User-defined implementations of `AbslHashValue` can
// only be found via ADL.

// AbslHashValue() for hashing bool values
//
// We use SFINAE to ensure that this overload only accepts bool, not types that
// are convertible to bool.
template <typename H, typename B>
typename std::enable_if<std::is_same<B, bool>::value, H>::type AbslHashValue(
    H hash_state, B value) {
  return H::combine(std::move(hash_state),
                    static_cast<unsigned char>(value ? 1 : 0));
}

// AbslHashValue() for hashing enum values
template <typename H, typename Enum>
typename std::enable_if<std::is_enum<Enum>::value, H>::type AbslHashValue(
    H hash_state, Enum e) {
  // In practice, we could almost certainly just invoke hash_bytes directly,
  // but it's possible that a sanitizer might one day want to
  // store data in the unused bits of an enum. To avoid that risk, we
  // convert to the underlying type before hashing. Hopefully this will get
  // optimized away; if not, we can reopen discussion with c-toolchain-team.
  return H::combine(std::move(hash_state),
                    static_cast<typename std::underlying_type<Enum>::type>(e));
}
// AbslHashValue() for hashing floating-point values
template <typename H, typename Float>
typename std::enable_if<std::is_same<Float, float>::value ||
                            std::is_same<Float, double>::value,
                        H>::type
AbslHashValue(H hash_state, Float value) {
  // +0.0 and -0.0 compare equal but have different byte patterns; normalize
  // both to +0.0 so that equal values hash identically.
  return hash_internal::hash_bytes(std::move(hash_state),
                                   value == 0 ? 0 : value);
}

// Long double has the property that it might have extra unused bytes in it.
// For example, in x86 sizeof(long double)==16 but it only really uses 80-bits
// of it. This means we can't use hash_bytes on a long double and have to
// convert it to something else first.
template <typename H, typename LongDouble>
typename std::enable_if<std::is_same<LongDouble, long double>::value, H>::type
AbslHashValue(H hash_state, LongDouble value) {
  const int category = std::fpclassify(value);
  switch (category) {
    case FP_INFINITE:
      // Add the sign bit to differentiate between +Inf and -Inf
      hash_state = H::combine(std::move(hash_state), std::signbit(value));
      break;

    case FP_NAN:
    case FP_ZERO:
    default:
      // Category is enough for these.
      break;

    case FP_NORMAL:
    case FP_SUBNORMAL:
      // We can't convert `value` directly to double because this would have
      // undefined behavior if the value is out of range.
      // std::frexp gives us a value in the range (-1, -.5] or [.5, 1) that is
      // guaranteed to be in range for `double`. The truncation is
      // implementation defined, but that works as long as it is deterministic.
      int exp;
      auto mantissa = static_cast<double>(std::frexp(value, &exp));
      hash_state = H::combine(std::move(hash_state), mantissa, exp);
  }

  return H::combine(std::move(hash_state), category);
}

// Without this overload, an array decays to a pointer and we hash that, which
// is not likely to be what the caller intended.
template <typename H, typename T, size_t N>
H AbslHashValue(H hash_state, T (&)[N]) {
  static_assert(
      sizeof(T) == -1,
      "Hashing C arrays is not allowed. For string literals, wrap the literal "
      "in absl::string_view(). To hash the array contents, use "
      "absl::MakeSpan() or make the array an std::array. To hash the array "
      "address, use &array[0].");
  return hash_state;
}

// AbslHashValue() for hashing pointers
template <typename H, typename T>
std::enable_if_t<std::is_pointer<T>::value, H> AbslHashValue(H hash_state,
                                                             T ptr) {
  auto v = reinterpret_cast<uintptr_t>(ptr);
  // Due to alignment, pointers tend to have low bits as zero, and the next few
  // bits follow a pattern since they are also multiples of some base value. The
  // byte swap in WeakMix helps ensure we still have good entropy in low bits.
  // Mix pointers twice to ensure we have good entropy in low bits.
  return H::combine(std::move(hash_state), v, v);
}

// AbslHashValue() for hashing nullptr_t
template <typename H>
H AbslHashValue(H hash_state, std::nullptr_t) {
  // Hash nullptr the same way as a null void*, so the two agree.
  return H::combine(std::move(hash_state), static_cast<void*>(nullptr));
}

// AbslHashValue() for hashing pointers-to-member
template <typename H, typename T, typename C>
H AbslHashValue(H hash_state, T C::*ptr) {
  // Returns the number of bytes of the representation that are salient for
  // hashing (i.e. excludes trailing padding on platforms that have it).
  auto salient_ptm_size = [](std::size_t n) -> std::size_t {
#if defined(_MSC_VER)
    // Pointers-to-member-function on MSVC consist of one pointer plus 0, 1, 2,
    // or 3 ints. In 64-bit mode, they are 8-byte aligned and thus can contain
    // padding (namely when they have 1 or 3 ints). The value below is a lower
    // bound on the number of salient, non-padding bytes that we use for
    // hashing.
    if constexpr (alignof(T C::*) == alignof(int)) {
      // No padding when all subobjects have the same size as the total
      // alignment. This happens in 32-bit mode.
      return n;
    } else {
      // Padding for 1 int (size 16) or 3 ints (size 24).
      // With 2 ints, the size is 16 with no padding, which we pessimize.
      return n == 24 ? 20 : n == 16 ? 12 : n;
    }
#else
    // On other platforms, we assume that pointers-to-members do not have
    // padding.
#ifdef __cpp_lib_has_unique_object_representations
    static_assert(std::has_unique_object_representations<T C::*>::value);
#endif  // __cpp_lib_has_unique_object_representations
    return n;
#endif
  };
  return H::combine_contiguous(std::move(hash_state),
                               reinterpret_cast<unsigned char*>(&ptr),
                               salient_ptm_size(sizeof ptr));
}

// -----------------------------------------------------------------------------
// AbslHashValue for Composite Types
// -----------------------------------------------------------------------------

// AbslHashValue() for hashing pairs
template <typename H, typename T1, typename T2>
typename std::enable_if<is_hashable<T1>::value && is_hashable<T2>::value,
                        H>::type
AbslHashValue(H hash_state, const std::pair<T1, T2>& p) {
  return H::combine(std::move(hash_state), p.first, p.second);
}

// hash_tuple()
//
// Helper function for hashing a tuple. The third argument should
// be an index_sequence running from 0 to tuple_size<Tuple> - 1.
template <typename H, typename Tuple, size_t... Is>
H hash_tuple(H hash_state, const Tuple& t, absl::index_sequence<Is...>) {
  return H::combine(std::move(hash_state), std::get<Is>(t)...);
}

// AbslHashValue for hashing tuples
template <typename H, typename... Ts>
#if defined(_MSC_VER)
// This SFINAE gets MSVC confused under some conditions. Let's just disable it
// for now.
H
#else   // _MSC_VER
typename std::enable_if<absl::conjunction<is_hashable<Ts>...>::value, H>::type
#endif  // _MSC_VER
AbslHashValue(H hash_state, const std::tuple<Ts...>& t) {
  return hash_internal::hash_tuple(std::move(hash_state), t,
                                   absl::make_index_sequence<sizeof...(Ts)>());
}

// -----------------------------------------------------------------------------
// AbslHashValue for Pointers
// -----------------------------------------------------------------------------

// AbslHashValue for hashing unique_ptr
template <typename H, typename T, typename D>
H AbslHashValue(H hash_state, const std::unique_ptr<T, D>& ptr) {
  // Hashes the stored pointer value, not the pointee.
  return H::combine(std::move(hash_state), ptr.get());
}

// AbslHashValue for hashing shared_ptr
template <typename H, typename T>
H AbslHashValue(H hash_state, const std::shared_ptr<T>& ptr) {
  // Hashes the stored pointer value, not the pointee.
  return H::combine(std::move(hash_state), ptr.get());
}

// -----------------------------------------------------------------------------
// AbslHashValue for String-Like Types
// -----------------------------------------------------------------------------

// AbslHashValue for hashing strings
//
// All the string-like types supported here provide the same hash expansion for
// the same character sequence. These types are:
//
//  - `absl::Cord`
//  - `std::string` (and std::basic_string<T, std::char_traits<T>, A> for
//      any allocator A and any T in {char, wchar_t, char16_t, char32_t})
//  - `absl::string_view`, `std::string_view`, `std::wstring_view`,
//    `std::u16string_view`, and `std::u32string_view`.
//
// For simplicity, we currently support only strings built on `char`, `wchar_t`,
// `char16_t`, or `char32_t`. This support may be broadened, if necessary, but
// with some caution - this overload would misbehave in cases where the traits'
// `eq()` member isn't equivalent to `==` on the underlying character type.
template <typename H>
H AbslHashValue(H hash_state, absl::string_view str) {
  // The size is mixed in after the contents so that "ab" + "c" and
  // "a" + "bc" produce different expansions for equal concatenations.
  return H::combine(
      H::combine_contiguous(std::move(hash_state), str.data(), str.size()),
      str.size());
}

// Support std::wstring, std::u16string and std::u32string.
template <typename Char, typename Alloc, typename H,
          typename = absl::enable_if_t<std::is_same<Char, wchar_t>::value ||
                                       std::is_same<Char, char16_t>::value ||
                                       std::is_same<Char, char32_t>::value>>
H AbslHashValue(
    H hash_state,
    const std::basic_string<Char, std::char_traits<Char>, Alloc>& str) {
  return H::combine(
      H::combine_contiguous(std::move(hash_state), str.data(), str.size()),
      str.size());
}

#ifdef ABSL_HAVE_STD_STRING_VIEW

// Support std::wstring_view, std::u16string_view and std::u32string_view.
template <typename Char, typename H,
          typename = absl::enable_if_t<std::is_same<Char, wchar_t>::value ||
                                       std::is_same<Char, char16_t>::value ||
                                       std::is_same<Char, char32_t>::value>>
H AbslHashValue(H hash_state, std::basic_string_view<Char> str) {
  return H::combine(
      H::combine_contiguous(std::move(hash_state), str.data(), str.size()),
      str.size());
}

#endif  // ABSL_HAVE_STD_STRING_VIEW

#if defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \
    (!defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) ||        \
     __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ >= 130000) &&       \
    (!defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) ||         \
     __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101500)

#define ABSL_INTERNAL_STD_FILESYSTEM_PATH_HASH_AVAILABLE 1

// Support std::filesystem::path. The SFINAE is required because some string
// types are implicitly convertible to std::filesystem::path.
template <typename Path, typename H,
          typename = absl::enable_if_t<
              std::is_same_v<Path, std::filesystem::path>>>
H AbslHashValue(H hash_state, const Path& path) {
  // This is implemented by deferring to the standard library to compute the
  // hash. The standard library requires that for two paths, `p1 == p2`, then
  // `hash_value(p1) == hash_value(p2)`. `AbslHashValue` has the same
  // requirement. Since `operator==` does platform specific matching, deferring
  // to the standard library is the simplest approach.
  return H::combine(std::move(hash_state), std::filesystem::hash_value(path));
}

#endif  // ABSL_INTERNAL_STD_FILESYSTEM_PATH_HASH_AVAILABLE

// -----------------------------------------------------------------------------
// AbslHashValue for Sequence Containers
// -----------------------------------------------------------------------------

// AbslHashValue for hashing std::array
template <typename H, typename T, size_t N>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::array<T, N>& array) {
  return H::combine_contiguous(std::move(hash_state), array.data(),
                               array.size());
}

// AbslHashValue for hashing std::deque
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::deque<T, Allocator>& deque) {
  // TODO(gromer): investigate a more efficient implementation taking
  // advantage of the chunk structure.
  for (const auto& t : deque) {
    hash_state = H::combine(std::move(hash_state), t);
  }
  return H::combine(std::move(hash_state), deque.size());
}

// AbslHashValue for hashing std::forward_list
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::forward_list<T, Allocator>& list) {
  // forward_list has no size(); count the elements as we hash them.
  size_t size = 0;
  for (const T& t : list) {
    hash_state = H::combine(std::move(hash_state), t);
    ++size;
  }
  return H::combine(std::move(hash_state), size);
}

// AbslHashValue for hashing std::list
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const std::list<T, Allocator>& list) {
  for (const auto& t : list) {
    hash_state = H::combine(std::move(hash_state), t);
  }
  return H::combine(std::move(hash_state), list.size());
}

// AbslHashValue for hashing std::vector
//
// Do not use this for vector<bool> on platforms that have a working
// implementation of std::hash. It does not have a .data(), and a fallback for
// std::hash<> is most likely faster.
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value && !std::is_same<T, bool>::value,
                        H>::type
AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
  return H::combine(H::combine_contiguous(std::move(hash_state), vector.data(),
                                          vector.size()),
                    vector.size());
}

// AbslHashValue special cases for hashing std::vector<bool>

#if defined(ABSL_IS_BIG_ENDIAN) && \
    (defined(__GLIBCXX__) || defined(__GLIBCPP__))

// std::hash in libstdc++ does not work correctly with vector<bool> on Big
// Endian platforms therefore we need to implement a custom AbslHashValue for
// it. More details on the bug:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102531
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value && std::is_same<T, bool>::value,
                        H>::type
AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
  // Hash each bit as one byte via the piecewise combiner, so the expansion
  // matches hashing a flat buffer of the element bytes.
  typename H::AbslInternalPiecewiseCombiner combiner;
  for (const auto& i : vector) {
    unsigned char c = static_cast<unsigned char>(i);
    hash_state = combiner.add_buffer(std::move(hash_state), &c, sizeof(c));
  }
  return H::combine(combiner.finalize(std::move(hash_state)), vector.size());
}
#else
// When not working around the libstdc++ bug above, we still have to contend
// with the fact that std::hash<vector<bool>> is often poor quality, hashing
// directly on the internal words and on no other state. On these platforms,
// vector<bool>{1, 1} and vector<bool>{1, 1, 0} hash to the same value.
//
// Mixing in the size (as we do in our other vector<> implementations) on top
// of the library-provided hash implementation avoids this QOI issue.
template <typename H, typename T, typename Allocator>
typename std::enable_if<is_hashable<T>::value && std::is_same<T, bool>::value,
                        H>::type
AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
  return H::combine(std::move(hash_state),
                    std::hash<std::vector<T, Allocator>>{}(vector),
                    vector.size());
}
#endif

// -----------------------------------------------------------------------------
// AbslHashValue for Ordered Associative Containers
// -----------------------------------------------------------------------------

// AbslHashValue for hashing std::map
template <typename H, typename Key, typename T, typename Compare,
          typename Allocator>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
                        H>::type
AbslHashValue(H hash_state, const std::map<Key, T, Compare, Allocator>& map) {
  for (const auto& t : map) {
    hash_state = H::combine(std::move(hash_state), t);
  }
  return H::combine(std::move(hash_state), map.size());
}

// AbslHashValue for hashing std::multimap
template <typename H, typename Key, typename T, typename Compare,
          typename Allocator>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
                        H>::type
AbslHashValue(H hash_state,
              const std::multimap<Key, T, Compare, Allocator>& map) {
  for (const auto& t : map) {
    hash_state = H::combine(std::move(hash_state), t);
  }
  return H::combine(std::move(hash_state), map.size());
}

// AbslHashValue for hashing std::set
template <typename H, typename Key, typename Compare, typename Allocator>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
    H hash_state, const std::set<Key, Compare, Allocator>& set) {
  for (const auto& t : set) {
    hash_state = H::combine(std::move(hash_state), t);
  }
  return H::combine(std::move(hash_state), set.size());
}

// AbslHashValue for hashing std::multiset
template <typename H, typename Key, typename Compare, typename Allocator>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
    H hash_state, const std::multiset<Key, Compare, Allocator>& set) {
  for (const auto& t : set) {
    hash_state = H::combine(std::move(hash_state), t);
  }
  return H::combine(std::move(hash_state), set.size());
}

// -----------------------------------------------------------------------------
// AbslHashValue for Unordered Associative Containers
// -----------------------------------------------------------------------------

// AbslHashValue for hashing std::unordered_set
template <typename H, typename Key, typename Hash, typename KeyEqual,
          typename Alloc>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
    H hash_state, const std::unordered_set<Key, Hash, KeyEqual, Alloc>& s) {
  // Elements are mixed order-independently; iteration order of the container
  // does not affect the result.
  return H::combine(
      H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
      s.size());
}

// AbslHashValue for hashing std::unordered_multiset
template <typename H, typename Key, typename Hash, typename KeyEqual,
          typename Alloc>
typename std::enable_if<is_hashable<Key>::value, H>::type AbslHashValue(
    H hash_state,
    const std::unordered_multiset<Key, Hash, KeyEqual, Alloc>& s) {
  return H::combine(
      H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
      s.size());
}

// AbslHashValue for hashing std::unordered_map
template <typename H, typename Key, typename T, typename Hash,
          typename KeyEqual, typename Alloc>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
                        H>::type
AbslHashValue(H hash_state,
              const std::unordered_map<Key, T, Hash, KeyEqual, Alloc>& s) {
  return H::combine(
      H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
      s.size());
}

// AbslHashValue for hashing
// AbslHashValue for hashing std::unordered_multimap
template <typename H, typename Key, typename T, typename Hash,
          typename KeyEqual, typename Alloc>
typename std::enable_if<is_hashable<Key>::value && is_hashable<T>::value,
                        H>::type
AbslHashValue(H hash_state,
              const std::unordered_multimap<Key, T, Hash, KeyEqual, Alloc>& s) {
  // Iteration order is unspecified, so combine elements order-independently,
  // then mix in the size.
  return H::combine(
      H::combine_unordered(std::move(hash_state), s.begin(), s.end()),
      s.size());
}

// -----------------------------------------------------------------------------
// AbslHashValue for Wrapper Types
// -----------------------------------------------------------------------------

// AbslHashValue for hashing std::reference_wrapper
template <typename H, typename T>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, std::reference_wrapper<T> opt) {
  // Hashes the referred-to value, not the wrapper itself.
  return H::combine(std::move(hash_state), opt.get());
}

// AbslHashValue for hashing absl::optional
template <typename H, typename T>
typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
    H hash_state, const absl::optional<T>& opt) {
  // has_value() is always mixed in so that an empty optional hashes
  // differently from an engaged one.
  if (opt) hash_state = H::combine(std::move(hash_state), *opt);
  return H::combine(std::move(hash_state), opt.has_value());
}

// VariantVisitor
//
// Visitor functor used below to hash the active alternative of a variant.
template <typename H>
struct VariantVisitor {
  // Held by rvalue reference: the visitor is a short-lived temporary consumed
  // within a single absl::visit call, and the state is moved out exactly once.
  H&& hash_state;
  template <typename T>
  H operator()(const T& t) const {
    return H::combine(std::move(hash_state), t);
  }
};

// AbslHashValue for hashing absl::variant
template <typename H, typename... T>
typename std::enable_if<conjunction<is_hashable<T>...>::value, H>::type
AbslHashValue(H hash_state, const absl::variant<T...>& v) {
  if (!v.valueless_by_exception()) {
    hash_state = absl::visit(VariantVisitor<H>{std::move(hash_state)}, v);
  }
  // Mix in the active alternative's index so that equal values held by
  // different alternatives hash differently.
  return H::combine(std::move(hash_state), v.index());
}

// -----------------------------------------------------------------------------
// AbslHashValue for Other Types
// -----------------------------------------------------------------------------

// AbslHashValue for hashing std::bitset is not defined on Little Endian
// platforms, for the same reason as for vector<bool> (see std::vector above):
// It does not expose the raw bytes, and a fallback to std::hash<> is most
// likely faster.

#if defined(ABSL_IS_BIG_ENDIAN) && \
    (defined(__GLIBCXX__) || defined(__GLIBCPP__))
// AbslHashValue for hashing std::bitset
//
// std::hash in libstdc++ does not work correctly with std::bitset on Big Endian
// platforms therefore we need to implement a custom AbslHashValue for it. More
// details on the bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102531
template <typename H, size_t N>
H AbslHashValue(H hash_state, const std::bitset<N>& set) {
  // Feed the bits one byte at a time through the piecewise combiner; N is
  // mixed in last (matching the vector<bool> scheme above).
  typename H::AbslInternalPiecewiseCombiner combiner;
  for (size_t i = 0; i < N; i++) {
    unsigned char c = static_cast<unsigned char>(set[i]);
    hash_state = combiner.add_buffer(std::move(hash_state), &c, sizeof(c));
  }
  return H::combine(combiner.finalize(std::move(hash_state)), N);
}
#endif

// -----------------------------------------------------------------------------

// hash_range_or_bytes()
//
// Mixes all values in the range [data, data+size) into the hash state.
// This overload accepts only uniquely-represented types, and hashes them by
// hashing the entire range of bytes.
936 template <typename H, typename T> 937 typename std::enable_if<is_uniquely_represented<T>::value, H>::type 938 hash_range_or_bytes(H hash_state, const T* data, size_t size) { 939 const auto* bytes = reinterpret_cast<const unsigned char*>(data); 940 return H::combine_contiguous(std::move(hash_state), bytes, sizeof(T) * size); 941 } 942 943 // hash_range_or_bytes() 944 template <typename H, typename T> 945 typename std::enable_if<!is_uniquely_represented<T>::value, H>::type 946 hash_range_or_bytes(H hash_state, const T* data, size_t size) { 947 for (const auto end = data + size; data < end; ++data) { 948 hash_state = H::combine(std::move(hash_state), *data); 949 } 950 return hash_state; 951 } 952 953 #if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \ 954 ABSL_META_INTERNAL_STD_HASH_SFINAE_FRIENDLY_ 955 #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 1 956 #else 957 #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 0 958 #endif 959 960 // HashSelect 961 // 962 // Type trait to select the appropriate hash implementation to use. 963 // HashSelect::type<T> will give the proper hash implementation, to be invoked 964 // as: 965 // HashSelect::type<T>::Invoke(state, value) 966 // Also, HashSelect::type<T>::value is a boolean equal to `true` if there is a 967 // valid `Invoke` function. Types that are not hashable will have a ::value of 968 // `false`. 
struct HashSelect {
 private:
  // Minimal stand-in hash state used only for SFINAE probing below; it is
  // declared but never defined.
  struct State : HashStateBase<State> {
    static State combine_contiguous(State hash_state, const unsigned char*,
                                    size_t);
    using State::HashStateBase::combine_contiguous;
    static State combine_raw(State state, uint64_t value);
  };

  // Probe: T is uniquely represented, so it can be hashed as raw bytes.
  struct UniquelyRepresentedProbe {
    template <typename H, typename T>
    static auto Invoke(H state, const T& value)
        -> absl::enable_if_t<is_uniquely_represented<T>::value, H> {
      return hash_internal::hash_bytes(std::move(state), value);
    }
  };

  // Probe: T has a usable AbslHashValue overload (found via ADL).
  struct HashValueProbe {
    template <typename H, typename T>
    static auto Invoke(H state, const T& value) -> absl::enable_if_t<
        std::is_same<H,
                     decltype(AbslHashValue(std::move(state), value))>::value,
        H> {
      return AbslHashValue(std::move(state), value);
    }
  };

  // Probe: a legacy (pre-std) hash functor exists for T.
  struct LegacyHashProbe {
#if ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
    template <typename H, typename T>
    static auto Invoke(H state, const T& value) -> absl::enable_if_t<
        std::is_convertible<
            decltype(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>()(value)),
            size_t>::value,
        H> {
      return hash_internal::hash_bytes(
          std::move(state),
          ABSL_INTERNAL_LEGACY_HASH_NAMESPACE::hash<T>{}(value));
    }
#endif  // ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_
  };

  // Probe: std::hash<T> is usable; its result is mixed in as bytes.
  struct StdHashProbe {
    template <typename H, typename T>
    static auto Invoke(H state, const T& value)
        -> absl::enable_if_t<type_traits_internal::IsHashable<T>::value, H> {
      return hash_internal::hash_bytes(std::move(state), std::hash<T>{}(value));
    }
  };

  // Probe<Hash, T>::value is true iff Hash::Invoke(State, const T&) is
  // well-formed; Probe also inherits the winning probe's Invoke.
  template <typename Hash, typename T>
  struct Probe : Hash {
   private:
    template <typename H, typename = decltype(H::Invoke(
                              std::declval<State>(), std::declval<const T&>()))>
    static std::true_type Test(int);
    template <typename U>
    static std::false_type Test(char);

   public:
    static constexpr bool value = decltype(Test<Hash>(0))::value;
  };

 public:
  // Probe each implementation in order.
  // disjunction provides short circuiting wrt instantiation.
  template <typename T>
  using Apply = absl::disjunction<         //
      Probe<UniquelyRepresentedProbe, T>,  //
      Probe<HashValueProbe, T>,            //
      Probe<LegacyHashProbe, T>,           //
      Probe<StdHashProbe, T>,              //
      std::false_type>;
};

// is_hashable<T>: true iff some probe above can hash T.
template <typename T>
struct is_hashable
    : std::integral_constant<bool, HashSelect::template Apply<T>::value> {};

// MixingHashState
//
// The concrete hash state used by absl::Hash.
class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
  // absl::uint128 is not an alias or a thin wrapper around the intrinsic.
  // We use the intrinsic when available to improve performance.
#ifdef ABSL_HAVE_INTRINSIC_INT128
  using uint128 = __uint128_t;
#else   // ABSL_HAVE_INTRINSIC_INT128
  using uint128 = absl::uint128;
#endif  // ABSL_HAVE_INTRINSIC_INT128

  // Random data taken from the hexadecimal digits of Pi's fractional component.
  // https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number
  ABSL_CACHELINE_ALIGNED static constexpr uint64_t kStaticRandomData[] = {
      0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0,
      0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377,
  };

  // Multiplicative-mix constant; a smaller constant is used when size_t is
  // 32 bits.
  static constexpr uint64_t kMul =
      sizeof(size_t) == 4 ? uint64_t{0xcc9e2d51}
                          : uint64_t{0xdcb22ca68cb134ed};

  // True for integral types that fit in 64 bits and are uniquely represented;
  // these take the specialized fast path in hash() below.
  template <typename T>
  using IntegralFastPath =
      conjunction<std::is_integral<T>, is_uniquely_represented<T>,
                  FitsIn64Bits<T>>;

 public:
  // Move only
  MixingHashState(MixingHashState&&) = default;
  MixingHashState& operator=(MixingHashState&&) = default;

  // MixingHashState::combine_contiguous()
  //
  // Fundamental base case for hash recursion: mixes the given range of bytes
  // into the hash state.
  static MixingHashState combine_contiguous(MixingHashState hash_state,
                                            const unsigned char* first,
                                            size_t size) {
    // Dispatch on sizeof(size_t) at compile time (see CombineContiguousImpl).
    return MixingHashState(
        CombineContiguousImpl(hash_state.state_, first, size,
                              std::integral_constant<int, sizeof(size_t)>{}));
  }
  using MixingHashState::HashStateBase::combine_contiguous;

  // MixingHashState::hash()
  //
  // For performance reasons in non-opt mode, we specialize this for
  // integral types.
  // Otherwise we would be instantiating and calling dozens of functions for
  // something that is just one multiplication and a couple xor's.
  // The result should be the same as running the whole algorithm, but faster.
  template <typename T, absl::enable_if_t<IntegralFastPath<T>::value, int> = 0>
  static size_t hash(T value) {
    return static_cast<size_t>(
        WeakMix(Seed(), static_cast<std::make_unsigned_t<T>>(value)));
  }

  // Overload of MixingHashState::hash() for all other hashable types: runs
  // the full combine recursion starting from a fresh seeded state.
  template <typename T, absl::enable_if_t<!IntegralFastPath<T>::value, int> = 0>
  static size_t hash(const T& value) {
    return static_cast<size_t>(combine(MixingHashState{}, value).state_);
  }

 private:
  // Invoked only once for a given argument; that plus the fact that this is
  // move-only ensures that there is only one non-moved-from object.
  MixingHashState() : state_(Seed()) {}

  friend class MixingHashState::HashStateBase;

  // Combines element hashes in an order-independent way (commutative sum with
  // carry folding), then mixes the total into `state`.
  template <typename CombinerT>
  static MixingHashState RunCombineUnordered(MixingHashState state,
                                             CombinerT combiner) {
    uint64_t unordered_state = 0;
    combiner(MixingHashState{}, [&](MixingHashState& inner_state) {
      // Add the hash state of the element to the running total, but mix the
      // carry bit back into the low bit. This is intended to avoid losing
      // entropy to overflow, especially when unordered_multisets contain
      // multiple copies of the same value.
      auto element_state = inner_state.state_;
      unordered_state += element_state;
      if (unordered_state < element_state) {
        ++unordered_state;
      }
      // Reset the per-element state for the next element.
      inner_state = MixingHashState{};
    });
    return MixingHashState::combine(std::move(state), unordered_state);
  }

  // Allow the HashState type-erasure implementation to invoke
  // RunCombineUnordered() directly.
  friend class absl::HashState;
  friend struct CombineRaw;

  // Workaround for MSVC bug.
  // We make the type copyable to fix the calling convention, even though we
  // never actually copy it. Keep it private to not affect the public API of the
  // type.
  MixingHashState(const MixingHashState&) = default;

  explicit MixingHashState(uint64_t state) : state_(state) {}

  // Combines a raw value from e.g. integrals/floats/pointers/etc. This allows
  // us to be consistent with IntegralFastPath when combining raw types, but
  // optimize Read1To3 and Read4To8 differently for the string case.
  static MixingHashState combine_raw(MixingHashState hash_state,
                                     uint64_t value) {
    return MixingHashState(WeakMix(hash_state.state_, value));
  }

  // Implementation of the base case for combine_contiguous where we actually
  // mix the bytes into the state.
  // Dispatch to different implementations of the combine_contiguous depending
  // on the value of `sizeof(size_t)`.
  static uint64_t CombineContiguousImpl(uint64_t state,
                                        const unsigned char* first, size_t len,
                                        std::integral_constant<int, 4>
                                        /* sizeof_size_t */);
  static uint64_t CombineContiguousImpl(uint64_t state,
                                        const unsigned char* first, size_t len,
                                        std::integral_constant<int, 8>
                                        /* sizeof_size_t */);

  // Mixes 0 to 8 bytes into `state`; empty ranges leave the state untouched.
  ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineSmallContiguousImpl(
      uint64_t state, const unsigned char* first, size_t len) {
    ABSL_ASSUME(len <= 8);
    uint64_t v;
    if (len >= 4) {
      v = Read4To8(first, len);
    } else if (len > 0) {
      v = Read1To3(first, len);
    } else {
      // Empty ranges have no effect.
      return state;
    }
    return WeakMix(state, v);
  }

  // Mixes 9 to 16 bytes into `state` with a single full-strength Mix.
  ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineContiguousImpl9to16(
      uint64_t state, const unsigned char* first, size_t len) {
    ABSL_ASSUME(len >= 9);
    ABSL_ASSUME(len <= 16);
    // Note: any time one half of the mix function becomes zero it will fail to
    // incorporate any bits from the other half. However, there is exactly 1 in
    // 2^64 values for each side that achieve this, and only when the size is
    // exactly 16 -- for smaller sizes there is an overlapping byte that makes
    // this impossible unless the seed is *also* incredibly unlucky.
    auto p = Read9To16(first, len);
    return Mix(state ^ p.first, kMul ^ p.second);
  }

  // Mixes 17 to 32 bytes into `state`.
  ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineContiguousImpl17to32(
      uint64_t state, const unsigned char* first, size_t len) {
    ABSL_ASSUME(len >= 17);
    ABSL_ASSUME(len <= 32);
    // Do two mixes of overlapping 16-byte ranges in parallel to minimize
    // latency.
    const uint64_t m0 =
        Mix(Read8(first) ^ kStaticRandomData[1], Read8(first + 8) ^ state);

    const unsigned char* tail_16b_ptr = first + (len - 16);
    const uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ kStaticRandomData[3],
                            Read8(tail_16b_ptr + 8) ^ state);
    return m0 ^ m1;
  }

  // Slow dispatch path for calls to CombineContiguousImpl with a size argument
  // larger than PiecewiseChunkSize(). Has the same effect as calling
  // CombineContiguousImpl() repeatedly with the chunk stride size.
  static uint64_t CombineLargeContiguousImpl32(uint64_t state,
                                               const unsigned char* first,
                                               size_t len);
  static uint64_t CombineLargeContiguousImpl64(uint64_t state,
                                               const unsigned char* first,
                                               size_t len);

  // Reads 9 to 16 bytes from p.
  // The least significant 8 bytes are in .first, the rest (zero padded) bytes
  // are in .second.
  static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p,
                                                 size_t len) {
    // The two 8-byte loads overlap when len < 16; the overlap is harmless.
    uint64_t low_mem = Read8(p);
    uint64_t high_mem = Read8(p + len - 8);
#ifdef ABSL_IS_LITTLE_ENDIAN
    uint64_t most_significant = high_mem;
    uint64_t least_significant = low_mem;
#else
    uint64_t most_significant = low_mem;
    uint64_t least_significant = high_mem;
#endif
    return {least_significant, most_significant};
  }

  // Reads 8 bytes from p.
  static uint64_t Read8(const unsigned char* p) {
    // Suppress erroneous array bounds errors on GCC.
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
    return absl::base_internal::UnalignedLoad64(p);
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif
  }

  // Reads 4 to 8 bytes from p. Some input bytes may be duplicated in output.
  static uint64_t Read4To8(const unsigned char* p, size_t len) {
    // If `len < 8`, we duplicate bytes in the middle.
    // E.g.:
    // `ABCD` will be read as `ABCDABCD`.
    // `ABCDE` will be read as `ABCDBCDE`.
    // `ABCDEF` will be read as `ABCDCDEF`.
    // `ABCDEFG` will be read as `ABCDDEFG`.
    // We also do not care about endianness. On big-endian platforms, bytes will
    // be shuffled (it's fine). We always shift low memory by 32, because that
    // can be pipelined earlier. Reading high memory requires computing
    // `p + len - 4`.
    uint64_t most_significant =
        static_cast<uint64_t>(absl::base_internal::UnalignedLoad32(p)) << 32;
    uint64_t least_significant =
        absl::base_internal::UnalignedLoad32(p + len - 4);
    return most_significant | least_significant;
  }

  // Reads 1 to 3 bytes from p. Some input bytes may be duplicated in output.
  static uint32_t Read1To3(const unsigned char* p, size_t len) {
    // The trick used by this implementation is to avoid branches.
    // We always read three bytes by duplicating.
    // E.g.,
    // `A` is read as `AAA`.
    // `AB` is read as `ABB`.
    // `ABC` is read as `ABC`.
    // We always shift `p[0]` so that it can be pipelined better.
    // Other bytes require extra computation to find indices.
    uint32_t mem0 = (static_cast<uint32_t>(p[0]) << 16) | p[len - 1];
    uint32_t mem1 = static_cast<uint32_t>(p[len / 2]) << 8;
    return mem0 | mem1;
  }

  // Full-strength mix: widening multiply, then fold the high half into the
  // low half.
  ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t lhs, uint64_t rhs) {
    // Though the 128-bit product on AArch64 needs two instructions, it is
    // still a good balance between speed and hash quality.
    using MultType =
        absl::conditional_t<sizeof(size_t) == 4, uint64_t, uint128>;
    MultType m = lhs;
    m *= rhs;
    return static_cast<uint64_t>(m ^ (m >> (sizeof(m) * 8 / 2)));
  }

  // Slightly lower latency than Mix, but with lower quality. The byte swap
  // helps ensure that low bits still have high quality.
  ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t WeakMix(uint64_t lhs,
                                                       uint64_t rhs) {
    const uint64_t n = lhs ^ rhs;
    // WeakMix doesn't work well on 32-bit platforms so just use Mix.
    if constexpr (sizeof(size_t) < 8) return Mix(n, kMul);
#ifdef __ARM_ACLE
    // gbswap_64 compiles to `rev` on ARM, but `rbit` is better because it
    // reverses bits rather than reversing bytes.
    return __rbitll(n * kMul);
#else
    return absl::gbswap_64(n * kMul);
#endif
  }

  // An extern to avoid bloat on a direct call to LowLevelHash() with fixed
  // values for both the seed and salt parameters.
  static uint64_t LowLevelHashImpl(const unsigned char* data, size_t len);

  // Hashes a large byte range with the best available 64-bit hash.
  ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data,
                                                      size_t len) {
#ifdef ABSL_HAVE_INTRINSIC_INT128
    return LowLevelHashImpl(data, len);
#else
    return hash_internal::CityHash64(reinterpret_cast<const char*>(data), len);
#endif
  }

  // Seed()
  //
  // A non-deterministic seed.
  //
  // The current purpose of this seed is to generate non-deterministic results
  // and prevent having users depend on the particular hash values.
  // It is not meant as a security feature right now, but it leaves the door
  // open to upgrade it to a true per-process random seed. A true random seed
  // costs more and we don't need to pay for that right now.
  //
  // On platforms with ASLR, we take advantage of it to make a per-process
  // random value.
  // See https://en.wikipedia.org/wiki/Address_space_layout_randomization
  //
  // On other platforms this is still going to be non-deterministic but most
  // probably per-build and not per-process.
  ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() {
#if (!defined(__clang__) || __clang_major__ > 11) && \
    (!defined(__apple_build_version__) ||            \
     __apple_build_version__ >= 19558921)  // Xcode 12
    return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&kSeed));
#else
    // Workaround the absence of
    // https://github.com/llvm/llvm-project/commit/bc15bf66dcca76cc06fe71fca35b74dc4d521021.
    return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(kSeed));
#endif
  }
  static const void* const kSeed;

  // The single piece of hash state; mixed/replaced by the functions above.
  uint64_t state_;
};

// MixingHashState::CombineContiguousImpl()
inline uint64_t MixingHashState::CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 4> /* sizeof_size_t */) {
  // For large values we use CityHash, for small ones we just use a
  // multiplicative hash.
  if (len <= 8) {
    return CombineSmallContiguousImpl(state, first, len);
  }
  if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
    return Mix(state ^ hash_internal::CityHash32(
                           reinterpret_cast<const char*>(first), len),
               kMul);
  }
  return CombineLargeContiguousImpl32(state, first, len);
}

// Overload of MixingHashState::CombineContiguousImpl()
inline uint64_t MixingHashState::CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 8> /* sizeof_size_t */) {
  // For large values we use LowLevelHash or CityHash depending on the platform,
  // for small ones we just use a multiplicative hash.
  if (len <= 8) {
    return CombineSmallContiguousImpl(state, first, len);
  }
  if (len <= 16) {
    return CombineContiguousImpl9to16(state, first, len);
  }
  if (len <= 32) {
    return CombineContiguousImpl17to32(state, first, len);
  }
  if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
    return Mix(state ^ Hash64(first, len), kMul);
  }
  return CombineLargeContiguousImpl64(state, first, len);
}

struct AggregateBarrier {};

// HashImpl

// Add a private base class to make sure this type is not an aggregate.
// Aggregates can be aggregate initialized even if the default constructor is
// deleted.
struct PoisonedHash : private AggregateBarrier {
  PoisonedHash() = delete;
  PoisonedHash(const PoisonedHash&) = delete;
  PoisonedHash& operator=(const PoisonedHash&) = delete;
};

// The working hash functor for hashable types: delegates to
// MixingHashState::hash().
template <typename T>
struct HashImpl {
  size_t operator()(const T& value) const {
    return MixingHashState::hash(value);
  }
};

// Hash<T> is HashImpl<T> when T is hashable, and the uninstantiable
// PoisonedHash otherwise.
template <typename T>
struct Hash
    : absl::conditional_t<is_hashable<T>::value, HashImpl<T>, PoisonedHash> {};

// HashStateBase::combine(): folds each value into the state in order via the
// implementation selected by HashSelect.
template <typename H>
template <typename T, typename... Ts>
H HashStateBase<H>::combine(H state, const T& value, const Ts&... values) {
  return H::combine(hash_internal::HashSelect::template Apply<T>::Invoke(
                        std::move(state), value),
                    values...);
}

// HashStateBase::combine_contiguous()
template <typename H>
template <typename T>
H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) {
  return hash_internal::hash_range_or_bytes(std::move(state), data, size);
}

// HashStateBase::combine_unordered()
template <typename H>
template <typename I>
H HashStateBase<H>::combine_unordered(H state, I begin, I end) {
  return H::RunCombineUnordered(std::move(state),
                                CombineUnorderedCallback<I>{begin, end});
}

// HashStateBase::PiecewiseCombiner::add_buffer()
//
// Accumulates bytes into an internal PiecewiseChunkSize() buffer; each full
// chunk is fed to combine_contiguous, so a piecewise sequence hashes the same
// as one contiguous buffer.
template <typename H>
H PiecewiseCombiner::add_buffer(H state, const unsigned char* data,
                                size_t size) {
  if (position_ + size < PiecewiseChunkSize()) {
    // This partial chunk does not fill our existing buffer
    memcpy(buf_ + position_, data, size);
    position_ += size;
    return state;
  }

  // If the buffer is partially filled we need to complete the buffer
  // and hash it.
  if (position_ != 0) {
    const size_t bytes_needed = PiecewiseChunkSize() - position_;
    memcpy(buf_ + position_, data, bytes_needed);
    state = H::combine_contiguous(std::move(state), buf_, PiecewiseChunkSize());
    data += bytes_needed;
    size -= bytes_needed;
  }

  // Hash whatever chunks we can without copying
  while (size >= PiecewiseChunkSize()) {
    state = H::combine_contiguous(std::move(state), data, PiecewiseChunkSize());
    data += PiecewiseChunkSize();
    size -= PiecewiseChunkSize();
  }
  // Fill the buffer with the remainder
  memcpy(buf_, data, size);
  position_ = size;
  return state;
}

// HashStateBase::PiecewiseCombiner::finalize()
template <typename H>
H PiecewiseCombiner::finalize(H state) {
  // Hash the remainder left in the buffer, which may be empty
  return H::combine_contiguous(std::move(state), buf_, position_);
}

}  // namespace hash_internal
ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_HASH_INTERNAL_HASH_H_