hash.cc (5850B)
1 // Copyright 2014 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/hash/hash.h" 6 7 #include <string_view> 8 9 #include "base/check_op.h" 10 #include "base/notreached.h" 11 #include "base/rand_util.h" 12 #include "base/third_party/cityhash/city.h" 13 #include "build/build_config.h" 14 15 // Definition in base/third_party/superfasthash/superfasthash.c. (Third-party 16 // code did not come with its own header file, so declaring the function here.) 17 // Note: This algorithm is also in Blink under Source/wtf/StringHasher.h. 18 extern "C" uint32_t SuperFastHash(const char* data, int len); 19 20 namespace base { 21 22 namespace { 23 24 size_t FastHashImpl(base::span<const uint8_t> data) { 25 // We use the updated CityHash within our namespace (not the deprecated 26 // version from third_party/smhasher). 27 if constexpr (sizeof(size_t) > 4) { 28 return base::internal::cityhash_v111::CityHash64( 29 reinterpret_cast<const char*>(data.data()), data.size()); 30 } else { 31 return base::internal::cityhash_v111::CityHash32( 32 reinterpret_cast<const char*>(data.data()), data.size()); 33 } 34 } 35 36 // Implement hashing for pairs of at-most 32 bit integer values. 37 // When size_t is 32 bits, we turn the 64-bit hash code into 32 bits by using 38 // multiply-add hashing. This algorithm, as described in 39 // Theorem 4.3.3 of the thesis "Über die Komplexität der Multiplikation in 40 // eingeschränkten Branchingprogrammmodellen" by Woelfel, is: 41 // 42 // h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32 43 // 44 // Contact danakj@chromium.org for any questions. 45 size_t HashInts32Impl(uint32_t value1, uint32_t value2) { 46 uint64_t value1_64 = value1; 47 uint64_t hash64 = (value1_64 << 32) | value2; 48 49 if (sizeof(size_t) >= sizeof(uint64_t)) 50 return static_cast<size_t>(hash64); 51 52 uint64_t odd_random = 481046412LL << 32 | 1025306955LL; 53 uint32_t shift_random = 10121U << 16; 54 55 hash64 = hash64 * odd_random + shift_random; 56 size_t high_bits = 57 static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t)))); 58 return high_bits; 59 } 60 61 // Implement hashing for pairs of up-to 64-bit integer values. 62 // We use the compound integer hash method to produce a 64-bit hash code, by 63 // breaking the two 64-bit inputs into 4 32-bit values: 64 // http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000 65 // Then we reduce our result to 32 bits if required, similar to above. 66 size_t HashInts64Impl(uint64_t value1, uint64_t value2) { 67 uint32_t short_random1 = 842304669U; 68 uint32_t short_random2 = 619063811U; 69 uint32_t short_random3 = 937041849U; 70 uint32_t short_random4 = 3309708029U; 71 72 uint32_t value1a = static_cast<uint32_t>(value1 & 0xffffffff); 73 uint32_t value1b = static_cast<uint32_t>((value1 >> 32) & 0xffffffff); 74 uint32_t value2a = static_cast<uint32_t>(value2 & 0xffffffff); 75 uint32_t value2b = static_cast<uint32_t>((value2 >> 32) & 0xffffffff); 76 77 uint64_t product1 = static_cast<uint64_t>(value1a) * short_random1; 78 uint64_t product2 = static_cast<uint64_t>(value1b) * short_random2; 79 uint64_t product3 = static_cast<uint64_t>(value2a) * short_random3; 80 uint64_t product4 = static_cast<uint64_t>(value2b) * short_random4; 81 82 uint64_t hash64 = product1 + product2 + product3 + product4; 83 84 if (sizeof(size_t) >= sizeof(uint64_t)) 85 return static_cast<size_t>(hash64); 86 87 uint64_t odd_random = 1578233944LL << 32 | 194370989LL; 88 uint32_t shift_random = 20591U << 16; 89 90 hash64 = hash64 * odd_random + shift_random; 91 size_t high_bits = 92 static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t)))); 93 return high_bits; 94 } 95 96 // The random seed is used to perturb the output of base::FastHash() and 97 // base::HashInts() so that it is only deterministic within the lifetime of a 98 // process. This prevents inadvertent dependencies on the underlying 99 // implementation, e.g. anything that persists the hash value and expects it to 100 // be unchanging will break. 101 // 102 // Note: this is the same trick absl uses to generate a random seed. This is 103 // more robust than using base::RandBytes(), which can fail inside a sandboxed 104 // environment. Note that without ASLR, the seed won't be quite as random... 105 #if DCHECK_IS_ON() 106 constexpr const void* kSeed = &kSeed; 107 #endif 108 109 template <typename T> 110 T Scramble(T input) { 111 #if DCHECK_IS_ON() 112 return HashInts64Impl(input, reinterpret_cast<uintptr_t>(kSeed)); 113 #else 114 return input; 115 #endif 116 } 117 118 } // namespace 119 120 size_t FastHash(base::span<const uint8_t> data) { 121 return Scramble(FastHashImpl(data)); 122 } 123 124 uint32_t Hash(const void* data, size_t length) { 125 // Currently our in-memory hash is the same as the persistent hash. The 126 // split between in-memory and persistent hash functions is maintained to 127 // allow the in-memory hash function to be updated in the future. 128 return PersistentHash(data, length); 129 } 130 131 uint32_t Hash(const std::string& str) { 132 return PersistentHash(as_bytes(make_span(str))); 133 } 134 135 uint32_t PersistentHash(span<const uint8_t> data) { 136 // This hash function must not change, since it is designed to be persistable 137 // to disk. 138 if (data.size() > static_cast<size_t>(std::numeric_limits<int>::max())) { 139 NOTREACHED(); 140 return 0; 141 } 142 return ::SuperFastHash(reinterpret_cast<const char*>(data.data()), 143 static_cast<int>(data.size())); 144 } 145 146 uint32_t PersistentHash(const void* data, size_t length) { 147 return PersistentHash(make_span(static_cast<const uint8_t*>(data), length)); 148 } 149 150 uint32_t PersistentHash(std::string_view str) { 151 return PersistentHash(as_bytes(make_span(str))); 152 } 153 154 size_t HashInts32(uint32_t value1, uint32_t value2) { 155 return Scramble(HashInts32Impl(value1, value2)); 156 } 157 158 size_t HashInts64(uint64_t value1, uint64_t value2) { 159 return Scramble(HashInts64Impl(value1, value2)); 160 } 161 162 } // namespace base