tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hash.cc (5850B)


      1 // Copyright 2014 The Chromium Authors
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/hash/hash.h"
      6 
      7 #include <string_view>
      8 
      9 #include "base/check_op.h"
     10 #include "base/notreached.h"
     11 #include "base/rand_util.h"
     12 #include "base/third_party/cityhash/city.h"
     13 #include "build/build_config.h"
     14 
     15 // Definition in base/third_party/superfasthash/superfasthash.c. (Third-party
     16 // code did not come with its own header file, so declaring the function here.)
     17 // Note: This algorithm is also in Blink under Source/wtf/StringHasher.h.
     18 extern "C" uint32_t SuperFastHash(const char* data, int len);
     19 
     20 namespace base {
     21 
     22 namespace {
     23 
     24 size_t FastHashImpl(base::span<const uint8_t> data) {
     25  // We use the updated CityHash within our namespace (not the deprecated
     26  // version from third_party/smhasher).
     27  if constexpr (sizeof(size_t) > 4) {
     28    return base::internal::cityhash_v111::CityHash64(
     29        reinterpret_cast<const char*>(data.data()), data.size());
     30  } else {
     31    return base::internal::cityhash_v111::CityHash32(
     32        reinterpret_cast<const char*>(data.data()), data.size());
     33  }
     34 }
     35 
     36 // Implement hashing for pairs of at-most 32 bit integer values.
     37 // When size_t is 32 bits, we turn the 64-bit hash code into 32 bits by using
     38 // multiply-add hashing. This algorithm, as described in
     39 // Theorem 4.3.3 of the thesis "Über die Komplexität der Multiplikation in
     40 // eingeschränkten Branchingprogrammmodellen" by Woelfel, is:
     41 //
     42 //   h32(x32, y32) = (h64(x32, y32) * rand_odd64 + rand16 * 2^16) % 2^64 / 2^32
     43 //
     44 // Contact danakj@chromium.org for any questions.
     45 size_t HashInts32Impl(uint32_t value1, uint32_t value2) {
     46  uint64_t value1_64 = value1;
     47  uint64_t hash64 = (value1_64 << 32) | value2;
     48 
     49  if (sizeof(size_t) >= sizeof(uint64_t))
     50    return static_cast<size_t>(hash64);
     51 
     52  uint64_t odd_random = 481046412LL << 32 | 1025306955LL;
     53  uint32_t shift_random = 10121U << 16;
     54 
     55  hash64 = hash64 * odd_random + shift_random;
     56  size_t high_bits =
     57      static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t))));
     58  return high_bits;
     59 }
     60 
     61 // Implement hashing for pairs of up-to 64-bit integer values.
     62 // We use the compound integer hash method to produce a 64-bit hash code, by
     63 // breaking the two 64-bit inputs into 4 32-bit values:
     64 // http://opendatastructures.org/versions/edition-0.1d/ods-java/node33.html#SECTION00832000000000000000
     65 // Then we reduce our result to 32 bits if required, similar to above.
     66 size_t HashInts64Impl(uint64_t value1, uint64_t value2) {
     67  uint32_t short_random1 = 842304669U;
     68  uint32_t short_random2 = 619063811U;
     69  uint32_t short_random3 = 937041849U;
     70  uint32_t short_random4 = 3309708029U;
     71 
     72  uint32_t value1a = static_cast<uint32_t>(value1 & 0xffffffff);
     73  uint32_t value1b = static_cast<uint32_t>((value1 >> 32) & 0xffffffff);
     74  uint32_t value2a = static_cast<uint32_t>(value2 & 0xffffffff);
     75  uint32_t value2b = static_cast<uint32_t>((value2 >> 32) & 0xffffffff);
     76 
     77  uint64_t product1 = static_cast<uint64_t>(value1a) * short_random1;
     78  uint64_t product2 = static_cast<uint64_t>(value1b) * short_random2;
     79  uint64_t product3 = static_cast<uint64_t>(value2a) * short_random3;
     80  uint64_t product4 = static_cast<uint64_t>(value2b) * short_random4;
     81 
     82  uint64_t hash64 = product1 + product2 + product3 + product4;
     83 
     84  if (sizeof(size_t) >= sizeof(uint64_t))
     85    return static_cast<size_t>(hash64);
     86 
     87  uint64_t odd_random = 1578233944LL << 32 | 194370989LL;
     88  uint32_t shift_random = 20591U << 16;
     89 
     90  hash64 = hash64 * odd_random + shift_random;
     91  size_t high_bits =
     92      static_cast<size_t>(hash64 >> (8 * (sizeof(uint64_t) - sizeof(size_t))));
     93  return high_bits;
     94 }
     95 
     96 // The random seed is used to perturb the output of base::FastHash() and
     97 // base::HashInts() so that it is only deterministic within the lifetime of a
     98 // process. This prevents inadvertent dependencies on the underlying
     99 // implementation, e.g. anything that persists the hash value and expects it to
    100 // be unchanging will break.
    101 //
    102 // Note: this is the same trick absl uses to generate a random seed. This is
    103 // more robust than using base::RandBytes(), which can fail inside a sandboxed
    104 // environment. Note that without ASLR, the seed won't be quite as random...
    105 #if DCHECK_IS_ON()
    106 constexpr const void* kSeed = &kSeed;
    107 #endif
    108 
    109 template <typename T>
    110 T Scramble(T input) {
    111 #if DCHECK_IS_ON()
    112  return HashInts64Impl(input, reinterpret_cast<uintptr_t>(kSeed));
    113 #else
    114  return input;
    115 #endif
    116 }
    117 
    118 }  // namespace
    119 
    120 size_t FastHash(base::span<const uint8_t> data) {
    121  return Scramble(FastHashImpl(data));
    122 }
    123 
    124 uint32_t Hash(const void* data, size_t length) {
    125  // Currently our in-memory hash is the same as the persistent hash. The
    126  // split between in-memory and persistent hash functions is maintained to
    127  // allow the in-memory hash function to be updated in the future.
    128  return PersistentHash(data, length);
    129 }
    130 
    131 uint32_t Hash(const std::string& str) {
    132  return PersistentHash(as_bytes(make_span(str)));
    133 }
    134 
    135 uint32_t PersistentHash(span<const uint8_t> data) {
    136  // This hash function must not change, since it is designed to be persistable
    137  // to disk.
    138  if (data.size() > static_cast<size_t>(std::numeric_limits<int>::max())) {
    139    NOTREACHED();
    140    return 0;
    141  }
    142  return ::SuperFastHash(reinterpret_cast<const char*>(data.data()),
    143                         static_cast<int>(data.size()));
    144 }
    145 
    146 uint32_t PersistentHash(const void* data, size_t length) {
    147  return PersistentHash(make_span(static_cast<const uint8_t*>(data), length));
    148 }
    149 
    150 uint32_t PersistentHash(std::string_view str) {
    151  return PersistentHash(as_bytes(make_span(str)));
    152 }
    153 
    154 size_t HashInts32(uint32_t value1, uint32_t value2) {
    155  return Scramble(HashInts32Impl(value1, value2));
    156 }
    157 
    158 size_t HashInts64(uint64_t value1, uint64_t value2) {
    159  return Scramble(HashInts64Impl(value1, value2));
    160 }
    161 
    162 }  // namespace base