tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hash_longest_match64_inc.h (10655B)


      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2010 Google Inc. All Rights Reserved.
      3 
      4   Distributed under MIT license.
      5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* template parameters: FN */
      9 
     10 /* A (forgetful) hash table to the data seen by the compressor, to
     11   help create backward references to previous data.
     12 
     13   This is a hash map of fixed size (bucket_size_) to a ring buffer of
     14   fixed size (block_size_). The ring buffer contains the last block_size_
     15   index positions of the given hash key in the compressed data. */
     16 
     17 #define HashLongestMatch HASHER()
     18 
     19 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
     20 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
     21 
     22 /* HashBytes is the function that chooses the bucket to place the address in. */
     23 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
     24                                          uint64_t hash_mul) {
     25  const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul;
     26  /* The higher bits contain more mixture from the multiplication,
     27     so we take our results from there. */
     28  return (size_t)(h >> (64 - 15));
     29 }
     30 
     31 typedef struct HashLongestMatch {
     32  /* Number of hash buckets. */
     33  size_t bucket_size_;
     34  /* Only block_size_ newest backward references are kept,
     35     and the older are forgotten. */
     36  size_t block_size_;
     37  /* Hash multiplier tuned to match length. */
     38  uint64_t hash_mul_;
     39  /* Mask for accessing entries in a block (in a ring-buffer manner). */
     40  uint32_t block_mask_;
     41 
     42  int block_bits_;
     43  int num_last_distances_to_check_;
     44 
     45  /* Shortcuts. */
     46  HasherCommon* common_;
     47 
     48  /* --- Dynamic size members --- */
     49 
     50  /* Number of entries in a particular bucket. */
     51  uint16_t* num_;  /* uint16_t[bucket_size]; */
     52 
     53  /* Buckets containing block_size_ of backward references. */
     54  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
     55 } HashLongestMatch;
     56 
     57 static void FN(Initialize)(
     58    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
     59    const BrotliEncoderParams* params) {
     60  self->common_ = common;
     61 
     62  BROTLI_UNUSED(params);
     63  self->hash_mul_ = kHashMul64 << (64 - 5 * 8);
     64  BROTLI_DCHECK(common->params.bucket_bits == 15);
     65  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
     66  self->block_bits_ = common->params.block_bits;
     67  self->block_size_ = (size_t)1 << common->params.block_bits;
     68  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
     69  self->num_last_distances_to_check_ =
     70      common->params.num_last_distances_to_check;
     71  self->num_ = (uint16_t*)common->extra[0];
     72  self->buckets_ = (uint32_t*)common->extra[1];
     73 }
     74 
     75 static void FN(Prepare)(
     76    HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
     77    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
     78  uint16_t* BROTLI_RESTRICT num = self->num_;
     79  /* Partial preparation is 100 times slower (per socket). */
     80  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
     81  if (one_shot && input_size <= partial_prepare_threshold) {
     82    size_t i;
     83    for (i = 0; i < input_size; ++i) {
     84      const size_t key = FN(HashBytes)(&data[i], self->hash_mul_);
     85      num[key] = 0;
     86    }
     87  } else {
     88    memset(num, 0, self->bucket_size_ * sizeof(num[0]));
     89  }
     90 }
     91 
     92 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
     93    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
     94    size_t input_size, size_t* alloc_size) {
     95  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
     96  size_t block_size = (size_t)1 << params->hasher.block_bits;
     97  BROTLI_UNUSED(one_shot);
     98  BROTLI_UNUSED(input_size);
     99  alloc_size[0] = sizeof(uint16_t) * bucket_size;
    100  alloc_size[1] = sizeof(uint32_t) * bucket_size * block_size;
    101 }
    102 
    103 /* Look at 4 bytes at &data[ix & mask].
    104   Compute a hash from these, and store the value of ix at that position. */
    105 static BROTLI_INLINE void FN(Store)(
    106    HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
    107    const size_t mask, const size_t ix) {
    108  uint16_t* BROTLI_RESTRICT num = self->num_;
    109  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
    110  const size_t key = FN(HashBytes)(&data[ix & mask], self->hash_mul_);
    111  const size_t minor_ix = num[key] & self->block_mask_;
    112  const size_t offset = minor_ix + (key << self->block_bits_);
    113  ++num[key];
    114  buckets[offset] = (uint32_t)ix;
    115 }
    116 
    117 static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
    118    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    119    const size_t ix_start, const size_t ix_end) {
    120  size_t i;
    121  for (i = ix_start; i < ix_end; ++i) {
    122    FN(Store)(self, data, mask, i);
    123  }
    124 }
    125 
    126 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    127    HashLongestMatch* BROTLI_RESTRICT self,
    128    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    129    size_t ringbuffer_mask) {
    130  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    131    /* Prepare the hashes for three last bytes of the last write.
    132       These could not be calculated before, since they require knowledge
    133       of both the previous and the current block. */
    134    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
    135    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
    136    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
    137  }
    138 }
    139 
    140 static BROTLI_INLINE void FN(PrepareDistanceCache)(
    141    HashLongestMatch* BROTLI_RESTRICT self,
    142    int* BROTLI_RESTRICT distance_cache) {
    143  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
    144 }
    145 
    146 /* Find a longest backward match of &data[cur_ix] up to the length of
    147   max_length and stores the position cur_ix in the hash table.
    148 
    149   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
    150             values; if this method is invoked repeatedly with the same distance
    151             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
    152 
    153   Does not look for matches longer than max_length.
    154   Does not look for matches further away than max_backward.
    155   Writes the best match into |out|.
    156   |out|->score is updated only if a better match is found. */
    157 static BROTLI_INLINE void FN(FindLongestMatch)(
    158    HashLongestMatch* BROTLI_RESTRICT self,
    159    const BrotliEncoderDictionary* dictionary,
    160    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    161    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    162    const size_t max_length, const size_t max_backward,
    163    const size_t dictionary_distance, const size_t max_distance,
    164    HasherSearchResult* BROTLI_RESTRICT out) {
    165  uint16_t* BROTLI_RESTRICT num = self->num_;
    166  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
    167  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
    168  /* Don't accept a short copy from far away. */
    169  score_t min_score = out->score;
    170  score_t best_score = out->score;
    171  size_t best_len = out->len;
    172  size_t i;
    173  /* Precalculate the hash key and prefetch the bucket. */
    174  const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
    175  uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
    176  PREFETCH_L1(bucket);
    177  if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
    178  out->len = 0;
    179  out->len_code_delta = 0;
    180 
    181  BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
    182 
    183  /* Try last distance first. */
    184  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
    185    const size_t backward = (size_t)distance_cache[i];
    186    size_t prev_ix = (size_t)(cur_ix - backward);
    187    if (prev_ix >= cur_ix) {
    188      continue;
    189    }
    190    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
    191      continue;
    192    }
    193    prev_ix &= ring_buffer_mask;
    194 
    195    if (cur_ix_masked + best_len > ring_buffer_mask) {
    196      break;
    197    }
    198    if (prev_ix + best_len > ring_buffer_mask ||
    199        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
    200      continue;
    201    }
    202    {
    203      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
    204                                                  &data[cur_ix_masked],
    205                                                  max_length);
    206      if (len >= 3 || (len == 2 && i < 2)) {
    207        /* Comparing for >= 2 does not change the semantics, but just saves for
    208           a few unnecessary binary logarithms in backward reference score,
    209           since we are not interested in such short matches. */
    210        score_t score = BackwardReferenceScoreUsingLastDistance(len);
    211        if (best_score < score) {
    212          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
    213          if (best_score < score) {
    214            best_score = score;
    215            best_len = len;
    216            out->len = best_len;
    217            out->distance = backward;
    218            out->score = best_score;
    219          }
    220        }
    221      }
    222    }
    223  }
    224  /* we require matches of len >4, so increase best_len to 3, so we can compare
    225   * 4 bytes all the time. */
    226  if (best_len < 3) {
    227    best_len = 3;
    228  }
    229  {
    230    const size_t down =
    231        (num[key] > self->block_size_) ?
    232        (num[key] - self->block_size_) : 0u;
    233    const uint32_t first4 = BrotliUnalignedRead32(data + cur_ix_masked);
    234    const size_t max_length_m4 = max_length - 4;
    235    i = num[key];
    236    for (; i > down;) {
    237      size_t prev_ix = bucket[--i & self->block_mask_];
    238      uint32_t current4;
    239      const size_t backward = cur_ix - prev_ix;
    240      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
    241        break;
    242      }
    243      prev_ix &= ring_buffer_mask;
    244      if (cur_ix_masked + best_len > ring_buffer_mask) {
    245        break;
    246      }
    247      if (prev_ix + best_len > ring_buffer_mask ||
    248          /* compare 4 bytes ending at best_len + 1 */
    249          BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
    250              BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
    251        continue;
    252      }
    253      current4 = BrotliUnalignedRead32(data + prev_ix);
    254      if (first4 != current4) continue;
    255      {
    256        const size_t len = FindMatchLengthWithLimit(&data[prev_ix + 4],
    257                                                    &data[cur_ix_masked + 4],
    258                                                    max_length_m4) + 4;
    259        const score_t score = BackwardReferenceScore(len, backward);
    260        if (best_score < score) {
    261          best_score = score;
    262          best_len = len;
    263          out->len = best_len;
    264          out->distance = backward;
    265          out->score = best_score;
    266        }
    267      }
    268    }
    269    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
    270    ++num[key];
    271  }
    272  if (min_score == out->score) {
    273    SearchInStaticDictionary(dictionary,
    274        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
    275        max_distance, out, BROTLI_FALSE);
    276  }
    277 }
    278 
    279 #undef HashLongestMatch