tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hash_longest_match64_simd_inc.h (11835B)


      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2010 Google Inc. All Rights Reserved.
      3 
      4   Distributed under MIT license.
      5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* template parameters: FN */
      9 
     10 /* A (forgetful) hash table to the data seen by the compressor, to
     11   help create backward references to previous data.
     12 
     13   This is a hash map of fixed size (bucket_size_) to a ring buffer of
     14   fixed size (block_size_). The ring buffer contains the last block_size_
     15   index positions of the given hash key in the compressed data. */
     16 
#define HashLongestMatch HASHER()

/* The low TAG_HASH_BITS of each hash value form a one-byte tag that is
   stored next to each position (see FN(Store)) and compared in bulk by
   GetMatchingTagMask to filter match candidates cheaply; the remaining
   high bits select the bucket. */
#define TAG_HASH_BITS 8
#define TAG_HASH_MASK ((1 << TAG_HASH_BITS) - 1)
     21 
/* Number of bytes read when hashing a position: HashBytes does an 8-byte
   unaligned load, so both the hash span and the store lookahead are 8. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
     24 
     25 /* HashBytes is the function that chooses the bucket to place the address in. */
     26 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
     27                                          uint64_t hash_mul) {
     28  const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul;
     29  /* The higher bits contain more mixture from the multiplication,
     30     so we take our results from there. */
     31  return (size_t)(h >> (64 - 15 - TAG_HASH_BITS));
     32 }
     33 
/* Hash table state: per-bucket fill counters (num_), one-byte tags (tags_)
   used for bulk candidate filtering, and position ring buffers (buckets_). */
typedef struct HashLongestMatch {
  /* Number of hash buckets. */
  size_t bucket_size_;
  /* Only block_size_ newest backward references are kept,
     and the older are forgotten. */
  size_t block_size_;
  /* Hash multiplier tuned to match length. */
  uint64_t hash_mul_;
  /* Mask for accessing entries in a block (in a ring-buffer manner). */
  uint32_t block_mask_;

  /* log2(block_size_); used to locate a bucket's slice in buckets_/tags_. */
  int block_bits_;
  /* How many distance-cache slots FindLongestMatch probes before the
     hash-table search. */
  int num_last_distances_to_check_;

  /* Shortcuts. */
  HasherCommon* common_;

  /* --- Dynamic size members --- */

  /* Number of entries in a particular bucket. Counters start at 65535 and
     count DOWN as entries are stored (see FN(Prepare) / FN(Store)). */
  uint16_t* num_;  /* uint16_t[bucket_size]; */

  /* One tag byte per stored position, parallel to buckets_. */
  uint8_t* tags_;  /* uint8_t[bucket_size * block_size]; */

  /* Buckets containing block_size_ of backward references. */
  uint32_t* buckets_;  /* uint32_t[bucket_size * block_size]; */
} HashLongestMatch;
     61 
/* Binds |self| to |common| and derives the table geometry from the hasher
   parameters. The backing arrays themselves live in common->extra[], sized
   by FN(HashMemAllocInBytes). */
static void FN(Initialize)(
    HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
    const BrotliEncoderParams* params) {
  self->common_ = common;

  BROTLI_UNUSED(params);
  /* Shifting the multiplier left by 24 makes the product mod 2^64 depend
     only on the low 5 input bytes, i.e. an effective hash length of 5. */
  self->hash_mul_ = kHashMul64 << (64 - 5 * 8);
  /* HashBytes extracts exactly 15 key bits, so the bucket count is fixed. */
  BROTLI_DCHECK(common->params.bucket_bits == 15);
  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
  self->block_bits_ = common->params.block_bits;
  self->block_size_ = (size_t)1 << common->params.block_bits;
  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
  self->num_last_distances_to_check_ =
      common->params.num_last_distances_to_check;
  /* Slot layout must match FN(HashMemAllocInBytes): counters, tags,
     positions. */
  self->num_ = (uint16_t*)common->extra[0];
  self->tags_ = (uint8_t*)common->extra[1];
  self->buckets_ = (uint32_t*)common->extra[2];
}
     80 
     81 static void FN(Prepare)(
     82    HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
     83    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
     84  uint16_t* BROTLI_RESTRICT num = self->num_;
     85  /* Partial preparation is 100 times slower (per socket). */
     86  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
     87  if (one_shot && input_size <= partial_prepare_threshold) {
     88    size_t i;
     89    for (i = 0; i < input_size; ++i) {
     90      const size_t hash = FN(HashBytes)(&data[i], self->hash_mul_);
     91      const size_t key = hash >> TAG_HASH_BITS;
     92      num[key] = 65535;
     93    }
     94  } else {
     95    /* Set all the bytes of num to 255, which makes each uint16_t 65535. */
     96    memset(num, 255, self->bucket_size_ * sizeof(num[0]));
     97  }
     98 }
     99 
    100 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
    101    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    102    size_t input_size, size_t* alloc_size) {
    103  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
    104  size_t block_size = (size_t)1 << params->hasher.block_bits;
    105  BROTLI_UNUSED(one_shot);
    106  BROTLI_UNUSED(input_size);
    107  alloc_size[0] = sizeof(uint16_t) * bucket_size;
    108  alloc_size[1] = sizeof(uint8_t) * bucket_size * block_size;
    109  alloc_size[2] = sizeof(uint32_t) * bucket_size * block_size;
    110 }
    111 
    112 /* Look at 4 bytes at &data[ix & mask].
    113   Compute a hash from these, and store the value of ix at that position. */
    114 static BROTLI_INLINE void FN(Store)(
    115    HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
    116    const size_t mask, const size_t ix) {
    117  uint16_t* BROTLI_RESTRICT num = self->num_;
    118  uint8_t* BROTLI_RESTRICT tags = self->tags_;
    119  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
    120  const size_t hash = FN(HashBytes)(&data[ix & mask], self->hash_mul_);
    121  const size_t key = hash >> TAG_HASH_BITS;
    122  const uint8_t tag = hash & TAG_HASH_MASK;
    123  const size_t minor_ix = num[key] & self->block_mask_;
    124  const size_t offset = minor_ix + (key << self->block_bits_);
    125  --num[key];
    126  buckets[offset] = (uint32_t)ix;
    127  tags[offset] = tag;
    128 }
    129 
    130 static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
    131    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    132    const size_t ix_start, const size_t ix_end) {
    133  size_t i;
    134  for (i = ix_start; i < ix_end; ++i) {
    135    FN(Store)(self, data, mask, i);
    136  }
    137 }
    138 
    139 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    140    HashLongestMatch* BROTLI_RESTRICT self,
    141    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    142    size_t ringbuffer_mask) {
    143  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    144    /* Prepare the hashes for three last bytes of the last write.
    145       These could not be calculated before, since they require knowledge
    146       of both the previous and the current block. */
    147    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
    148    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
    149    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
    150  }
    151 }
    152 
/* Delegates to the shared PrepareDistanceCache helper to populate
   |distance_cache| with the num_last_distances_to_check_ candidate
   distances that FN(FindLongestMatch) probes first. */
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HashLongestMatch* BROTLI_RESTRICT self,
    int* BROTLI_RESTRICT distance_cache) {
  PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
}
    158 
/* Find a longest backward match of &data[cur_ix] up to the length of
   max_length and stores the position cur_ix in the hash table.

   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
             values; if this method is invoked repeatedly with the same distance
             cache values, it is enough to invoke FN(PrepareDistanceCache) once.

   Does not look for matches longer than max_length.
   Does not look for matches further away than max_backward.
   Writes the best match into |out|.
   |out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(
    HashLongestMatch* BROTLI_RESTRICT self,
    const BrotliEncoderDictionary* dictionary,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward,
    const size_t dictionary_distance, const size_t max_distance,
    HasherSearchResult* BROTLI_RESTRICT out) {
  uint16_t* BROTLI_RESTRICT num = self->num_;
  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
  uint8_t* BROTLI_RESTRICT tags = self->tags_;
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  /* Don't accept a short copy from far away. */
  score_t min_score = out->score;
  score_t best_score = out->score;
  size_t best_len = out->len;
  size_t i;
  /* Precalculate the hash key and prefetch the bucket. */
  const size_t hash = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
  const size_t key = hash >> TAG_HASH_BITS;
  uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
  uint8_t* BROTLI_RESTRICT tag_bucket = &tags[key << self->block_bits_];
  PREFETCH_L1(bucket);
  PREFETCH_L1(tag_bucket);
  /* For blocks larger than 16 entries, also touch the second cache line of
     the position array. */
  if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
  out->len = 0;
  out->len_code_delta = 0;

  /* Caller must keep the whole comparison window inside the ring buffer. */
  BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);

  /* Try last distance first. */
  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
    const size_t backward = (size_t)distance_cache[i];
    size_t prev_ix = (size_t)(cur_ix - backward);
    /* Unsigned wrap-around catches backward == 0 and backward > cur_ix. */
    if (prev_ix >= cur_ix) {
      continue;
    }
    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
      continue;
    }
    prev_ix &= ring_buffer_mask;

    if (cur_ix_masked + best_len > ring_buffer_mask) {
      break;
    }
    /* Cheap reject: to beat the current best, the byte at offset best_len
       must match as well. */
    if (prev_ix + best_len > ring_buffer_mask ||
        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
      continue;
    }
    {
      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                  &data[cur_ix_masked],
                                                  max_length);
      /* len == 2 is only worth considering for the two freshest distances. */
      if (len >= 3 || (len == 2 && i < 2)) {
        /* Comparing for >= 2 does not change the semantics, but just saves for
           a few unnecessary binary logarithms in backward reference score,
           since we are not interested in such short matches. */
        score_t score = BackwardReferenceScoreUsingLastDistance(len);
        if (best_score < score) {
          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
          }
        }
      }
    }
  }
  /* We require matches of len >= 4, so increase best_len to 3, so we can
   * compare 4 bytes all the time. */
  if (best_len < 3) {
    best_len = 3;
  }
  {
    const uint8_t tag = hash & TAG_HASH_MASK;
    const uint32_t first4 = BrotliUnalignedRead32(data + cur_ix_masked);
    const size_t max_length_m4 = max_length - 4;
    /* Counters count down from 65535, so num[key] + 1 is the ring-buffer
       slot of the most recently stored entry. */
    const size_t head = (num[key] + 1) & self->block_mask_;
    /* One bit per ring-buffer slot whose stored tag equals |tag|. */
    uint64_t matches =
        GetMatchingTagMask(self->block_size_ / 16, tag, tag_bucket, head);
    /* Mask off any matches from uninitialized tags. */
    uint16_t n = 65535 - num[key];  /* Entries stored in this bucket so far. */
    uint64_t block_has_unused_slots = self->block_size_ > n;
    /* Low n bits set when the block is not yet full; all ones otherwise
       ((0 << x) - 1 wraps to ~0). */
    uint64_t mask = (block_has_unused_slots << (n & (64 - 1))) - 1;
    matches &= mask;
    /* Walk the set bits; matches &= matches - 1 clears the lowest one. */
    for (; matches > 0; matches &= (matches - 1)) {
      const size_t rb_index =
          (head + (size_t)BROTLI_TZCNT64(matches)) & self->block_mask_;
      size_t prev_ix = bucket[rb_index];
      uint32_t current4;
      const size_t backward = cur_ix - prev_ix;
      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
        break;
      }
      prev_ix &= ring_buffer_mask;
      if (cur_ix_masked + best_len > ring_buffer_mask) {
        break;
      }
      if (prev_ix + best_len > ring_buffer_mask ||
          /* compare 4 bytes ending at best_len + 1 */
          BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
              BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
        continue;
      }
      /* A tag match may be a false positive; verify the first 4 bytes. */
      current4 = BrotliUnalignedRead32(data + prev_ix);
      if (first4 != current4) continue;
      {
        /* First 4 bytes already matched above, so start comparing at +4. */
        const size_t len = FindMatchLengthWithLimit(&data[prev_ix + 4],
                                                    &data[cur_ix_masked + 4],
                                                    max_length_m4) + 4;
        const score_t score = BackwardReferenceScore(len, backward);
        if (best_score < score) {
          best_score = score;
          best_len = len;
          out->len = best_len;
          out->distance = backward;
          out->score = best_score;
        }
      }
    }
    /* Insert the current position, same scheme as FN(Store). */
    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
    tag_bucket[num[key] & self->block_mask_] = tag;
    --num[key];
  }
  /* Nothing in the window improved the score: try the static dictionary. */
  if (min_score == out->score) {
    SearchInStaticDictionary(dictionary,
        self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
        max_distance, out, BROTLI_FALSE);
  }
}
    303 
    304 #undef HashLongestMatch