tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hash_longest_match_quickly_inc.h (9481B)


      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2010 Google Inc. All Rights Reserved.
      3 
      4   Distributed under MIT license.
      5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP_BITS, HASH_LEN,
      9                        USE_DICTIONARY
     10 */
     11 
     12 #define HashLongestMatchQuickly HASHER()
     13 
     14 #define BUCKET_SIZE (1 << BUCKET_BITS)
     15 #define BUCKET_MASK (BUCKET_SIZE - 1)
     16 #define BUCKET_SWEEP (1 << BUCKET_SWEEP_BITS)
     17 #define BUCKET_SWEEP_MASK ((BUCKET_SWEEP - 1) << 3)
     18 
     19 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
     20 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
     21 
     22 /* HashBytes is the function that chooses the bucket to place
     23   the address in. The HashLongestMatch and HashLongestMatchQuickly
     24   classes have separate, different implementations of hashing. */
     25 static uint32_t FN(HashBytes)(const uint8_t* data) {
     26  const uint64_t h = ((BROTLI_UNALIGNED_LOAD64LE(data) << (64 - 8 * HASH_LEN)) *
     27                      kHashMul64);
     28  /* The higher bits contain more mixture from the multiplication,
     29     so we take our results from there. */
     30  return (uint32_t)(h >> (64 - BUCKET_BITS));
     31 }
     32 
     33 /* A (forgetful) hash table to the data seen by the compressor, to
     34   help create backward references to previous data.
     35 
     36   This is a hash map of fixed size (BUCKET_SIZE). */
     37 typedef struct HashLongestMatchQuickly {
     38  /* Shortcuts. */
     39  HasherCommon* common;
     40 
     41  /* --- Dynamic size members --- */
     42 
     43  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */
     44 } HashLongestMatchQuickly;
     45 
     46 static void FN(Initialize)(
     47    HasherCommon* common, HashLongestMatchQuickly* BROTLI_RESTRICT self,
     48    const BrotliEncoderParams* params) {
     49  self->common = common;
     50 
     51  BROTLI_UNUSED(params);
     52  self->buckets_ = (uint32_t*)common->extra[0];
     53 }
     54 
     55 static void FN(Prepare)(
     56    HashLongestMatchQuickly* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
     57    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
     58  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
     59  /* Partial preparation is 100 times slower (per socket). */
     60  size_t partial_prepare_threshold = BUCKET_SIZE >> 5;
     61  if (one_shot && input_size <= partial_prepare_threshold) {
     62    size_t i;
     63    for (i = 0; i < input_size; ++i) {
     64      const uint32_t key = FN(HashBytes)(&data[i]);
     65      if (BUCKET_SWEEP == 1) {
     66        buckets[key] = 0;
     67      } else {
     68        uint32_t j;
     69        for (j = 0; j < BUCKET_SWEEP; ++j) {
     70          buckets[(key + (j << 3)) & BUCKET_MASK] = 0;
     71        }
     72      }
     73    }
     74  } else {
     75    /* It is not strictly necessary to fill this buffer here, but
     76       not filling will make the results of the compression stochastic
     77       (but correct). This is because random data would cause the
     78       system to find accidentally good backward references here and there. */
     79    memset(buckets, 0, sizeof(uint32_t) * BUCKET_SIZE);
     80  }
     81 }
     82 
     83 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
     84    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
     85    size_t input_size, size_t* alloc_size) {
     86  BROTLI_UNUSED(params);
     87  BROTLI_UNUSED(one_shot);
     88  BROTLI_UNUSED(input_size);
     89  alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE;
     90 }
     91 
     92 /* Look at 5 bytes at &data[ix & mask].
     93   Compute a hash from these, and store the value somewhere within
     94   [ix .. ix+3]. */
     95 static BROTLI_INLINE void FN(Store)(
     96    HashLongestMatchQuickly* BROTLI_RESTRICT self,
     97    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
     98  const uint32_t key = FN(HashBytes)(&data[ix & mask]);
     99  if (BUCKET_SWEEP == 1) {
    100    self->buckets_[key] = (uint32_t)ix;
    101  } else {
    102    /* Wiggle the value with the bucket sweep range. */
    103    const uint32_t off = ix & BUCKET_SWEEP_MASK;
    104    self->buckets_[(key + off) & BUCKET_MASK] = (uint32_t)ix;
    105  }
    106 }
    107 
    108 static BROTLI_INLINE void FN(StoreRange)(
    109    HashLongestMatchQuickly* BROTLI_RESTRICT self,
    110    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    111    const size_t ix_start, const size_t ix_end) {
    112  size_t i;
    113  for (i = ix_start; i < ix_end; ++i) {
    114    FN(Store)(self, data, mask, i);
    115  }
    116 }
    117 
    118 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    119    HashLongestMatchQuickly* BROTLI_RESTRICT self,
    120    size_t num_bytes, size_t position,
    121    const uint8_t* ringbuffer, size_t ringbuffer_mask) {
    122  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    123    /* Prepare the hashes for three last bytes of the last write.
    124       These could not be calculated before, since they require knowledge
    125       of both the previous and the current block. */
    126    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
    127    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
    128    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
    129  }
    130 }
    131 
    132 static BROTLI_INLINE void FN(PrepareDistanceCache)(
    133    HashLongestMatchQuickly* BROTLI_RESTRICT self,
    134    int* BROTLI_RESTRICT distance_cache) {
    135  BROTLI_UNUSED(self);
    136  BROTLI_UNUSED(distance_cache);
    137 }
    138 
    139 /* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
    140   up to the length of max_length and stores the position cur_ix in the
    141   hash table.
    142 
    143   Does not look for matches longer than max_length.
    144   Does not look for matches further away than max_backward.
    145   Writes the best match into |out|.
    146   |out|->score is updated only if a better match is found. */
    147 static BROTLI_INLINE void FN(FindLongestMatch)(
    148    HashLongestMatchQuickly* BROTLI_RESTRICT self,
    149    const BrotliEncoderDictionary* dictionary,
    150    const uint8_t* BROTLI_RESTRICT data,
    151    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
    152    const size_t cur_ix, const size_t max_length, const size_t max_backward,
    153    const size_t dictionary_distance, const size_t max_distance,
    154    HasherSearchResult* BROTLI_RESTRICT out) {
    155  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
    156  const size_t best_len_in = out->len;
    157  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
    158  /* TODO: compare 4 bytes at once (and set the minimum best len to 4) */
    159  int compare_char = data[cur_ix_masked + best_len_in];
    160  size_t key = FN(HashBytes)(&data[cur_ix_masked]);
    161  size_t key_out;
    162  score_t min_score = out->score;
    163  score_t best_score = out->score;
    164  size_t best_len = best_len_in;
    165  size_t cached_backward = (size_t)distance_cache[0];
    166  size_t prev_ix = cur_ix - cached_backward;
    167 
    168  BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
    169 
    170  out->len_code_delta = 0;
    171  if (prev_ix < cur_ix) {
    172    prev_ix &= (uint32_t)ring_buffer_mask;
    173    if (compare_char == data[prev_ix + best_len]) {
    174      const size_t len = FindMatchLengthWithLimit(
    175          &data[prev_ix], &data[cur_ix_masked], max_length);
    176      if (len >= 4) {
    177        const score_t score = BackwardReferenceScoreUsingLastDistance(len);
    178        if (best_score < score) {
    179          out->len = len;
    180          out->distance = cached_backward;
    181          out->score = score;
    182          if (BUCKET_SWEEP == 1) {
    183            buckets[key] = (uint32_t)cur_ix;
    184            return;
    185          } else {
    186            best_len = len;
    187            best_score = score;
    188            compare_char = data[cur_ix_masked + len];
    189          }
    190        }
    191      }
    192    }
    193  }
    194  if (BUCKET_SWEEP == 1) {
    195    size_t backward;
    196    size_t len;
    197    /* Only one to look for, don't bother to prepare for a loop. */
    198    prev_ix = buckets[key];
    199    buckets[key] = (uint32_t)cur_ix;
    200    backward = cur_ix - prev_ix;
    201    prev_ix &= (uint32_t)ring_buffer_mask;
    202    if (compare_char != data[prev_ix + best_len_in]) {
    203      return;
    204    }
    205    if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
    206      return;
    207    }
    208    len = FindMatchLengthWithLimit(&data[prev_ix],
    209                                   &data[cur_ix_masked],
    210                                   max_length);
    211    if (len >= 4) {
    212      const score_t score = BackwardReferenceScore(len, backward);
    213      if (best_score < score) {
    214        out->len = len;
    215        out->distance = backward;
    216        out->score = score;
    217        return;
    218      }
    219    }
    220  } else {
    221    size_t keys[BUCKET_SWEEP];
    222    size_t i;
    223    for (i = 0; i < BUCKET_SWEEP; ++i) {
    224      keys[i] = (key + (i << 3)) & BUCKET_MASK;
    225    }
    226    key_out = keys[(cur_ix & BUCKET_SWEEP_MASK) >> 3];
    227    for (i = 0; i < BUCKET_SWEEP; ++i) {
    228      size_t len;
    229      size_t backward;
    230      prev_ix = buckets[keys[i]];
    231      backward = cur_ix - prev_ix;
    232      prev_ix &= (uint32_t)ring_buffer_mask;
    233      if (compare_char != data[prev_ix + best_len]) {
    234        continue;
    235      }
    236      if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
    237        continue;
    238      }
    239      len = FindMatchLengthWithLimit(&data[prev_ix],
    240                                     &data[cur_ix_masked],
    241                                     max_length);
    242      if (len >= 4) {
    243        const score_t score = BackwardReferenceScore(len, backward);
    244        if (best_score < score) {
    245          best_len = len;
    246          out->len = len;
    247          compare_char = data[cur_ix_masked + len];
    248          best_score = score;
    249          out->score = score;
    250          out->distance = backward;
    251        }
    252      }
    253    }
    254  }
    255  if (USE_DICTIONARY && min_score == out->score) {
    256    SearchInStaticDictionary(dictionary,
    257        self->common, &data[cur_ix_masked], max_length, dictionary_distance,
    258        max_distance, out, BROTLI_TRUE);
    259  }
    260  if (BUCKET_SWEEP != 1) {
    261    buckets[key_out] = (uint32_t)cur_ix;
    262  }
    263 }
    264 
    265 #undef BUCKET_SWEEP_MASK
    266 #undef BUCKET_SWEEP
    267 #undef BUCKET_MASK
    268 #undef BUCKET_SIZE
    269 
    270 #undef HashLongestMatchQuickly