tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hash_rolling_inc.h (7226B)


      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2018 Google Inc. All Rights Reserved.
      3 
      4   Distributed under MIT license.
      5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
      9 /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
     10 /* JUMP = skip bytes for speedup */
     11 
     12 /* Rolling hash for long distance long string matches. Stores one position
     13   per bucket, bucket key is computed over a long region. */
     14 
     15 #define HashRolling HASHER()
     16 
     17 static const uint32_t FN(kRollingHashMul32) = 69069;
     18 static const uint32_t FN(kInvalidPos) = 0xffffffff;
     19 
     20 /* This hasher uses a longer forward length, but returning a higher value here
     21   will hurt compression by the main hasher when combined with a composite
     22   hasher. The hasher tests for forward itself instead. */
     23 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
     24 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
     25 
     26 /* Computes a code from a single byte. A lookup table of 256 values could be
     27   used, but simply adding 1 works about as good. */
     28 static uint32_t FN(HashByte)(uint8_t byte) {
     29  return (uint32_t)byte + 1u;
     30 }
     31 
     32 static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
     33                                               uint32_t factor) {
     34  return (uint32_t)(factor * state + FN(HashByte)(add));
     35 }
     36 
     37 static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
     38                                        uint8_t rem, uint32_t factor,
     39                                        uint32_t factor_remove) {
     40  return (uint32_t)(factor * state +
     41      FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
     42 }
     43 
     44 typedef struct HashRolling {
     45  uint32_t state;
     46  uint32_t* table;
     47  size_t next_ix;
     48 
     49  uint32_t chunk_len;
     50  uint32_t factor;
     51  uint32_t factor_remove;
     52 } HashRolling;
     53 
     54 static void FN(Initialize)(
     55    HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
     56    const BrotliEncoderParams* params) {
     57  size_t i;
     58  self->state = 0;
     59  self->next_ix = 0;
     60 
     61  self->factor = FN(kRollingHashMul32);
     62 
     63  /* Compute the factor of the oldest byte to remove: factor**steps modulo
     64     0xffffffff (the multiplications rely on 32-bit overflow) */
     65  self->factor_remove = 1;
     66  for (i = 0; i < CHUNKLEN; i += JUMP) {
     67    self->factor_remove *= self->factor;
     68  }
     69 
     70  self->table = (uint32_t*)common->extra[0];
     71  for (i = 0; i < NUMBUCKETS; i++) {
     72    self->table[i] = FN(kInvalidPos);
     73  }
     74 
     75  BROTLI_UNUSED(params);
     76 }
     77 
     78 static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
     79    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
     80  size_t i;
     81  /* Too small size, cannot use this hasher. */
     82  if (input_size < CHUNKLEN) return;
     83  self->state = 0;
     84  for (i = 0; i < CHUNKLEN; i += JUMP) {
     85    self->state = FN(HashRollingFunctionInitial)(
     86        self->state, data[i], self->factor);
     87  }
     88  BROTLI_UNUSED(one_shot);
     89 }
     90 
     91 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
     92    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
     93    size_t input_size, size_t* alloc_size) {
     94  BROTLI_UNUSED(params);
     95  BROTLI_UNUSED(one_shot);
     96  BROTLI_UNUSED(input_size);
     97  alloc_size[0] = NUMBUCKETS * sizeof(uint32_t);
     98 }
     99 
    100 static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
    101    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
    102  BROTLI_UNUSED(self);
    103  BROTLI_UNUSED(data);
    104  BROTLI_UNUSED(mask);
    105  BROTLI_UNUSED(ix);
    106 }
    107 
    108 static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
    109    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    110    const size_t ix_start, const size_t ix_end) {
    111  BROTLI_UNUSED(self);
    112  BROTLI_UNUSED(data);
    113  BROTLI_UNUSED(mask);
    114  BROTLI_UNUSED(ix_start);
    115  BROTLI_UNUSED(ix_end);
    116 }
    117 
    118 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    119    HashRolling* BROTLI_RESTRICT self,
    120    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    121    size_t ring_buffer_mask) {
    122  /* In this case we must re-initialize the hasher from scratch from the
    123     current position. */
    124  size_t position_masked;
    125  size_t available = num_bytes;
    126  if ((position & (JUMP - 1)) != 0) {
    127    size_t diff = JUMP - (position & (JUMP - 1));
    128    available = (diff > available) ? 0 : (available - diff);
    129    position += diff;
    130  }
    131  position_masked = position & ring_buffer_mask;
    132  /* wrapping around ringbuffer not handled. */
    133  if (available > ring_buffer_mask - position_masked) {
    134    available = ring_buffer_mask - position_masked;
    135  }
    136 
    137  FN(Prepare)(self, BROTLI_FALSE, available,
    138      ringbuffer + (position & ring_buffer_mask));
    139  self->next_ix = position;
    140  BROTLI_UNUSED(num_bytes);
    141 }
    142 
    143 static BROTLI_INLINE void FN(PrepareDistanceCache)(
    144    HashRolling* BROTLI_RESTRICT self,
    145    int* BROTLI_RESTRICT distance_cache) {
    146  BROTLI_UNUSED(self);
    147  BROTLI_UNUSED(distance_cache);
    148 }
    149 
    150 static BROTLI_INLINE void FN(FindLongestMatch)(
    151    HashRolling* BROTLI_RESTRICT self,
    152    const BrotliEncoderDictionary* dictionary,
    153    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    154    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    155    const size_t max_length, const size_t max_backward,
    156    const size_t dictionary_distance, const size_t max_distance,
    157    HasherSearchResult* BROTLI_RESTRICT out) {
    158  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
    159  size_t pos;
    160 
    161  if ((cur_ix & (JUMP - 1)) != 0) return;
    162 
    163  /* Not enough lookahead */
    164  if (max_length < CHUNKLEN) return;
    165 
    166  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
    167    uint32_t code = self->state & MASK;
    168 
    169    uint8_t rem = data[pos & ring_buffer_mask];
    170    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
    171    size_t found_ix = FN(kInvalidPos);
    172 
    173    self->state = FN(HashRollingFunction)(
    174        self->state, add, rem, self->factor, self->factor_remove);
    175 
    176    if (code < NUMBUCKETS) {
    177      found_ix = self->table[code];
    178      self->table[code] = (uint32_t)pos;
    179      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
    180        /* The cast to 32-bit makes backward distances up to 4GB work even
    181           if cur_ix is above 4GB, despite using 32-bit values in the table. */
    182        size_t backward = (uint32_t)(cur_ix - found_ix);
    183        if (backward <= max_backward) {
    184          const size_t found_ix_masked = found_ix & ring_buffer_mask;
    185          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
    186                                                      &data[cur_ix_masked],
    187                                                      max_length);
    188          if (len >= 4 && len > out->len) {
    189            score_t score = BackwardReferenceScore(len, backward);
    190            if (score > out->score) {
    191              out->len = len;
    192              out->distance = backward;
    193              out->score = score;
    194              out->len_code_delta = 0;
    195            }
    196          }
    197        }
    198      }
    199    }
    200  }
    201 
    202  self->next_ix = cur_ix + JUMP;
    203 
    204  /* NOTE: this hasher does not search in the dictionary. It is used as
    205     backup-hasher, the main hasher already searches in it. */
    206  BROTLI_UNUSED(dictionary);
    207  BROTLI_UNUSED(distance_cache);
    208  BROTLI_UNUSED(dictionary_distance);
    209  BROTLI_UNUSED(max_distance);
    210 }
    211 
    212 #undef HashRolling