tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hash_forgetful_chain_inc.h (11317B)


      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2016 Google Inc. All Rights Reserved.
      3 
      4   Distributed under MIT license.
      5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
      9                        NUM_LAST_DISTANCES_TO_CHECK */
     10 
     11 /* A (forgetful) hash table to the data seen by the compressor, to
     12   help create backward references to previous data.
     13 
     14   Hashes are stored in chains which are bucketed to groups. Group of chains
     15   share a storage "bank". When more than "bank size" chain nodes are added,
     16   oldest nodes are replaced; this way several chains may share a tail. */
     17 
     18 #define HashForgetfulChain HASHER()
     19 
     20 #define BANK_SIZE (1 << BANK_BITS)
     21 
     22 /* Number of hash buckets. */
     23 #define BUCKET_SIZE (1 << BUCKET_BITS)
     24 
     25 #define CAPPED_CHAINS 0
     26 
     27 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
     28 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
     29 
     30 /* HashBytes is the function that chooses the bucket to place the address in.*/
     31 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
     32  const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
     33  /* The higher bits contain more mixture from the multiplication,
     34     so we take our results from there. */
     35  return h >> (32 - BUCKET_BITS);
     36 }
     37 
     38 typedef struct FN(Slot) {
     39  uint16_t delta;
     40  uint16_t next;
     41 } FN(Slot);
     42 
     43 typedef struct FN(Bank) {
     44  FN(Slot) slots[BANK_SIZE];
     45 } FN(Bank);
     46 
     47 typedef struct HashForgetfulChain {
     48  uint16_t free_slot_idx[NUM_BANKS];  /* Up to 1KiB. Move to dynamic? */
     49  size_t max_hops;
     50 
     51  /* Shortcuts. */
     52  void* extra[2];
     53  HasherCommon* common;
     54 
     55  /* --- Dynamic size members --- */
     56 
     57  /* uint32_t addr[BUCKET_SIZE]; */
     58 
     59  /* uint16_t head[BUCKET_SIZE]; */
     60 
     61  /* Truncated hash used for quick rejection of "distance cache" candidates. */
     62  /* uint8_t tiny_hash[65536];*/
     63 
     64  /* FN(Bank) banks[NUM_BANKS]; */
     65 } HashForgetfulChain;
     66 
     67 static uint32_t* FN(Addr)(void* extra) {
     68  return (uint32_t*)extra;
     69 }
     70 
     71 static uint16_t* FN(Head)(void* extra) {
     72  return (uint16_t*)(&FN(Addr)(extra)[BUCKET_SIZE]);
     73 }
     74 
     75 static uint8_t* FN(TinyHash)(void* extra) {
     76  return (uint8_t*)(&FN(Head)(extra)[BUCKET_SIZE]);
     77 }
     78 
     79 static FN(Bank)* FN(Banks)(void* extra) {
     80  return (FN(Bank)*)(extra);
     81 }
     82 
     83 static void FN(Initialize)(
     84    HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
     85    const BrotliEncoderParams* params) {
     86  self->common = common;
     87  self->extra[0] = common->extra[0];
     88  self->extra[1] = common->extra[1];
     89 
     90  self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
     91 }
     92 
     93 static void FN(Prepare)(
     94    HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
     95    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
     96  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
     97  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
     98  uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
     99  /* Partial preparation is 100 times slower (per socket). */
    100  size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
    101  if (one_shot && input_size <= partial_prepare_threshold) {
    102    size_t i;
    103    for (i = 0; i < input_size; ++i) {
    104      size_t bucket = FN(HashBytes)(&data[i]);
    105      /* See InitEmpty comment. */
    106      addr[bucket] = 0xCCCCCCCC;
    107      head[bucket] = 0xCCCC;
    108    }
    109  } else {
    110    /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
    111       processed by hasher never reaches 3GB + 64M; this makes all new chains
    112       to be terminated after the first node. */
    113    memset(addr, 0xCC, sizeof(uint32_t) * BUCKET_SIZE);
    114    memset(head, 0, sizeof(uint16_t) * BUCKET_SIZE);
    115  }
    116  memset(tiny_hash, 0, sizeof(uint8_t) * 65536);
    117  memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
    118 }
    119 
    120 static BROTLI_INLINE void FN(HashMemAllocInBytes)(
    121    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    122    size_t input_size, size_t* alloc_size) {
    123  BROTLI_UNUSED(params);
    124  BROTLI_UNUSED(one_shot);
    125  BROTLI_UNUSED(input_size);
    126  alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE +
    127                  sizeof(uint16_t) * BUCKET_SIZE + sizeof(uint8_t) * 65536;
    128  alloc_size[1] = sizeof(FN(Bank)) * NUM_BANKS;
    129 }
    130 
    131 /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
    132   node to corresponding chain; also update tiny_hash for current position. */
    133 static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
    134    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
    135  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
    136  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
    137  uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
    138  FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
    139  const size_t key = FN(HashBytes)(&data[ix & mask]);
    140  const size_t bank = key & (NUM_BANKS - 1);
    141  const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
    142  size_t delta = ix - addr[key];
    143  tiny_hash[(uint16_t)ix] = (uint8_t)key;
    144  if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
    145  banks[bank].slots[idx].delta = (uint16_t)delta;
    146  banks[bank].slots[idx].next = head[key];
    147  addr[key] = (uint32_t)ix;
    148  head[key] = (uint16_t)idx;
    149 }
    150 
    151 static BROTLI_INLINE void FN(StoreRange)(
    152    HashForgetfulChain* BROTLI_RESTRICT self,
    153    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
    154    const size_t ix_start, const size_t ix_end) {
    155  size_t i;
    156  for (i = ix_start; i < ix_end; ++i) {
    157    FN(Store)(self, data, mask, i);
    158  }
    159 }
    160 
    161 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    162    HashForgetfulChain* BROTLI_RESTRICT self,
    163    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    164    size_t ring_buffer_mask) {
    165  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    166    /* Prepare the hashes for three last bytes of the last write.
    167       These could not be calculated before, since they require knowledge
    168       of both the previous and the current block. */
    169    FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
    170    FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
    171    FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
    172  }
    173 }
    174 
    175 static BROTLI_INLINE void FN(PrepareDistanceCache)(
    176    HashForgetfulChain* BROTLI_RESTRICT self,
    177    int* BROTLI_RESTRICT distance_cache) {
    178  BROTLI_UNUSED(self);
    179  PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
    180 }
    181 
    182 /* Find a longest backward match of &data[cur_ix] up to the length of
    183   max_length and stores the position cur_ix in the hash table.
    184 
    185   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
    186             values; if this method is invoked repeatedly with the same distance
    187             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
    188 
    189   Does not look for matches longer than max_length.
    190   Does not look for matches further away than max_backward.
    191   Writes the best match into |out|.
    192   |out|->score is updated only if a better match is found. */
    193 static BROTLI_INLINE void FN(FindLongestMatch)(
    194    HashForgetfulChain* BROTLI_RESTRICT self,
    195    const BrotliEncoderDictionary* dictionary,
    196    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    197    const int* BROTLI_RESTRICT distance_cache,
    198    const size_t cur_ix, const size_t max_length, const size_t max_backward,
    199    const size_t dictionary_distance, const size_t max_distance,
    200    HasherSearchResult* BROTLI_RESTRICT out) {
    201  uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
    202  uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
    203  uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra[0]);
    204  FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
    205  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
    206  /* Don't accept a short copy from far away. */
    207  score_t min_score = out->score;
    208  score_t best_score = out->score;
    209  size_t best_len = out->len;
    210  size_t i;
    211  const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
    212  const uint8_t tiny_hash = (uint8_t)(key);
    213  out->len = 0;
    214  out->len_code_delta = 0;
    215 
    216  BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
    217 
    218  /* Try last distance first. */
    219  for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
    220    const size_t backward = (size_t)distance_cache[i];
    221    size_t prev_ix = (cur_ix - backward);
    222    /* For distance code 0 we want to consider 2-byte matches. */
    223    if (i > 0 && tiny_hashes[(uint16_t)prev_ix] != tiny_hash) continue;
    224    if (prev_ix >= cur_ix || backward > max_backward) {
    225      continue;
    226    }
    227    prev_ix &= ring_buffer_mask;
    228    {
    229      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
    230                                                  &data[cur_ix_masked],
    231                                                  max_length);
    232      if (len >= 2) {
    233        score_t score = BackwardReferenceScoreUsingLastDistance(len);
    234        if (best_score < score) {
    235          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
    236          if (best_score < score) {
    237            best_score = score;
    238            best_len = len;
    239            out->len = best_len;
    240            out->distance = backward;
    241            out->score = best_score;
    242          }
    243        }
    244      }
    245    }
    246  }
    247  /* we require matches of len >4, so increase best_len to 3, so we can compare
    248   * 4 bytes all the time. */
    249  if (best_len < 3) {
    250    best_len = 3;
    251  }
    252  {
    253    const size_t bank = key & (NUM_BANKS - 1);
    254    size_t backward = 0;
    255    size_t hops = self->max_hops;
    256    size_t delta = cur_ix - addr[key];
    257    size_t slot = head[key];
    258    while (hops--) {
    259      size_t prev_ix;
    260      size_t last = slot;
    261      backward += delta;
    262      if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
    263      prev_ix = (cur_ix - backward) & ring_buffer_mask;
    264      slot = banks[bank].slots[last].next;
    265      delta = banks[bank].slots[last].delta;
    266      if (cur_ix_masked + best_len > ring_buffer_mask ||
    267          prev_ix + best_len > ring_buffer_mask ||
    268          /* compare 4 bytes ending at best_len + 1 */
    269          BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
    270              BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
    271        continue;
    272      }
    273      {
    274        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
    275                                                    &data[cur_ix_masked],
    276                                                    max_length);
    277        if (len >= 4) {
    278          /* Comparing for >= 3 does not change the semantics, but just saves
    279             for a few unnecessary binary logarithms in backward reference
    280             score, since we are not interested in such short matches. */
    281          score_t score = BackwardReferenceScore(len, backward);
    282          if (best_score < score) {
    283            best_score = score;
    284            best_len = len;
    285            out->len = best_len;
    286            out->distance = backward;
    287            out->score = best_score;
    288          }
    289        }
    290      }
    291    }
    292    FN(Store)(self, data, ring_buffer_mask, cur_ix);
    293  }
    294  if (out->score == min_score) {
    295    SearchInStaticDictionary(dictionary,
    296        self->common, &data[cur_ix_masked], max_length, dictionary_distance,
    297        max_distance, out, BROTLI_FALSE);
    298  }
    299 }
    300 
    301 #undef BANK_SIZE
    302 #undef BUCKET_SIZE
    303 #undef CAPPED_CHAINS
    304 
    305 #undef HashForgetfulChain