tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

static_dict_lut.c (7420B)


      1 /* Copyright 2025 Google Inc. All Rights Reserved.
      2 
      3   Distributed under MIT license.
      4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Lookup table for static dictionary and transforms. */
      8 
      9 #include "static_dict_lut.h"
     10 
     11 #include "../common/platform.h"  /* IWYU pragma: keep */
     12 #include "../common/static_init.h"
     13 
     14 #if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE)
     15 #include "../common/dictionary.h"
     16 #include "../common/transform.h"
     17 #include "hash_base.h"
     18 #endif
     19 
     20 #if defined(__cplusplus) || defined(c_plusplus)
     21 extern "C" {
     22 #endif
     23 
     24 #if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE)
     25 
     26 /* TODO(eustas): deal with largest bucket(s). Not it contains 163 items. */
     27 static BROTLI_BOOL BROTLI_COLD DoBrotliEncoderInitStaticDictionaryLut(
     28    const BrotliDictionary* dict, uint16_t* buckets, DictWord* words,
     29    void* arena) {
     30  DictWord* slots = (DictWord*)arena;
     31  uint16_t* heads = (uint16_t*)(slots + BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS);
     32  uint16_t* counts = heads + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS;
     33  uint16_t* prev = counts + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS;
     34  size_t next_slot = 0;
     35  uint8_t transformed_word[24];
     36  uint8_t transformed_other_word[24];
     37  size_t l;
     38  size_t pos;
     39  size_t i;
     40 
     41  memset(counts, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t));
     42  memset(heads, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t));
     43  memset(prev, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS * sizeof(uint16_t));
     44 
     45  for (l = 4; l <= 24; ++l) {
     46    size_t n = 1u << dict->size_bits_by_length[l];
     47    const uint8_t* dict_words = dict->data + dict->offsets_by_length[l];
     48    for (i = 0; i < n; ++i) {
     49      const uint8_t* dict_word = dict_words + l * i;
     50      uint32_t key = Hash15(dict_word);
     51      slots[next_slot].len = (uint8_t)l;
     52      slots[next_slot].transform = BROTLI_TRANSFORM_IDENTITY;
     53      slots[next_slot].idx = (uint16_t)i;
     54      prev[next_slot] = heads[key];
     55      heads[key] = (uint16_t)next_slot;
     56      counts[key]++;
     57      ++next_slot;
     58    }
     59    for (i = 0; i < n; ++i) {
     60      uint32_t key;
     61      uint32_t prefix;
     62      BROTLI_BOOL found;
     63      size_t curr;
     64      const uint8_t* dict_word = dict_words + l * i;
     65      if (dict_word[0] < 'a' || dict_word[0] > 'z') continue;
     66      memcpy(transformed_word, dict_word, l);
     67      transformed_word[0] = transformed_word[0] - 32;
     68      key = Hash15(transformed_word);
     69      prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u;
     70      found = BROTLI_FALSE;
     71      curr = heads[key];
     72      while (curr != 0) {
     73        const uint8_t* other_word;
     74        uint32_t other_prefix;
     75        if (slots[curr].len != l) break;
     76        other_word = dict_words + l * slots[curr].idx;
     77        other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u;
     78        if (prefix == other_prefix) {
     79          if (memcmp(transformed_word, other_word, l) == 0) {
     80            found = BROTLI_TRUE;
     81            break;
     82          }
     83        }
     84        curr = prev[curr];
     85      }
     86      if (found) continue;
     87      slots[next_slot].len = (uint8_t)l;
     88      slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_FIRST;
     89      slots[next_slot].idx = (uint16_t)i;
     90      prev[next_slot] = heads[key];
     91      heads[key] = (uint16_t)next_slot;
     92      counts[key]++;
     93      ++next_slot;
     94    }
     95    for (i = 0; i < n; ++i) {
     96      const uint8_t* dict_word = dict_words + l * i;
     97      BROTLI_BOOL is_ascii = BROTLI_TRUE;
     98      BROTLI_BOOL has_lower = BROTLI_FALSE;
     99      size_t k;
    100      uint32_t prefix;
    101      uint32_t key;
    102      size_t curr;
    103      BROTLI_BOOL found;
    104      for (k = 0; k < l; ++k) {
    105        if (dict_word[k] >= 128) is_ascii = BROTLI_FALSE;
    106        if (k > 0 && dict_word[k] >= 'a' && dict_word[k] <= 'z')
    107          has_lower = BROTLI_TRUE;
    108      }
    109      if (!is_ascii || !has_lower) continue;
    110      memcpy(transformed_word, dict_word, l);
    111      prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u;
    112      for (k = 0; k < l; ++k) {
    113        if (transformed_word[k] >= 'a' && transformed_word[k] <= 'z') {
    114          transformed_word[k] = transformed_word[k] - 32;
    115        }
    116      }
    117      key = Hash15(transformed_word);
    118      found = BROTLI_FALSE;
    119      curr = heads[key];
    120      while (curr != 0) {
    121        const uint8_t* other_word;
    122        uint32_t other_prefix;
    123        if (slots[curr].len != l) break;
    124        other_word = dict_words + l * slots[curr].idx;
    125        other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u;
    126        if (prefix == other_prefix) {
    127          if (slots[curr].transform == BROTLI_TRANSFORM_IDENTITY) {
    128            if (memcmp(transformed_word, other_word, l) == 0) {
    129              found = BROTLI_TRUE;
    130              break;
    131            }
    132          } else if (slots[curr].transform ==
    133                     BROTLI_TRANSFORM_UPPERCASE_FIRST) {
    134            if ((transformed_word[0] == (other_word[0] - 32)) &&
    135                memcmp(transformed_word + 1, other_word + 1, l - 1) == 0) {
    136              found = BROTLI_TRUE;
    137              break;
    138            }
    139          } else {
    140            for (k = 0; k < l; ++k) {
    141              if (other_word[k] >= 'a' && other_word[k] <= 'z') {
    142                transformed_other_word[k] = other_word[k] - 32;
    143              } else {
    144                transformed_other_word[k] = other_word[k];
    145              }
    146            }
    147            if (memcmp(transformed_word, transformed_other_word, l) == 0) {
    148              found = BROTLI_TRUE;
    149              break;
    150            }
    151          }
    152        }
    153        curr = prev[curr];
    154      }
    155      if (found) {
    156        continue;
    157      }
    158      slots[next_slot].len = (uint8_t)l;
    159      slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_ALL;
    160      slots[next_slot].idx = (uint16_t)i;
    161      prev[next_slot] = heads[key];
    162      heads[key] = (uint16_t)next_slot;
    163      counts[key]++;
    164      ++next_slot;
    165    }
    166  }
    167 
    168  if (next_slot != 31704) return BROTLI_FALSE;
    169  pos = 0;
    170  /* Unused; makes offsets start from 1. */
    171  words[pos].len = 0;
    172  words[pos].transform = 0;
    173  words[pos].idx = 0;
    174  pos++;
    175  for (i = 0; i < BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS; ++i) {
    176    size_t num_words = counts[i];
    177    size_t curr;
    178    if (num_words == 0) {
    179      buckets[i] = 0;
    180      continue;
    181    }
    182    buckets[i] = (uint16_t)pos;
    183    curr = heads[i];
    184    pos += num_words;
    185    for (size_t k = 0; k < num_words; ++k) {
    186      words[pos - 1 - k] = slots[curr];
    187      curr = prev[curr];
    188    }
    189    words[pos - 1].len |= 0x80;
    190  }
    191  return BROTLI_TRUE;
    192 }
    193 
    194 BROTLI_BOOL BrotliEncoderInitStaticDictionaryLut(
    195    const BrotliDictionary* dict, uint16_t* buckets, DictWord* words) {
    196  size_t arena_size =
    197      BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS *
    198          (sizeof(uint16_t) + sizeof(DictWord)) +
    199      BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * 2 * sizeof(uint16_t);
    200  void* arena = malloc(arena_size);
    201  BROTLI_BOOL ok;
    202  if (arena == NULL) {
    203    return BROTLI_FALSE;
    204  }
    205  ok = DoBrotliEncoderInitStaticDictionaryLut(dict, buckets, words, arena);
    206  free(arena);
    207  return ok;
    208 }
    209 
    210 BROTLI_MODEL("small")
    211 uint16_t kStaticDictionaryBuckets[BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS];
    212 BROTLI_MODEL("small")
    213 DictWord kStaticDictionaryWords[BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS];
    214 
    215 #else  /* BROTLI_STATIC_INIT */
    216 
    217 /* Embed kStaticDictionaryBuckets and kStaticDictionaryWords. */
    218 #include "static_dict_lut_inc.h"
    219 
    220 #endif  /* BROTLI_STATIC_INIT */
    221 
    222 #if defined(__cplusplus) || defined(c_plusplus)
    223 } /* extern "C" */
    224 #endif