tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

shared_dictionary.c (17347B)


      1 /* Copyright 2017 Google Inc. All Rights Reserved.
      2 
      3   Distributed under MIT license.
      4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Shared Dictionary definition and utilities. */
      8 
      9 #include <brotli/shared_dictionary.h>
     10 
     11 #include "dictionary.h"
     12 #include "platform.h"
     13 #include "shared_dictionary_internal.h"
     14 
     15 #if defined(__cplusplus) || defined(c_plusplus)
     16 extern "C" {
     17 #endif
     18 
     19 #if defined(BROTLI_EXPERIMENTAL)
     20 
     21 #define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
     22    - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
     23 
     24 /* Max allowed by spec */
     25 #define BROTLI_MAX_SIZE_BITS 15u
     26 
     27 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
     28 static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
     29    BROTLI_BOOL* result) {
     30  uint8_t value;
     31  size_t position = *pos;
     32  if (position >= size) return BROTLI_FALSE;  /* past file end */
     33  value = encoded[position++];
     34  if (value > 1) return BROTLI_FALSE;  /* invalid bool */
     35  *result = TO_BROTLI_BOOL(value);
     36  *pos = position;
     37  return BROTLI_TRUE;  /* success */
     38 }
     39 
     40 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
     41 static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
     42    uint8_t* result) {
     43  size_t position = *pos;
     44  if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
     45  *result = encoded[position++];
     46  *pos = position;
     47  return BROTLI_TRUE;
     48 }
     49 
     50 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
     51 static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
     52    uint16_t* result) {
     53  size_t position = *pos;
     54  if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
     55  *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
     56  position += 2;
     57  *pos = position;
     58  return BROTLI_TRUE;
     59 }
     60 
     61 /* Reads a varint into a uint32_t, and returns error if it's too large */
     62 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
     63 static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
     64    size_t* pos, uint32_t* result) {
     65  int num = 0;
     66  uint8_t byte;
     67  *result = 0;
     68  for (;;) {
     69    if (*pos >= size) return BROTLI_FALSE;
     70    byte = encoded[(*pos)++];
     71    if (num == 4 && byte > 15) return BROTLI_FALSE;
     72    *result |= (uint32_t)(byte & 127) << (num * 7);
     73    if (byte < 128) return BROTLI_TRUE;
     74    num++;
     75  }
     76 }
     77 
     78 /* Returns the total length of word list. */
     79 static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
     80    uint32_t* offsets_by_length) {
     81  uint32_t pos = 0;
     82  uint32_t i;
     83  for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
     84    offsets_by_length[i] = pos;
     85    if (size_bits_by_length[i] != 0) {
     86      pos += i << size_bits_by_length[i];
     87    }
     88  }
     89  return pos;
     90 }
     91 
     92 static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
     93    size_t* pos, BrotliDictionary* out) {
     94  size_t offset;
     95  size_t i;
     96  size_t position = *pos;
     97  if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
     98    return BROTLI_FALSE;
     99  }
    100 
    101  memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
    102  memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
    103      &encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
    104  for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
    105      i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
    106    if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
    107      return BROTLI_FALSE;
    108    }
    109  }
    110  position += BROTLI_NUM_ENCODED_LENGTHS;
    111  offset = BrotliSizeBitsToOffsets(
    112      out->size_bits_by_length, out->offsets_by_length);
    113 
    114  out->data = &encoded[position];
    115  out->data_size = offset;
    116  position += offset;
    117  if (position > size) return BROTLI_FALSE;
    118  *pos = position;
    119  return BROTLI_TRUE;
    120 }
    121 
    122 /* Computes the cutOffTransforms of a BrotliTransforms which already has the
    123   transforms data correctly filled in. */
    124 static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
    125  uint32_t i;
    126  for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
    127    transforms->cutOffTransforms[i] = -1;
    128  }
    129  for (i = 0; i < transforms->num_transforms; i++) {
    130    const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
    131    uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
    132    const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
    133    if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
    134        transforms->cutOffTransforms[type] == -1) {
    135      transforms->cutOffTransforms[type] = (int16_t)i;
    136    }
    137  }
    138 }
    139 
    140 static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
    141    size_t* pos, BrotliTransforms* out, uint16_t* out_table,
    142    size_t* out_table_size) {
    143  size_t position = *pos;
    144  size_t offset = 0;
    145  size_t stringlet_count = 0;  /* NUM_PREFIX_SUFFIX */
    146  size_t data_length = 0;
    147 
    148  /* PREFIX_SUFFIX_LENGTH */
    149  if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
    150    return BROTLI_FALSE;
    151  }
    152  data_length = out->prefix_suffix_size;
    153 
    154  /* Must at least have space for null terminator. */
    155  if (data_length < 1) return BROTLI_FALSE;
    156  out->prefix_suffix = &encoded[position];
    157  if (position + data_length >= size) return BROTLI_FALSE;
    158  while (BROTLI_TRUE) {
    159    /* STRING_LENGTH */
    160    size_t stringlet_len = encoded[position + offset];
    161    out_table[stringlet_count] = (uint16_t)offset;
    162    stringlet_count++;
    163    offset++;
    164    if (stringlet_len == 0) {
    165      if (offset == data_length) {
    166        break;
    167      } else {
    168        return BROTLI_FALSE;
    169      }
    170    }
    171    if (stringlet_count > 255) return BROTLI_FALSE;
    172    offset += stringlet_len;
    173    if (offset >= data_length) return BROTLI_FALSE;
    174  }
    175 
    176  position += data_length;
    177  *pos = position;
    178  *out_table_size = (uint16_t)stringlet_count;
    179  return BROTLI_TRUE;
    180 }
    181 
    182 static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
    183    size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
    184    size_t* prefix_suffix_count) {
    185  uint32_t i;
    186  BROTLI_BOOL has_params = BROTLI_FALSE;
    187  BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
    188  size_t position = *pos;
    189  size_t stringlet_cnt = 0;
    190  if (position >= size) return BROTLI_FALSE;
    191 
    192  prefix_suffix_ok = ParsePrefixSuffixTable(
    193      size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
    194  if (!prefix_suffix_ok) return BROTLI_FALSE;
    195  out->prefix_suffix_map = prefix_suffix_table;
    196  *prefix_suffix_count = stringlet_cnt;
    197 
    198  out->num_transforms = encoded[position++];
    199  out->transforms = &encoded[position];
    200  position += (size_t)out->num_transforms * 3;
    201  if (position > size) return BROTLI_FALSE;
    202  /* Check for errors and read extra parameters. */
    203  for (i = 0; i < out->num_transforms; i++) {
    204    uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
    205    uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
    206    uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
    207    if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
    208    if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
    209    if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
    210    if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
    211        type == BROTLI_TRANSFORM_SHIFT_ALL) {
    212      has_params = BROTLI_TRUE;
    213    }
    214  }
    215  if (has_params) {
    216    out->params = &encoded[position];
    217    position += (size_t)out->num_transforms * 2;
    218    if (position > size) return BROTLI_FALSE;
    219    for (i = 0; i < out->num_transforms; i++) {
    220      uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
    221      if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
    222          type != BROTLI_TRANSFORM_SHIFT_ALL) {
    223        if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
    224          return BROTLI_FALSE;
    225        }
    226      }
    227    }
    228  } else {
    229    out->params = NULL;
    230  }
    231  ComputeCutoffTransforms(out);
    232  *pos = position;
    233  return BROTLI_TRUE;
    234 }
    235 
    236 static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
    237    size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
    238  size_t pos = 0;
    239  uint32_t chunk_size = 0;
    240  uint8_t num_word_lists;
    241  uint8_t num_transform_lists;
    242  *is_custom_static_dict = BROTLI_FALSE;
    243  *num_prefix = 0;
    244 
    245  /* Skip magic header bytes. */
    246  pos += 2;
    247 
    248  /* LZ77_DICTIONARY_LENGTH */
    249  if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
    250  if (chunk_size != 0) {
    251    /* This limitation is not specified but the 32-bit Brotli decoder for now */
    252    if (chunk_size > 1073741823) return BROTLI_FALSE;
    253    *num_prefix = 1;
    254    if (pos + chunk_size > size) return BROTLI_FALSE;
    255    pos += chunk_size;
    256  }
    257 
    258  if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
    259    return BROTLI_FALSE;
    260  }
    261  if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
    262    return BROTLI_FALSE;
    263  }
    264 
    265  if (num_word_lists > 0 || num_transform_lists > 0) {
    266    *is_custom_static_dict = BROTLI_TRUE;
    267  }
    268 
    269  return BROTLI_TRUE;
    270 }
    271 
    272 static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
    273    BrotliSharedDictionary* dict) {
    274  uint32_t i;
    275  size_t pos = 0;
    276  uint32_t chunk_size = 0;
    277  size_t total_prefix_suffix_count = 0;
    278  size_t transform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
    279  uint16_t temporary_prefix_suffix_table[256];
    280 
    281  /* Skip magic header bytes. */
    282  pos += 2;
    283 
    284  /* LZ77_DICTIONARY_LENGTH */
    285  if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
    286  if (chunk_size != 0) {
    287    if (pos + chunk_size > size) return BROTLI_FALSE;
    288    dict->prefix_size[dict->num_prefix] = chunk_size;
    289    dict->prefix[dict->num_prefix] = &encoded[pos];
    290    dict->num_prefix++;
    291    /* LZ77_DICTIONARY_LENGTH bytes. */
    292    pos += chunk_size;
    293  }
    294 
    295  /* NUM_WORD_LISTS */
    296  if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
    297    return BROTLI_FALSE;
    298  }
    299  if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
    300    return BROTLI_FALSE;
    301  }
    302 
    303  if (dict->num_word_lists != 0) {
    304    dict->words_instances = (BrotliDictionary*)dict->alloc_func(
    305        dict->memory_manager_opaque,
    306        dict->num_word_lists * sizeof(*dict->words_instances));
    307    if (!dict->words_instances) return BROTLI_FALSE;  /* OOM */
    308  }
    309  for (i = 0; i < dict->num_word_lists; i++) {
    310    if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
    311      return BROTLI_FALSE;
    312    }
    313  }
    314 
    315  /* NUM_TRANSFORM_LISTS */
    316  if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
    317    return BROTLI_FALSE;
    318  }
    319  if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
    320    return BROTLI_FALSE;
    321  }
    322 
    323  if (dict->num_transform_lists != 0) {
    324    dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
    325        dict->memory_manager_opaque,
    326        dict->num_transform_lists * sizeof(*dict->transforms_instances));
    327    if (!dict->transforms_instances) return BROTLI_FALSE;  /* OOM */
    328  }
    329  for (i = 0; i < dict->num_transform_lists; i++) {
    330    BROTLI_BOOL ok = BROTLI_FALSE;
    331    size_t prefix_suffix_count = 0;
    332    transform_list_start[i] = pos;
    333    dict->transforms_instances[i].prefix_suffix_map =
    334        temporary_prefix_suffix_table;
    335    ok = ParseTransformsList(
    336        size, encoded, &pos, &dict->transforms_instances[i],
    337        temporary_prefix_suffix_table, &prefix_suffix_count);
    338    if (!ok) return BROTLI_FALSE;
    339    total_prefix_suffix_count += prefix_suffix_count;
    340  }
    341  if (total_prefix_suffix_count != 0) {
    342    dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
    343        dict->memory_manager_opaque,
    344        total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
    345    if (!dict->prefix_suffix_maps) return BROTLI_FALSE;  /* OOM */
    346  }
    347  total_prefix_suffix_count = 0;
    348  for (i = 0; i < dict->num_transform_lists; i++) {
    349    size_t prefix_suffix_count = 0;
    350    size_t position = transform_list_start[i];
    351    uint16_t* prefix_suffix_map =
    352      &dict->prefix_suffix_maps[total_prefix_suffix_count];
    353    BROTLI_BOOL ok = ParsePrefixSuffixTable(
    354        size, encoded, &position, &dict->transforms_instances[i],
    355        prefix_suffix_map, &prefix_suffix_count);
    356    if (!ok) return BROTLI_FALSE;
    357    dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
    358    total_prefix_suffix_count += prefix_suffix_count;
    359  }
    360 
    361  if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
    362    if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
    363      return BROTLI_FALSE;
    364    }
    365    if (dict->num_dictionaries == 0 ||
    366        dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
    367      return BROTLI_FALSE;
    368    }
    369    for (i = 0; i < dict->num_dictionaries; i++) {
    370      uint8_t words_index;
    371      uint8_t transforms_index;
    372      if (!ReadUint8(encoded, size, &pos, &words_index)) {
    373        return BROTLI_FALSE;
    374      }
    375      if (words_index > dict->num_word_lists) return BROTLI_FALSE;
    376      if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
    377        return BROTLI_FALSE;
    378      }
    379      if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
    380      dict->words[i] = words_index == dict->num_word_lists ?
    381          BrotliGetDictionary() : &dict->words_instances[words_index];
    382      dict->transforms[i] = transforms_index == dict->num_transform_lists ?
    383          BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
    384    }
    385    /* CONTEXT_ENABLED */
    386    if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
    387      return BROTLI_FALSE;
    388    }
    389 
    390    /* CONTEXT_MAP */
    391    if (dict->context_based) {
    392      for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
    393        if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
    394          return BROTLI_FALSE;
    395        }
    396        if (dict->context_map[i] >= dict->num_dictionaries) {
    397          return BROTLI_FALSE;
    398        }
    399      }
    400    }
    401  } else {
    402    dict->context_based = BROTLI_FALSE;
    403    dict->num_dictionaries = 1;
    404    dict->words[0] = BrotliGetDictionary();
    405    dict->transforms[0] = BrotliGetTransforms();
    406  }
    407 
    408  return BROTLI_TRUE;
    409 }
    410 
    411 /* Decodes shared dictionary and verifies correctness.
    412   Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
    413   The BrotliSharedDictionary must already have been initialized. If the
    414   BrotliSharedDictionary already contains data, compound dictionaries
    415   will be appended, but an error will be returned if it already has
    416   custom words or transforms.
    417   TODO(lode): link to RFC for shared brotli once published. */
    418 static BROTLI_BOOL DecodeSharedDictionary(
    419    const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
    420  uint32_t num_prefix = 0;
    421  BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
    422  BROTLI_BOOL has_custom_static_dict =
    423      dict->num_word_lists > 0 || dict->num_transform_lists > 0;
    424 
    425  /* Check magic header bytes. */
    426  if (size < 2) return BROTLI_FALSE;
    427  if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
    428 
    429  if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
    430    return BROTLI_FALSE;
    431  }
    432 
    433  if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
    434    return BROTLI_FALSE;
    435  }
    436 
    437  /* Cannot combine different static dictionaries, only prefix dictionaries */
    438  if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
    439 
    440  return ParseDictionary(encoded, size, dict);
    441 }
    442 
    443 #endif  /* BROTLI_EXPERIMENTAL */
    444 
    445 void BrotliSharedDictionaryDestroyInstance(
    446    BrotliSharedDictionary* dict) {
    447  if (!dict) {
    448    return;
    449  } else {
    450    brotli_free_func free_func = dict->free_func;
    451    void* opaque = dict->memory_manager_opaque;
    452    /* Cleanup. */
    453    free_func(opaque, dict->words_instances);
    454    free_func(opaque, dict->transforms_instances);
    455    free_func(opaque, dict->prefix_suffix_maps);
    456    /* Self-destruction. */
    457    free_func(opaque, dict);
    458  }
    459 }
    460 
    461 BROTLI_BOOL BrotliSharedDictionaryAttach(
    462    BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
    463    size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
    464  if (!dict) {
    465    return BROTLI_FALSE;
    466  }
    467 #if defined(BROTLI_EXPERIMENTAL)
    468  if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
    469    return DecodeSharedDictionary(data, data_size, dict);
    470  }
    471 #endif  /* BROTLI_EXPERIMENTAL */
    472  if (type == BROTLI_SHARED_DICTIONARY_RAW) {
    473    if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
    474      return BROTLI_FALSE;
    475    }
    476    dict->prefix_size[dict->num_prefix] = data_size;
    477    dict->prefix[dict->num_prefix] = data;
    478    dict->num_prefix++;
    479    return BROTLI_TRUE;
    480  }
    481  return BROTLI_FALSE;
    482 }
    483 
    484 BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
    485    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
    486  BrotliSharedDictionary* dict = 0;
    487  if (!alloc_func && !free_func) {
    488    dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
    489  } else if (alloc_func && free_func) {
    490    dict = (BrotliSharedDictionary*)alloc_func(
    491        opaque, sizeof(BrotliSharedDictionary));
    492  }
    493  if (dict == 0) {
    494    return 0;
    495  }
    496 
    497  /* TODO(eustas): explicitly initialize all the fields? */
    498  memset(dict, 0, sizeof(BrotliSharedDictionary));
    499 
    500  dict->context_based = BROTLI_FALSE;
    501  dict->num_dictionaries = 1;
    502  dict->num_word_lists = 0;
    503  dict->num_transform_lists = 0;
    504 
    505  dict->words[0] = BrotliGetDictionary();
    506  dict->transforms[0] = BrotliGetTransforms();
    507 
    508  dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
    509  dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
    510  dict->memory_manager_opaque = opaque;
    511 
    512  return dict;
    513 }
    514 
    515 #if defined(__cplusplus) || defined(c_plusplus)
    516 }  /* extern "C" */
    517 #endif