tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

transform.c (10787B)


      1 /* Copyright 2013 Google Inc. All Rights Reserved.
      2 
      3   Distributed under MIT license.
      4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 #include "platform.h"
      8 #include "transform.h"
      9 
     10 #if defined(__cplusplus) || defined(c_plusplus)
     11 extern "C" {
     12 #endif
     13 
     14 /* RFC 7932 transforms string data.

           Packed table of length-prefixed strings: every entry is one length
           byte (written as an octal escape such as "\1" or "\10") followed by
           exactly that many characters. Entries are stored back to back with
           no separators.

           The annotation line under each literal fragment marks, in hex, the
           offset at which each entry starts ("0x", "2x", ... give the high
           nibble, "_N" the low nibble). Those offsets are the values listed in
           kPrefixSuffixMap.

           The array holds 216 explicit bytes plus the literal's implicit
           trailing zero, hence [217]. That trailing zero sits at offset 0xD8
           and reads as an entry of length 0, i.e. the empty string. */
     15 static const BROTLI_MODEL("small") char kPrefixSuffix[217] =
     16      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
     17 /* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
     18      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
     19 /* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
     20      "that \1\'\6 with \6 from \4 by \1(\6. T"
     21 /* 4x       _5_ _7      _E      _5    _A _C */
     22      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
     23 /* 6x     _3    _8    _D    _2    _7_ _ _A _C */
     24      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
     25 /* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
     26      " not \3er \3al \4ful \4ive \5less \4es"
     27 /* Ax       _5   _9   _D    _2    _7     _D */
     28      "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */
     29 /* Cx    _2    _7___ ___ _A    _F     _5        _8 */
     30 /* Start offsets of the 50 length-prefixed entries stored in kPrefixSuffix,
           in table order (index 0 .. 49). They are the same offsets that the
           annotation lines in kPrefixSuffix mark. The final value, 0xD8 (216),
           is the position of that literal's implicit trailing zero, so index 49
           denotes the zero-length (empty) entry. */
     31 static const BROTLI_MODEL("small") uint16_t kPrefixSuffixMap[50] = {
     32  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
     33  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
     34  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
     35  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
     36  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
     37 };
     38 
     39 /* RFC 7932 transforms.

           Flat byte table with three bytes per row; kBrotliTransforms derives
           the transform count as sizeof(table) / 3, which gives 121 rows here.
           The row position is the transform index.

           Row layout: { first id, BROTLI_TRANSFORM_* type, last id }.
           NOTE(review): the first and last ids are presumably the prefix and
           suffix indices into kPrefixSuffixMap, resolved by the
           BROTLI_TRANSFORM_PREFIX / BROTLI_TRANSFORM_SUFFIX macros from
           transform.h -- confirm against that header. All ids used here are in
           the range 0 .. 49, matching the 50 entries of kPrefixSuffixMap; id 49
           is the map entry that points at the empty string. */
     40 static const BROTLI_MODEL("small") uint8_t kTransformsData[] = {
     41  49, BROTLI_TRANSFORM_IDENTITY, 49,
     42  49, BROTLI_TRANSFORM_IDENTITY, 0,
     43   0, BROTLI_TRANSFORM_IDENTITY, 0,
     44  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
     45  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
     46  49, BROTLI_TRANSFORM_IDENTITY, 47,
     47   0, BROTLI_TRANSFORM_IDENTITY, 49,
     48   4, BROTLI_TRANSFORM_IDENTITY, 0,
     49  49, BROTLI_TRANSFORM_IDENTITY, 3,
     50  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
     51  49, BROTLI_TRANSFORM_IDENTITY, 6,
     52  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
     53  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
     54   1, BROTLI_TRANSFORM_IDENTITY, 0,
     55  49, BROTLI_TRANSFORM_IDENTITY, 1,
     56   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
     57  49, BROTLI_TRANSFORM_IDENTITY, 7,
     58  49, BROTLI_TRANSFORM_IDENTITY, 9,
     59  48, BROTLI_TRANSFORM_IDENTITY, 0,
     60  49, BROTLI_TRANSFORM_IDENTITY, 8,
     61  49, BROTLI_TRANSFORM_IDENTITY, 5,
     62  49, BROTLI_TRANSFORM_IDENTITY, 10,
     63  49, BROTLI_TRANSFORM_IDENTITY, 11,
     64  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
     65  49, BROTLI_TRANSFORM_IDENTITY, 13,
     66  49, BROTLI_TRANSFORM_IDENTITY, 14,
     67  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
     68  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
     69  49, BROTLI_TRANSFORM_IDENTITY, 15,
     70  49, BROTLI_TRANSFORM_IDENTITY, 16,
     71   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
     72  49, BROTLI_TRANSFORM_IDENTITY, 12,
     73   5, BROTLI_TRANSFORM_IDENTITY, 49,
     74   0, BROTLI_TRANSFORM_IDENTITY, 1,
     75  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
     76  49, BROTLI_TRANSFORM_IDENTITY, 18,
     77  49, BROTLI_TRANSFORM_IDENTITY, 17,
     78  49, BROTLI_TRANSFORM_IDENTITY, 19,
     79  49, BROTLI_TRANSFORM_IDENTITY, 20,
     80  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
     81  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
     82  47, BROTLI_TRANSFORM_IDENTITY, 49,
     83  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
     84  49, BROTLI_TRANSFORM_IDENTITY, 22,
     85  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
     86  49, BROTLI_TRANSFORM_IDENTITY, 23,
     87  49, BROTLI_TRANSFORM_IDENTITY, 24,
     88  49, BROTLI_TRANSFORM_IDENTITY, 25,
     89  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
     90  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
     91  49, BROTLI_TRANSFORM_IDENTITY, 27,
     92  49, BROTLI_TRANSFORM_IDENTITY, 28,
     93   0, BROTLI_TRANSFORM_IDENTITY, 12,
     94  49, BROTLI_TRANSFORM_IDENTITY, 29,
     95  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
     96  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
     97  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
     98  49, BROTLI_TRANSFORM_IDENTITY, 21,
     99  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
    100  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
    101  49, BROTLI_TRANSFORM_IDENTITY, 31,
    102  49, BROTLI_TRANSFORM_IDENTITY, 32,
    103  47, BROTLI_TRANSFORM_IDENTITY, 3,
    104  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
    105  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
    106   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
    107  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
    108   5, BROTLI_TRANSFORM_IDENTITY, 21,
    109  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
    110  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
    111  49, BROTLI_TRANSFORM_IDENTITY, 30,
    112   0, BROTLI_TRANSFORM_IDENTITY, 5,
    113  35, BROTLI_TRANSFORM_IDENTITY, 49,
    114  47, BROTLI_TRANSFORM_IDENTITY, 2,
    115  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
    116  49, BROTLI_TRANSFORM_IDENTITY, 36,
    117  49, BROTLI_TRANSFORM_IDENTITY, 33,
    118   5, BROTLI_TRANSFORM_IDENTITY, 0,
    119  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
    120  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
    121  49, BROTLI_TRANSFORM_IDENTITY, 37,
    122   0, BROTLI_TRANSFORM_IDENTITY, 30,
    123  49, BROTLI_TRANSFORM_IDENTITY, 38,
    124   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
    125  49, BROTLI_TRANSFORM_IDENTITY, 39,
    126   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
    127  49, BROTLI_TRANSFORM_IDENTITY, 34,
    128  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
    129  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
    130   0, BROTLI_TRANSFORM_IDENTITY, 21,
    131  49, BROTLI_TRANSFORM_IDENTITY, 40,
    132   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
    133  49, BROTLI_TRANSFORM_IDENTITY, 41,
    134  49, BROTLI_TRANSFORM_IDENTITY, 42,
    135  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
    136  49, BROTLI_TRANSFORM_IDENTITY, 43,
    137   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
    138  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
    139   0, BROTLI_TRANSFORM_IDENTITY, 34,
    140  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
    141  49, BROTLI_TRANSFORM_IDENTITY, 44,
    142  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
    143  45, BROTLI_TRANSFORM_IDENTITY, 49,
    144   0, BROTLI_TRANSFORM_IDENTITY, 33,
    145  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
    146  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
    147  49, BROTLI_TRANSFORM_IDENTITY, 46,
    148  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
    149  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
    150   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
    151   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
    152   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
    153  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
    154  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
    155  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
    156   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
    157  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
    158   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
    159   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
    160   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
    161   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
    162 };
    163 /* The built-in transform set, bundling the three tables defined in this
           file. The initializer is positional; the member names live in
           transform.h and are not repeated here. In order it supplies:
             - the byte size of kPrefixSuffix,
             - kPrefixSuffix itself, viewed as bytes,
             - kPrefixSuffixMap (entry start offsets),
             - the number of rows in kTransformsData (total bytes / 3),
             - kTransformsData,
             - NULL for the per-transform parameter bytes (the only code in this
               file that reads them is the SHIFT_FIRST / SHIFT_ALL handling in
               BrotliTransformDictionaryWord, and no row of kTransformsData has
               either type),
             - a 10-entry index list: element n is the row of kTransformsData
               holding { 49, OMIT_LAST_n, 49 }, with element 0 being the plain
               identity row { 49, IDENTITY, 49 }. */
    164 static const BROTLI_MODEL("small")
    165 BrotliTransforms kBrotliTransforms = {
    166  sizeof(kPrefixSuffix),
    167  (const uint8_t*)kPrefixSuffix,
    168  kPrefixSuffixMap,
    169  sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),
    170  kTransformsData,
    171  NULL,  /* no extra parameters */
    172  {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}
    173 };
    174 /* Returns a pointer to the file-scope kBrotliTransforms table. The object is
           a static const, so the pointer stays valid for the life of the program
           and the caller must not free or modify it. */
    175 const BrotliTransforms* BrotliGetTransforms(void) {
    176  return &kBrotliTransforms;
    177 }
    178 
    179 static int ToUpperCase(uint8_t* p) {
    180  if (p[0] < 0xC0) {
    181    if (p[0] >= 'a' && p[0] <= 'z') {
    182      p[0] ^= 32;
    183    }
    184    return 1;
    185  }
    186  /* An overly simplified uppercasing model for UTF-8. */
    187  if (p[0] < 0xE0) {
    188    p[1] ^= 32;
    189    return 2;
    190  }
    191  /* An arbitrary transform for three byte characters. */
    192  p[2] ^= 5;
    193  return 3;
    194 }
    195 
    196 static int Shift(uint8_t* word, int word_len, uint16_t parameter) {
    197  /* Limited sign extension: scalar < (1 << 24). */
    198  uint32_t scalar =
    199      (parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u));
    200  if (word[0] < 0x80) {
    201    /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
    202    scalar += (uint32_t)word[0];
    203    word[0] = (uint8_t)(scalar & 0x7Fu);
    204    return 1;
    205  } else if (word[0] < 0xC0) {
    206    /* Continuation / 10AAAAAA. */
    207    return 1;
    208  } else if (word[0] < 0xE0) {
    209    /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
    210    if (word_len < 2) return 1;
    211    scalar += (uint32_t)((word[1] & 0x3Fu) | ((word[0] & 0x1Fu) << 6u));
    212    word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F));
    213    word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F));
    214    return 2;
    215  } else if (word[0] < 0xF0) {
    216    /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
    217    if (word_len < 3) return word_len;
    218    scalar += (uint32_t)((word[2] & 0x3Fu) | ((word[1] & 0x3Fu) << 6u) |
    219        ((word[0] & 0x0Fu) << 12u));
    220    word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F));
    221    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F));
    222    word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F));
    223    return 3;
    224  } else if (word[0] < 0xF8) {
    225    /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
    226    if (word_len < 4) return word_len;
    227    scalar += (uint32_t)((word[3] & 0x3Fu) | ((word[2] & 0x3Fu) << 6u) |
    228        ((word[1] & 0x3Fu) << 12u) | ((word[0] & 0x07u) << 18u));
    229    word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07));
    230    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F));
    231    word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F));
    232    word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F));
    233    return 4;
    234  }
    235  return 1;
    236 }
    237 
    238 int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
    239    const BrotliTransforms* transforms, int transform_idx) {
    240  int idx = 0;
    241  const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
    242  uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
    243  const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
    244  {
    245    int prefix_len = *prefix++;
    246    while (prefix_len--) { dst[idx++] = *prefix++; }
    247  }
    248  {
    249    const int t = type;
    250    int i = 0;
    251    if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
    252      len -= t;
    253    } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
    254        && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
    255      int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
    256      word += skip;
    257      len -= skip;
    258    }
    259    while (i < len) { dst[idx++] = word[i++]; }
    260    if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
    261      ToUpperCase(&dst[idx - len]);
    262    } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
    263      uint8_t* uppercase = &dst[idx - len];
    264      while (len > 0) {
    265        int step = ToUpperCase(uppercase);
    266        uppercase += step;
    267        len -= step;
    268      }
    269    } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) {
    270      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
    271          + (transforms->params[transform_idx * 2 + 1] << 8u));
    272      Shift(&dst[idx - len], len, param);
    273    } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) {
    274      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
    275          + (transforms->params[transform_idx * 2 + 1] << 8u));
    276      uint8_t* shift = &dst[idx - len];
    277      while (len > 0) {
    278        int step = Shift(shift, len, param);
    279        shift += step;
    280        len -= step;
    281      }
    282    }
    283  }
    284  {
    285    int suffix_len = *suffix++;
    286    while (suffix_len--) { dst[idx++] = *suffix++; }
    287    return idx;
    288  }
    289 }
    290 
    291 #if defined(__cplusplus) || defined(c_plusplus)
    292 }  /* extern "C" */
    293 #endif