transform.c (10787B)
1 /* Copyright 2013 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 #include "platform.h" 8 #include "transform.h" 9 10 #if defined(__cplusplus) || defined(c_plusplus) 11 extern "C" { 12 #endif 13 14 /* RFC 7932 transforms string data */ 15 static const BROTLI_MODEL("small") char kPrefixSuffix[217] = 16 "\1 \2, \10 of the \4 of \2s \1.\5 and \4 " 17 /* 0x _0 _2 __5 _E _3 _6 _8 _E */ 18 "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 " 19 /* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */ 20 "that \1\'\6 with \6 from \4 by \1(\6. T" 21 /* 4x _5_ _7 _E _5 _A _C */ 22 "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed " 23 /* 6x _3 _8 _D _2 _7_ _ _A _C */ 24 "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5" 25 /* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */ 26 " not \3er \3al \4ful \4ive \5less \4es" 27 /* Ax _5 _9 _D _2 _7 _D */ 28 "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */ 29 /* Cx _2 _7___ ___ _A _F _5 _8 */ 30 31 static const BROTLI_MODEL("small") uint16_t kPrefixSuffixMap[50] = { 32 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25, 33 0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E, 34 0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C, 35 0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9, 36 0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8 37 }; 38 39 /* RFC 7932 transforms */ 40 static const BROTLI_MODEL("small") uint8_t kTransformsData[] = { 41 49, BROTLI_TRANSFORM_IDENTITY, 49, 42 49, BROTLI_TRANSFORM_IDENTITY, 0, 43 0, BROTLI_TRANSFORM_IDENTITY, 0, 44 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49, 45 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, 46 49, BROTLI_TRANSFORM_IDENTITY, 47, 47 0, BROTLI_TRANSFORM_IDENTITY, 49, 48 4, BROTLI_TRANSFORM_IDENTITY, 0, 49 49, BROTLI_TRANSFORM_IDENTITY, 3, 50 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, 51 49, BROTLI_TRANSFORM_IDENTITY, 6, 52 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49, 53 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49, 54 1, BROTLI_TRANSFORM_IDENTITY, 0, 55 49, BROTLI_TRANSFORM_IDENTITY, 1, 56 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, 57 49, BROTLI_TRANSFORM_IDENTITY, 7, 58 49, BROTLI_TRANSFORM_IDENTITY, 9, 59 48, BROTLI_TRANSFORM_IDENTITY, 0, 60 49, BROTLI_TRANSFORM_IDENTITY, 8, 61 49, BROTLI_TRANSFORM_IDENTITY, 5, 62 49, BROTLI_TRANSFORM_IDENTITY, 10, 63 49, BROTLI_TRANSFORM_IDENTITY, 11, 64 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49, 65 49, BROTLI_TRANSFORM_IDENTITY, 13, 66 49, BROTLI_TRANSFORM_IDENTITY, 14, 67 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49, 68 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49, 69 49, BROTLI_TRANSFORM_IDENTITY, 15, 70 49, BROTLI_TRANSFORM_IDENTITY, 16, 71 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, 72 49, BROTLI_TRANSFORM_IDENTITY, 12, 73 5, BROTLI_TRANSFORM_IDENTITY, 49, 74 0, BROTLI_TRANSFORM_IDENTITY, 1, 75 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49, 76 49, BROTLI_TRANSFORM_IDENTITY, 18, 77 49, BROTLI_TRANSFORM_IDENTITY, 17, 78 49, BROTLI_TRANSFORM_IDENTITY, 19, 79 49, BROTLI_TRANSFORM_IDENTITY, 20, 80 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49, 81 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49, 82 47, BROTLI_TRANSFORM_IDENTITY, 49, 83 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49, 84 49, BROTLI_TRANSFORM_IDENTITY, 22, 85 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, 86 49, BROTLI_TRANSFORM_IDENTITY, 23, 87 49, BROTLI_TRANSFORM_IDENTITY, 24, 88 49, BROTLI_TRANSFORM_IDENTITY, 25, 89 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49, 90 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26, 91 49, BROTLI_TRANSFORM_IDENTITY, 27, 92 49, BROTLI_TRANSFORM_IDENTITY, 28, 93 0, BROTLI_TRANSFORM_IDENTITY, 12, 94 49, BROTLI_TRANSFORM_IDENTITY, 29, 95 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49, 96 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49, 97 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49, 98 49, BROTLI_TRANSFORM_IDENTITY, 21, 99 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, 100 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49, 101 49, BROTLI_TRANSFORM_IDENTITY, 31, 102 49, BROTLI_TRANSFORM_IDENTITY, 32, 103 47, BROTLI_TRANSFORM_IDENTITY, 3, 104 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49, 105 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49, 106 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, 107 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8, 108 5, BROTLI_TRANSFORM_IDENTITY, 21, 109 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, 110 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10, 111 49, BROTLI_TRANSFORM_IDENTITY, 30, 112 0, BROTLI_TRANSFORM_IDENTITY, 5, 113 35, BROTLI_TRANSFORM_IDENTITY, 49, 114 47, BROTLI_TRANSFORM_IDENTITY, 2, 115 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17, 116 49, BROTLI_TRANSFORM_IDENTITY, 36, 117 49, BROTLI_TRANSFORM_IDENTITY, 33, 118 5, BROTLI_TRANSFORM_IDENTITY, 0, 119 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21, 120 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, 121 49, BROTLI_TRANSFORM_IDENTITY, 37, 122 0, BROTLI_TRANSFORM_IDENTITY, 30, 123 49, BROTLI_TRANSFORM_IDENTITY, 38, 124 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, 125 49, BROTLI_TRANSFORM_IDENTITY, 39, 126 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, 127 49, BROTLI_TRANSFORM_IDENTITY, 34, 128 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8, 129 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, 130 0, BROTLI_TRANSFORM_IDENTITY, 21, 131 49, BROTLI_TRANSFORM_IDENTITY, 40, 132 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, 133 49, BROTLI_TRANSFORM_IDENTITY, 41, 134 49, BROTLI_TRANSFORM_IDENTITY, 42, 135 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17, 136 49, BROTLI_TRANSFORM_IDENTITY, 43, 137 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, 138 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10, 139 0, BROTLI_TRANSFORM_IDENTITY, 34, 140 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, 141 49, BROTLI_TRANSFORM_IDENTITY, 44, 142 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, 143 45, BROTLI_TRANSFORM_IDENTITY, 49, 144 0, BROTLI_TRANSFORM_IDENTITY, 33, 145 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, 146 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, 147 49, BROTLI_TRANSFORM_IDENTITY, 46, 148 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, 149 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, 150 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, 151 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, 152 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, 153 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33, 154 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21, 155 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, 156 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, 157 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, 158 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, 159 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, 160 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, 161 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, 162 }; 163 164 static const BROTLI_MODEL("small") 165 BrotliTransforms kBrotliTransforms = { 166 sizeof(kPrefixSuffix), 167 (const uint8_t*)kPrefixSuffix, 168 kPrefixSuffixMap, 169 sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])), 170 kTransformsData, 171 NULL, /* no extra parameters */ 172 {0, 12, 27, 23, 42, 63, 56, 48, 59, 64} 173 }; 174 175 const BrotliTransforms* BrotliGetTransforms(void) { 176 return &kBrotliTransforms; 177 } 178 179 static int ToUpperCase(uint8_t* p) { 180 if (p[0] < 0xC0) { 181 if (p[0] >= 'a' && p[0] <= 'z') { 182 p[0] ^= 32; 183 } 184 return 1; 185 } 186 /* An overly simplified uppercasing model for UTF-8. */ 187 if (p[0] < 0xE0) { 188 p[1] ^= 32; 189 return 2; 190 } 191 /* An arbitrary transform for three byte characters. */ 192 p[2] ^= 5; 193 return 3; 194 } 195 196 static int Shift(uint8_t* word, int word_len, uint16_t parameter) { 197 /* Limited sign extension: scalar < (1 << 24). */ 198 uint32_t scalar = 199 (parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u)); 200 if (word[0] < 0x80) { 201 /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */ 202 scalar += (uint32_t)word[0]; 203 word[0] = (uint8_t)(scalar & 0x7Fu); 204 return 1; 205 } else if (word[0] < 0xC0) { 206 /* Continuation / 10AAAAAA. */ 207 return 1; 208 } else if (word[0] < 0xE0) { 209 /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */ 210 if (word_len < 2) return 1; 211 scalar += (uint32_t)((word[1] & 0x3Fu) | ((word[0] & 0x1Fu) << 6u)); 212 word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F)); 213 word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F)); 214 return 2; 215 } else if (word[0] < 0xF0) { 216 /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */ 217 if (word_len < 3) return word_len; 218 scalar += (uint32_t)((word[2] & 0x3Fu) | ((word[1] & 0x3Fu) << 6u) | 219 ((word[0] & 0x0Fu) << 12u)); 220 word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F)); 221 word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F)); 222 word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F)); 223 return 3; 224 } else if (word[0] < 0xF8) { 225 /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */ 226 if (word_len < 4) return word_len; 227 scalar += (uint32_t)((word[3] & 0x3Fu) | ((word[2] & 0x3Fu) << 6u) | 228 ((word[1] & 0x3Fu) << 12u) | ((word[0] & 0x07u) << 18u)); 229 word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07)); 230 word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F)); 231 word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F)); 232 word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F)); 233 return 4; 234 } 235 return 1; 236 } 237 238 int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len, 239 const BrotliTransforms* transforms, int transform_idx) { 240 int idx = 0; 241 const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx); 242 uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx); 243 const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx); 244 { 245 int prefix_len = *prefix++; 246 while (prefix_len--) { dst[idx++] = *prefix++; } 247 } 248 { 249 const int t = type; 250 int i = 0; 251 if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) { 252 len -= t; 253 } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1 254 && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) { 255 int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1); 256 word += skip; 257 len -= skip; 258 } 259 while (i < len) { dst[idx++] = word[i++]; } 260 if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) { 261 ToUpperCase(&dst[idx - len]); 262 } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) { 263 uint8_t* uppercase = &dst[idx - len]; 264 while (len > 0) { 265 int step = ToUpperCase(uppercase); 266 uppercase += step; 267 len -= step; 268 } 269 } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) { 270 uint16_t param = (uint16_t)(transforms->params[transform_idx * 2] 271 + (transforms->params[transform_idx * 2 + 1] << 8u)); 272 Shift(&dst[idx - len], len, param); 273 } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) { 274 uint16_t param = (uint16_t)(transforms->params[transform_idx * 2] 275 + (transforms->params[transform_idx * 2 + 1] << 8u)); 276 uint8_t* shift = &dst[idx - len]; 277 while (len > 0) { 278 int step = Shift(shift, len, param); 279 shift += step; 280 len -= step; 281 } 282 } 283 } 284 { 285 int suffix_len = *suffix++; 286 while (suffix_len--) { dst[idx++] = *suffix++; } 287 return idx; 288 } 289 } 290 291 #if defined(__cplusplus) || defined(c_plusplus) 292 } /* extern "C" */ 293 #endif