shared_dictionary.c (17347B)
1 /* Copyright 2017 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 /* Shared Dictionary definition and utilities. */ 8 9 #include <brotli/shared_dictionary.h> 10 11 #include "dictionary.h" 12 #include "platform.h" 13 #include "shared_dictionary_internal.h" 14 15 #if defined(__cplusplus) || defined(c_plusplus) 16 extern "C" { 17 #endif 18 19 #if defined(BROTLI_EXPERIMENTAL) 20 21 #define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \ 22 - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1) 23 24 /* Max allowed by spec */ 25 #define BROTLI_MAX_SIZE_BITS 15u 26 27 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */ 28 static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos, 29 BROTLI_BOOL* result) { 30 uint8_t value; 31 size_t position = *pos; 32 if (position >= size) return BROTLI_FALSE; /* past file end */ 33 value = encoded[position++]; 34 if (value > 1) return BROTLI_FALSE; /* invalid bool */ 35 *result = TO_BROTLI_BOOL(value); 36 *pos = position; 37 return BROTLI_TRUE; /* success */ 38 } 39 40 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */ 41 static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos, 42 uint8_t* result) { 43 size_t position = *pos; 44 if (position + sizeof(uint8_t) > size) return BROTLI_FALSE; 45 *result = encoded[position++]; 46 *pos = position; 47 return BROTLI_TRUE; 48 } 49 50 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */ 51 static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos, 52 uint16_t* result) { 53 size_t position = *pos; 54 if (position + sizeof(uint16_t) > size) return BROTLI_FALSE; 55 *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]); 56 position += 2; 57 *pos = position; 58 return BROTLI_TRUE; 59 } 60 61 /* Reads a varint into a uint32_t, and returns error if it's too large */ 62 /* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */ 63 static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size, 64 size_t* pos, uint32_t* result) { 65 int num = 0; 66 uint8_t byte; 67 *result = 0; 68 for (;;) { 69 if (*pos >= size) return BROTLI_FALSE; 70 byte = encoded[(*pos)++]; 71 if (num == 4 && byte > 15) return BROTLI_FALSE; 72 *result |= (uint32_t)(byte & 127) << (num * 7); 73 if (byte < 128) return BROTLI_TRUE; 74 num++; 75 } 76 } 77 78 /* Returns the total length of word list. */ 79 static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length, 80 uint32_t* offsets_by_length) { 81 uint32_t pos = 0; 82 uint32_t i; 83 for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) { 84 offsets_by_length[i] = pos; 85 if (size_bits_by_length[i] != 0) { 86 pos += i << size_bits_by_length[i]; 87 } 88 } 89 return pos; 90 } 91 92 static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded, 93 size_t* pos, BrotliDictionary* out) { 94 size_t offset; 95 size_t i; 96 size_t position = *pos; 97 if (position + BROTLI_NUM_ENCODED_LENGTHS > size) { 98 return BROTLI_FALSE; 99 } 100 101 memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH); 102 memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH, 103 &encoded[position], BROTLI_NUM_ENCODED_LENGTHS); 104 for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; 105 i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) { 106 if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) { 107 return BROTLI_FALSE; 108 } 109 } 110 position += BROTLI_NUM_ENCODED_LENGTHS; 111 offset = BrotliSizeBitsToOffsets( 112 out->size_bits_by_length, out->offsets_by_length); 113 114 out->data = &encoded[position]; 115 out->data_size = offset; 116 position += offset; 117 if (position > size) return BROTLI_FALSE; 118 *pos = position; 119 return BROTLI_TRUE; 120 } 121 122 /* Computes the cutOffTransforms of a BrotliTransforms which already has the 123 transforms data correctly filled in. */ 124 static void ComputeCutoffTransforms(BrotliTransforms* transforms) { 125 uint32_t i; 126 for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) { 127 transforms->cutOffTransforms[i] = -1; 128 } 129 for (i = 0; i < transforms->num_transforms; i++) { 130 const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i); 131 uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i); 132 const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i); 133 if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 && 134 transforms->cutOffTransforms[type] == -1) { 135 transforms->cutOffTransforms[type] = (int16_t)i; 136 } 137 } 138 } 139 140 static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded, 141 size_t* pos, BrotliTransforms* out, uint16_t* out_table, 142 size_t* out_table_size) { 143 size_t position = *pos; 144 size_t offset = 0; 145 size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */ 146 size_t data_length = 0; 147 148 /* PREFIX_SUFFIX_LENGTH */ 149 if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) { 150 return BROTLI_FALSE; 151 } 152 data_length = out->prefix_suffix_size; 153 154 /* Must at least have space for null terminator. */ 155 if (data_length < 1) return BROTLI_FALSE; 156 out->prefix_suffix = &encoded[position]; 157 if (position + data_length >= size) return BROTLI_FALSE; 158 while (BROTLI_TRUE) { 159 /* STRING_LENGTH */ 160 size_t stringlet_len = encoded[position + offset]; 161 out_table[stringlet_count] = (uint16_t)offset; 162 stringlet_count++; 163 offset++; 164 if (stringlet_len == 0) { 165 if (offset == data_length) { 166 break; 167 } else { 168 return BROTLI_FALSE; 169 } 170 } 171 if (stringlet_count > 255) return BROTLI_FALSE; 172 offset += stringlet_len; 173 if (offset >= data_length) return BROTLI_FALSE; 174 } 175 176 position += data_length; 177 *pos = position; 178 *out_table_size = (uint16_t)stringlet_count; 179 return BROTLI_TRUE; 180 } 181 182 static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded, 183 size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table, 184 size_t* prefix_suffix_count) { 185 uint32_t i; 186 BROTLI_BOOL has_params = BROTLI_FALSE; 187 BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE; 188 size_t position = *pos; 189 size_t stringlet_cnt = 0; 190 if (position >= size) return BROTLI_FALSE; 191 192 prefix_suffix_ok = ParsePrefixSuffixTable( 193 size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt); 194 if (!prefix_suffix_ok) return BROTLI_FALSE; 195 out->prefix_suffix_map = prefix_suffix_table; 196 *prefix_suffix_count = stringlet_cnt; 197 198 out->num_transforms = encoded[position++]; 199 out->transforms = &encoded[position]; 200 position += (size_t)out->num_transforms * 3; 201 if (position > size) return BROTLI_FALSE; 202 /* Check for errors and read extra parameters. */ 203 for (i = 0; i < out->num_transforms; i++) { 204 uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i); 205 uint8_t type = BROTLI_TRANSFORM_TYPE(out, i); 206 uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i); 207 if (prefix_id >= stringlet_cnt) return BROTLI_FALSE; 208 if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE; 209 if (suffix_id >= stringlet_cnt) return BROTLI_FALSE; 210 if (type == BROTLI_TRANSFORM_SHIFT_FIRST || 211 type == BROTLI_TRANSFORM_SHIFT_ALL) { 212 has_params = BROTLI_TRUE; 213 } 214 } 215 if (has_params) { 216 out->params = &encoded[position]; 217 position += (size_t)out->num_transforms * 2; 218 if (position > size) return BROTLI_FALSE; 219 for (i = 0; i < out->num_transforms; i++) { 220 uint8_t type = BROTLI_TRANSFORM_TYPE(out, i); 221 if (type != BROTLI_TRANSFORM_SHIFT_FIRST && 222 type != BROTLI_TRANSFORM_SHIFT_ALL) { 223 if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) { 224 return BROTLI_FALSE; 225 } 226 } 227 } 228 } else { 229 out->params = NULL; 230 } 231 ComputeCutoffTransforms(out); 232 *pos = position; 233 return BROTLI_TRUE; 234 } 235 236 static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded, 237 size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) { 238 size_t pos = 0; 239 uint32_t chunk_size = 0; 240 uint8_t num_word_lists; 241 uint8_t num_transform_lists; 242 *is_custom_static_dict = BROTLI_FALSE; 243 *num_prefix = 0; 244 245 /* Skip magic header bytes. */ 246 pos += 2; 247 248 /* LZ77_DICTIONARY_LENGTH */ 249 if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE; 250 if (chunk_size != 0) { 251 /* This limitation is not specified but the 32-bit Brotli decoder for now */ 252 if (chunk_size > 1073741823) return BROTLI_FALSE; 253 *num_prefix = 1; 254 if (pos + chunk_size > size) return BROTLI_FALSE; 255 pos += chunk_size; 256 } 257 258 if (!ReadUint8(encoded, size, &pos, &num_word_lists)) { 259 return BROTLI_FALSE; 260 } 261 if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) { 262 return BROTLI_FALSE; 263 } 264 265 if (num_word_lists > 0 || num_transform_lists > 0) { 266 *is_custom_static_dict = BROTLI_TRUE; 267 } 268 269 return BROTLI_TRUE; 270 } 271 272 static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size, 273 BrotliSharedDictionary* dict) { 274 uint32_t i; 275 size_t pos = 0; 276 uint32_t chunk_size = 0; 277 size_t total_prefix_suffix_count = 0; 278 size_t transform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS]; 279 uint16_t temporary_prefix_suffix_table[256]; 280 281 /* Skip magic header bytes. */ 282 pos += 2; 283 284 /* LZ77_DICTIONARY_LENGTH */ 285 if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE; 286 if (chunk_size != 0) { 287 if (pos + chunk_size > size) return BROTLI_FALSE; 288 dict->prefix_size[dict->num_prefix] = chunk_size; 289 dict->prefix[dict->num_prefix] = &encoded[pos]; 290 dict->num_prefix++; 291 /* LZ77_DICTIONARY_LENGTH bytes. */ 292 pos += chunk_size; 293 } 294 295 /* NUM_WORD_LISTS */ 296 if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) { 297 return BROTLI_FALSE; 298 } 299 if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) { 300 return BROTLI_FALSE; 301 } 302 303 if (dict->num_word_lists != 0) { 304 dict->words_instances = (BrotliDictionary*)dict->alloc_func( 305 dict->memory_manager_opaque, 306 dict->num_word_lists * sizeof(*dict->words_instances)); 307 if (!dict->words_instances) return BROTLI_FALSE; /* OOM */ 308 } 309 for (i = 0; i < dict->num_word_lists; i++) { 310 if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) { 311 return BROTLI_FALSE; 312 } 313 } 314 315 /* NUM_TRANSFORM_LISTS */ 316 if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) { 317 return BROTLI_FALSE; 318 } 319 if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) { 320 return BROTLI_FALSE; 321 } 322 323 if (dict->num_transform_lists != 0) { 324 dict->transforms_instances = (BrotliTransforms*)dict->alloc_func( 325 dict->memory_manager_opaque, 326 dict->num_transform_lists * sizeof(*dict->transforms_instances)); 327 if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */ 328 } 329 for (i = 0; i < dict->num_transform_lists; i++) { 330 BROTLI_BOOL ok = BROTLI_FALSE; 331 size_t prefix_suffix_count = 0; 332 transform_list_start[i] = pos; 333 dict->transforms_instances[i].prefix_suffix_map = 334 temporary_prefix_suffix_table; 335 ok = ParseTransformsList( 336 size, encoded, &pos, &dict->transforms_instances[i], 337 temporary_prefix_suffix_table, &prefix_suffix_count); 338 if (!ok) return BROTLI_FALSE; 339 total_prefix_suffix_count += prefix_suffix_count; 340 } 341 if (total_prefix_suffix_count != 0) { 342 dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func( 343 dict->memory_manager_opaque, 344 total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps)); 345 if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */ 346 } 347 total_prefix_suffix_count = 0; 348 for (i = 0; i < dict->num_transform_lists; i++) { 349 size_t prefix_suffix_count = 0; 350 size_t position = transform_list_start[i]; 351 uint16_t* prefix_suffix_map = 352 &dict->prefix_suffix_maps[total_prefix_suffix_count]; 353 BROTLI_BOOL ok = ParsePrefixSuffixTable( 354 size, encoded, &position, &dict->transforms_instances[i], 355 prefix_suffix_map, &prefix_suffix_count); 356 if (!ok) return BROTLI_FALSE; 357 dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map; 358 total_prefix_suffix_count += prefix_suffix_count; 359 } 360 361 if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) { 362 if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) { 363 return BROTLI_FALSE; 364 } 365 if (dict->num_dictionaries == 0 || 366 dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) { 367 return BROTLI_FALSE; 368 } 369 for (i = 0; i < dict->num_dictionaries; i++) { 370 uint8_t words_index; 371 uint8_t transforms_index; 372 if (!ReadUint8(encoded, size, &pos, &words_index)) { 373 return BROTLI_FALSE; 374 } 375 if (words_index > dict->num_word_lists) return BROTLI_FALSE; 376 if (!ReadUint8(encoded, size, &pos, &transforms_index)) { 377 return BROTLI_FALSE; 378 } 379 if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE; 380 dict->words[i] = words_index == dict->num_word_lists ? 381 BrotliGetDictionary() : &dict->words_instances[words_index]; 382 dict->transforms[i] = transforms_index == dict->num_transform_lists ? 383 BrotliGetTransforms(): &dict->transforms_instances[transforms_index]; 384 } 385 /* CONTEXT_ENABLED */ 386 if (!ReadBool(encoded, size, &pos, &dict->context_based)) { 387 return BROTLI_FALSE; 388 } 389 390 /* CONTEXT_MAP */ 391 if (dict->context_based) { 392 for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) { 393 if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) { 394 return BROTLI_FALSE; 395 } 396 if (dict->context_map[i] >= dict->num_dictionaries) { 397 return BROTLI_FALSE; 398 } 399 } 400 } 401 } else { 402 dict->context_based = BROTLI_FALSE; 403 dict->num_dictionaries = 1; 404 dict->words[0] = BrotliGetDictionary(); 405 dict->transforms[0] = BrotliGetTransforms(); 406 } 407 408 return BROTLI_TRUE; 409 } 410 411 /* Decodes shared dictionary and verifies correctness. 412 Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise. 413 The BrotliSharedDictionary must already have been initialized. If the 414 BrotliSharedDictionary already contains data, compound dictionaries 415 will be appended, but an error will be returned if it already has 416 custom words or transforms. 417 TODO(lode): link to RFC for shared brotli once published. */ 418 static BROTLI_BOOL DecodeSharedDictionary( 419 const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) { 420 uint32_t num_prefix = 0; 421 BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE; 422 BROTLI_BOOL has_custom_static_dict = 423 dict->num_word_lists > 0 || dict->num_transform_lists > 0; 424 425 /* Check magic header bytes. */ 426 if (size < 2) return BROTLI_FALSE; 427 if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE; 428 429 if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) { 430 return BROTLI_FALSE; 431 } 432 433 if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) { 434 return BROTLI_FALSE; 435 } 436 437 /* Cannot combine different static dictionaries, only prefix dictionaries */ 438 if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE; 439 440 return ParseDictionary(encoded, size, dict); 441 } 442 443 #endif /* BROTLI_EXPERIMENTAL */ 444 445 void BrotliSharedDictionaryDestroyInstance( 446 BrotliSharedDictionary* dict) { 447 if (!dict) { 448 return; 449 } else { 450 brotli_free_func free_func = dict->free_func; 451 void* opaque = dict->memory_manager_opaque; 452 /* Cleanup. */ 453 free_func(opaque, dict->words_instances); 454 free_func(opaque, dict->transforms_instances); 455 free_func(opaque, dict->prefix_suffix_maps); 456 /* Self-destruction. */ 457 free_func(opaque, dict); 458 } 459 } 460 461 BROTLI_BOOL BrotliSharedDictionaryAttach( 462 BrotliSharedDictionary* dict, BrotliSharedDictionaryType type, 463 size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) { 464 if (!dict) { 465 return BROTLI_FALSE; 466 } 467 #if defined(BROTLI_EXPERIMENTAL) 468 if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) { 469 return DecodeSharedDictionary(data, data_size, dict); 470 } 471 #endif /* BROTLI_EXPERIMENTAL */ 472 if (type == BROTLI_SHARED_DICTIONARY_RAW) { 473 if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) { 474 return BROTLI_FALSE; 475 } 476 dict->prefix_size[dict->num_prefix] = data_size; 477 dict->prefix[dict->num_prefix] = data; 478 dict->num_prefix++; 479 return BROTLI_TRUE; 480 } 481 return BROTLI_FALSE; 482 } 483 484 BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance( 485 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) { 486 BrotliSharedDictionary* dict = 0; 487 if (!alloc_func && !free_func) { 488 dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary)); 489 } else if (alloc_func && free_func) { 490 dict = (BrotliSharedDictionary*)alloc_func( 491 opaque, sizeof(BrotliSharedDictionary)); 492 } 493 if (dict == 0) { 494 return 0; 495 } 496 497 /* TODO(eustas): explicitly initialize all the fields? */ 498 memset(dict, 0, sizeof(BrotliSharedDictionary)); 499 500 dict->context_based = BROTLI_FALSE; 501 dict->num_dictionaries = 1; 502 dict->num_word_lists = 0; 503 dict->num_transform_lists = 0; 504 505 dict->words[0] = BrotliGetDictionary(); 506 dict->transforms[0] = BrotliGetTransforms(); 507 508 dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc; 509 dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc; 510 dict->memory_manager_opaque = opaque; 511 512 return dict; 513 } 514 515 #if defined(__cplusplus) || defined(c_plusplus) 516 } /* extern "C" */ 517 #endif