hash.h (24296B)
1 /* Copyright 2010 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 /* A (forgetful) hash table to the data seen by the compressor, to 8 help create backward references to previous data. */ 9 10 #ifndef BROTLI_ENC_HASH_H_ 11 #define BROTLI_ENC_HASH_H_ 12 13 #include "../common/constants.h" 14 #include "../common/dictionary.h" 15 #include "../common/platform.h" 16 #include "compound_dictionary.h" 17 #include "encoder_dict.h" 18 #include "fast_log.h" 19 #include "find_match_length.h" 20 #include "hash_base.h" 21 #include "matching_tag_mask.h" 22 #include "memory.h" 23 #include "params.h" 24 #include "quality.h" 25 #include "static_dict.h" 26 27 #if defined(__cplusplus) || defined(c_plusplus) 28 extern "C" { 29 #endif 30 31 typedef struct { 32 /** 33 * Dynamically allocated areas; regular hasher uses one or two allocations; 34 * "composite" hasher uses up to 4 allocations. 35 */ 36 void* extra[4]; 37 38 /** 39 * False before the first invocation of HasherSetup (where "extra" memory) 40 * is allocated. 41 */ 42 BROTLI_BOOL is_setup_; 43 44 size_t dict_num_lookups; 45 size_t dict_num_matches; 46 47 BrotliHasherParams params; 48 49 /** 50 * False if hasher needs to be "prepared" before use (before the first 51 * invocation of HasherSetup or after HasherReset). "preparation" is hasher 52 * data initialization (using input ringbuffer). 53 */ 54 BROTLI_BOOL is_prepared_; 55 } HasherCommon; 56 57 #define score_t size_t 58 59 static const uint32_t kCutoffTransformsCount = 10; 60 /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */ 61 /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */ 62 static const uint64_t kCutoffTransforms = 63 BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200); 64 65 typedef struct HasherSearchResult { 66 size_t len; 67 size_t distance; 68 score_t score; 69 int len_code_delta; /* == len_code - len */ 70 } HasherSearchResult; 71 72 static BROTLI_INLINE void PrepareDistanceCache( 73 int* BROTLI_RESTRICT distance_cache, const int num_distances) { 74 if (num_distances > 4) { 75 int last_distance = distance_cache[0]; 76 distance_cache[4] = last_distance - 1; 77 distance_cache[5] = last_distance + 1; 78 distance_cache[6] = last_distance - 2; 79 distance_cache[7] = last_distance + 2; 80 distance_cache[8] = last_distance - 3; 81 distance_cache[9] = last_distance + 3; 82 if (num_distances > 10) { 83 int next_last_distance = distance_cache[1]; 84 distance_cache[10] = next_last_distance - 1; 85 distance_cache[11] = next_last_distance + 1; 86 distance_cache[12] = next_last_distance - 2; 87 distance_cache[13] = next_last_distance + 2; 88 distance_cache[14] = next_last_distance - 3; 89 distance_cache[15] = next_last_distance + 3; 90 } 91 } 92 } 93 94 #define BROTLI_LITERAL_BYTE_SCORE 135 95 #define BROTLI_DISTANCE_BIT_PENALTY 30 96 /* Score must be positive after applying maximal penalty. */ 97 #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t)) 98 99 /* Usually, we always choose the longest backward reference. This function 100 allows for the exception of that rule. 101 102 If we choose a backward reference that is further away, it will 103 usually be coded with more bits. We approximate this by assuming 104 log2(distance). If the distance can be expressed in terms of the 105 last four distances, we use some heuristic constants to estimate 106 the bits cost. For the first up to four literals we use the bit 107 cost of the literals from the literal cost model, after that we 108 use the average bit cost of the cost model. 109 110 This function is used to sometimes discard a longer backward reference 111 when it is not much longer and the bit cost for encoding it is more 112 than the saved literals. 113 114 backward_reference_offset MUST be positive. */ 115 static BROTLI_INLINE score_t BackwardReferenceScore( 116 size_t copy_length, size_t backward_reference_offset) { 117 return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length - 118 BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset); 119 } 120 121 static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance( 122 size_t copy_length) { 123 return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + 124 BROTLI_SCORE_BASE + 15; 125 } 126 127 static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( 128 size_t distance_short_code) { 129 return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE); 130 } 131 132 static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( 133 const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx, 134 const uint8_t* data, size_t max_length, size_t max_backward, 135 size_t max_distance, HasherSearchResult* out) { 136 size_t offset; 137 size_t matchlen; 138 size_t backward; 139 score_t score; 140 offset = dictionary->words->offsets_by_length[len] + len * word_idx; 141 if (len > max_length) { 142 return BROTLI_FALSE; 143 } 144 145 matchlen = 146 FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); 147 if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { 148 return BROTLI_FALSE; 149 } 150 { 151 size_t cut = len - matchlen; 152 size_t transform_id = (cut << 2) + 153 (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); 154 backward = max_backward + 1 + word_idx + 155 (transform_id << dictionary->words->size_bits_by_length[len]); 156 } 157 if (backward > max_distance) { 158 return BROTLI_FALSE; 159 } 160 score = BackwardReferenceScore(matchlen, backward); 161 if (score < out->score) { 162 return BROTLI_FALSE; 163 } 164 out->len = matchlen; 165 out->len_code_delta = (int)len - (int)matchlen; 166 out->distance = backward; 167 out->score = score; 168 return BROTLI_TRUE; 169 } 170 171 static BROTLI_INLINE void SearchInStaticDictionary( 172 const BrotliEncoderDictionary* dictionary, 173 HasherCommon* common, const uint8_t* data, size_t max_length, 174 size_t max_backward, size_t max_distance, 175 HasherSearchResult* out, BROTLI_BOOL shallow) { 176 size_t key; 177 size_t i; 178 if (common->dict_num_matches < (common->dict_num_lookups >> 7)) { 179 return; 180 } 181 key = Hash14(data) << 1; 182 for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { 183 common->dict_num_lookups++; 184 if (dictionary->hash_table_lengths[key] != 0) { 185 BROTLI_BOOL item_matches = TestStaticDictionaryItem( 186 dictionary, dictionary->hash_table_lengths[key], 187 dictionary->hash_table_words[key], data, 188 max_length, max_backward, max_distance, out); 189 if (item_matches) { 190 common->dict_num_matches++; 191 } 192 } 193 } 194 } 195 196 typedef struct BackwardMatch { 197 uint32_t distance; 198 uint32_t length_and_code; 199 } BackwardMatch; 200 201 static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self, 202 size_t dist, size_t len) { 203 self->distance = (uint32_t)dist; 204 self->length_and_code = (uint32_t)(len << 5); 205 } 206 207 static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self, 208 size_t dist, size_t len, size_t len_code) { 209 self->distance = (uint32_t)dist; 210 self->length_and_code = 211 (uint32_t)((len << 5) | (len == len_code ? 0 : len_code)); 212 } 213 214 static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) { 215 return self->length_and_code >> 5; 216 } 217 218 static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { 219 size_t code = self->length_and_code & 31; 220 return code ? code : BackwardMatchLength(self); 221 } 222 223 #define EXPAND_CAT(a, b) CAT(a, b) 224 #define CAT(a, b) a ## b 225 #define FN(X) EXPAND_CAT(X, HASHER()) 226 227 #define HASHER() H10 228 #define BUCKET_BITS 17 229 #define MAX_TREE_SEARCH_DEPTH 64 230 #define MAX_TREE_COMP_LENGTH 128 231 #include "hash_to_binary_tree_inc.h" /* NOLINT(build/include) */ 232 #undef MAX_TREE_SEARCH_DEPTH 233 #undef MAX_TREE_COMP_LENGTH 234 #undef BUCKET_BITS 235 #undef HASHER 236 /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */ 237 #define MAX_NUM_MATCHES_H10 128 238 239 /* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression 240 a little faster (0.5% - 1%) and it compresses 0.15% better on small text 241 and HTML inputs. */ 242 243 #define HASHER() H2 244 #define BUCKET_BITS 16 245 #define BUCKET_SWEEP_BITS 0 246 #define HASH_LEN 5 247 #define USE_DICTIONARY 1 248 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 249 #undef BUCKET_SWEEP_BITS 250 #undef USE_DICTIONARY 251 #undef HASHER 252 253 #define HASHER() H3 254 #define BUCKET_SWEEP_BITS 1 255 #define USE_DICTIONARY 0 256 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 257 #undef USE_DICTIONARY 258 #undef BUCKET_SWEEP_BITS 259 #undef BUCKET_BITS 260 #undef HASHER 261 262 #define HASHER() H4 263 #define BUCKET_BITS 17 264 #define BUCKET_SWEEP_BITS 2 265 #define USE_DICTIONARY 1 266 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 267 #undef USE_DICTIONARY 268 #undef HASH_LEN 269 #undef BUCKET_SWEEP_BITS 270 #undef BUCKET_BITS 271 #undef HASHER 272 273 #define HASHER() H5 274 #include "hash_longest_match_inc.h" /* NOLINT(build/include) */ 275 #undef HASHER 276 277 #define HASHER() H6 278 #include "hash_longest_match64_inc.h" /* NOLINT(build/include) */ 279 #undef HASHER 280 281 #if defined(BROTLI_MAX_SIMD_QUALITY) 282 #define HASHER() H58 283 #include "hash_longest_match_simd_inc.h" /* NOLINT(build/include) */ 284 #undef HASHER 285 286 #define HASHER() H68 287 #include "hash_longest_match64_simd_inc.h" /* NOLINT(build/include) */ 288 #undef HASHER 289 #endif 290 291 #define BUCKET_BITS 15 292 293 #define NUM_LAST_DISTANCES_TO_CHECK 4 294 #define NUM_BANKS 1 295 #define BANK_BITS 16 296 #define HASHER() H40 297 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ 298 #undef HASHER 299 #undef NUM_LAST_DISTANCES_TO_CHECK 300 301 #define NUM_LAST_DISTANCES_TO_CHECK 10 302 #define HASHER() H41 303 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ 304 #undef HASHER 305 #undef NUM_LAST_DISTANCES_TO_CHECK 306 #undef NUM_BANKS 307 #undef BANK_BITS 308 309 #define NUM_LAST_DISTANCES_TO_CHECK 16 310 #define NUM_BANKS 512 311 #define BANK_BITS 9 312 #define HASHER() H42 313 #include "hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ 314 #undef HASHER 315 #undef NUM_LAST_DISTANCES_TO_CHECK 316 #undef NUM_BANKS 317 #undef BANK_BITS 318 319 #undef BUCKET_BITS 320 321 #define HASHER() H54 322 #define BUCKET_BITS 20 323 #define BUCKET_SWEEP_BITS 2 324 #define HASH_LEN 7 325 #define USE_DICTIONARY 0 326 #include "hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ 327 #undef USE_DICTIONARY 328 #undef HASH_LEN 329 #undef BUCKET_SWEEP_BITS 330 #undef BUCKET_BITS 331 #undef HASHER 332 333 /* fast large window hashers */ 334 335 #define HASHER() HROLLING_FAST 336 #define CHUNKLEN 32 337 #define JUMP 4 338 #define NUMBUCKETS 16777216 339 #define MASK ((NUMBUCKETS * 64) - 1) 340 #include "hash_rolling_inc.h" /* NOLINT(build/include) */ 341 #undef JUMP 342 #undef HASHER 343 344 345 #define HASHER() HROLLING 346 #define JUMP 1 347 #include "hash_rolling_inc.h" /* NOLINT(build/include) */ 348 #undef MASK 349 #undef NUMBUCKETS 350 #undef JUMP 351 #undef CHUNKLEN 352 #undef HASHER 353 354 #define HASHER() H35 355 #define HASHER_A H3 356 #define HASHER_B HROLLING_FAST 357 #include "hash_composite_inc.h" /* NOLINT(build/include) */ 358 #undef HASHER_A 359 #undef HASHER_B 360 #undef HASHER 361 362 #define HASHER() H55 363 #define HASHER_A H54 364 #define HASHER_B HROLLING_FAST 365 #include "hash_composite_inc.h" /* NOLINT(build/include) */ 366 #undef HASHER_A 367 #undef HASHER_B 368 #undef HASHER 369 370 #define HASHER() H65 371 #define HASHER_A H6 372 #define HASHER_B HROLLING 373 #include "hash_composite_inc.h" /* NOLINT(build/include) */ 374 #undef HASHER_A 375 #undef HASHER_B 376 #undef HASHER 377 378 #undef FN 379 #undef CAT 380 #undef EXPAND_CAT 381 382 #if defined(BROTLI_MAX_SIMD_QUALITY) 383 #define FOR_SIMPLE_HASHERS(H) \ 384 H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54) H(58) H(68) 385 #else 386 #define FOR_SIMPLE_HASHERS(H) \ 387 H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54) 388 #endif 389 #define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65) 390 #define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H) 391 #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10) 392 393 typedef struct { 394 HasherCommon common; 395 396 union { 397 #define MEMBER_(N) \ 398 H ## N _H ## N; 399 FOR_ALL_HASHERS(MEMBER_) 400 #undef MEMBER_ 401 } privat; 402 } Hasher; 403 404 /* MUST be invoked before any other method. */ 405 static BROTLI_INLINE void HasherInit(Hasher* hasher) { 406 hasher->common.is_setup_ = BROTLI_FALSE; 407 hasher->common.extra[0] = NULL; 408 hasher->common.extra[1] = NULL; 409 hasher->common.extra[2] = NULL; 410 hasher->common.extra[3] = NULL; 411 } 412 413 static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) { 414 if (hasher->common.extra[0] != NULL) BROTLI_FREE(m, hasher->common.extra[0]); 415 if (hasher->common.extra[1] != NULL) BROTLI_FREE(m, hasher->common.extra[1]); 416 if (hasher->common.extra[2] != NULL) BROTLI_FREE(m, hasher->common.extra[2]); 417 if (hasher->common.extra[3] != NULL) BROTLI_FREE(m, hasher->common.extra[3]); 418 } 419 420 static BROTLI_INLINE void HasherReset(Hasher* hasher) { 421 hasher->common.is_prepared_ = BROTLI_FALSE; 422 } 423 424 static BROTLI_INLINE void HasherSize(const BrotliEncoderParams* params, 425 BROTLI_BOOL one_shot, const size_t input_size, size_t* alloc_size) { 426 switch (params->hasher.type) { 427 #define SIZE_(N) \ 428 case N: \ 429 HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \ 430 break; 431 FOR_ALL_HASHERS(SIZE_) 432 #undef SIZE_ 433 default: 434 break; 435 } 436 } 437 438 static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher, 439 BrotliEncoderParams* params, const uint8_t* data, size_t position, 440 size_t input_size, BROTLI_BOOL is_last) { 441 BROTLI_BOOL one_shot = (position == 0 && is_last); 442 if (!hasher->common.is_setup_) { 443 size_t alloc_size[4] = {0}; 444 size_t i; 445 ChooseHasher(params, ¶ms->hasher); 446 hasher->common.params = params->hasher; 447 hasher->common.dict_num_lookups = 0; 448 hasher->common.dict_num_matches = 0; 449 HasherSize(params, one_shot, input_size, alloc_size); 450 for (i = 0; i < 4; ++i) { 451 if (alloc_size[i] == 0) continue; 452 hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]); 453 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return; 454 } 455 switch (hasher->common.params.type) { 456 #define INITIALIZE_(N) \ 457 case N: \ 458 InitializeH ## N(&hasher->common, \ 459 &hasher->privat._H ## N, params); \ 460 break; 461 FOR_ALL_HASHERS(INITIALIZE_); 462 #undef INITIALIZE_ 463 default: 464 break; 465 } 466 HasherReset(hasher); 467 hasher->common.is_setup_ = BROTLI_TRUE; 468 } 469 470 if (!hasher->common.is_prepared_) { 471 switch (hasher->common.params.type) { 472 #define PREPARE_(N) \ 473 case N: \ 474 PrepareH ## N( \ 475 &hasher->privat._H ## N, \ 476 one_shot, input_size, data); \ 477 break; 478 FOR_ALL_HASHERS(PREPARE_) 479 #undef PREPARE_ 480 default: break; 481 } 482 hasher->common.is_prepared_ = BROTLI_TRUE; 483 } 484 } 485 486 static BROTLI_INLINE void InitOrStitchToPreviousBlock( 487 MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask, 488 BrotliEncoderParams* params, size_t position, size_t input_size, 489 BROTLI_BOOL is_last) { 490 HasherSetup(m, hasher, params, data, position, input_size, is_last); 491 if (BROTLI_IS_OOM(m)) return; 492 switch (hasher->common.params.type) { 493 #define INIT_(N) \ 494 case N: \ 495 StitchToPreviousBlockH ## N( \ 496 &hasher->privat._H ## N, \ 497 input_size, position, data, mask); \ 498 break; 499 FOR_ALL_HASHERS(INIT_) 500 #undef INIT_ 501 default: break; 502 } 503 } 504 505 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget 506 to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */ 507 static BROTLI_INLINE void FindCompoundDictionaryMatch( 508 const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data, 509 const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, 510 const size_t cur_ix, const size_t max_length, const size_t distance_offset, 511 const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) { 512 const uint32_t source_size = self->source_size; 513 const size_t boundary = distance_offset - source_size; 514 const uint32_t hash_bits = self->hash_bits; 515 const uint32_t bucket_bits = self->bucket_bits; 516 const uint32_t slot_bits = self->slot_bits; 517 518 const uint32_t hash_shift = 64u - bucket_bits; 519 const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits); 520 const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits); 521 522 const uint32_t* slot_offsets = (uint32_t*)(&self[1]); 523 const uint16_t* heads = (uint16_t*)(&slot_offsets[(size_t)1u << slot_bits]); 524 const uint32_t* items = (uint32_t*)(&heads[(size_t)1u << bucket_bits]); 525 const uint8_t* source = NULL; 526 527 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; 528 score_t best_score = out->score; 529 size_t best_len = out->len; 530 size_t i; 531 const uint64_t h = 532 (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) * 533 kPreparedDictionaryHashMul64Long; 534 const uint32_t key = (uint32_t)(h >> hash_shift); 535 const uint32_t slot = key & slot_mask; 536 const uint32_t head = heads[key]; 537 const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head]; 538 uint32_t item = (head == 0xFFFF) ? 1 : 0; 539 540 const void* tail = (void*)&items[self->num_items]; 541 if (self->magic == kPreparedDictionaryMagic) { 542 source = (const uint8_t*)tail; 543 } else { 544 /* kLeanPreparedDictionaryMagic */ 545 source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); 546 } 547 548 BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask); 549 550 for (i = 0; i < 4; ++i) { 551 const size_t distance = (size_t)distance_cache[i]; 552 size_t offset; 553 size_t limit; 554 size_t len; 555 if (distance <= boundary || distance > distance_offset) continue; 556 offset = distance_offset - distance; 557 limit = source_size - offset; 558 limit = limit > max_length ? max_length : limit; 559 len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked], 560 limit); 561 if (len >= 2) { 562 score_t score = BackwardReferenceScoreUsingLastDistance(len); 563 if (best_score < score) { 564 if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i); 565 if (best_score < score) { 566 best_score = score; 567 if (len > best_len) best_len = len; 568 out->len = len; 569 out->len_code_delta = 0; 570 out->distance = distance; 571 out->score = best_score; 572 } 573 } 574 } 575 } 576 /* we require matches of len >4, so increase best_len to 3, so we can compare 577 * 4 bytes all the time. */ 578 if (best_len < 3) { 579 best_len = 3; 580 } 581 while (item == 0) { 582 size_t offset; 583 size_t distance; 584 size_t limit; 585 item = *chain; 586 chain++; 587 offset = item & 0x7FFFFFFF; 588 item &= 0x80000000; 589 distance = distance_offset - offset; 590 limit = source_size - offset; 591 limit = (limit > max_length) ? max_length : limit; 592 if (distance > max_distance) continue; 593 if (cur_ix_masked + best_len > ring_buffer_mask || best_len >= limit || 594 /* compare 4 bytes ending at best_len + 1 */ 595 BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) != 596 BrotliUnalignedRead32(&source[offset + best_len - 3])) { 597 continue; 598 } 599 { 600 const size_t len = FindMatchLengthWithLimit(&source[offset], 601 &data[cur_ix_masked], 602 limit); 603 if (len >= 4) { 604 score_t score = BackwardReferenceScore(len, distance); 605 if (best_score < score) { 606 best_score = score; 607 best_len = len; 608 out->len = best_len; 609 out->len_code_delta = 0; 610 out->distance = distance; 611 out->score = best_score; 612 } 613 } 614 } 615 } 616 } 617 618 /* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget 619 to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */ 620 static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches( 621 const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data, 622 const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length, 623 const size_t max_length, const size_t distance_offset, 624 const size_t max_distance, BackwardMatch* matches, size_t match_limit) { 625 const uint32_t source_size = self->source_size; 626 const uint32_t hash_bits = self->hash_bits; 627 const uint32_t bucket_bits = self->bucket_bits; 628 const uint32_t slot_bits = self->slot_bits; 629 630 const uint32_t hash_shift = 64u - bucket_bits; 631 const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits); 632 const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits); 633 634 const uint32_t* slot_offsets = (uint32_t*)(&self[1]); 635 const uint16_t* heads = (uint16_t*)(&slot_offsets[(size_t)1u << slot_bits]); 636 const uint32_t* items = (uint32_t*)(&heads[(size_t)1u << bucket_bits]); 637 const uint8_t* source = NULL; 638 639 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; 640 size_t best_len = min_length; 641 const uint64_t h = 642 (BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) * 643 kPreparedDictionaryHashMul64Long; 644 const uint32_t key = (uint32_t)(h >> hash_shift); 645 const uint32_t slot = key & slot_mask; 646 const uint32_t head = heads[key]; 647 const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head]; 648 uint32_t item = (head == 0xFFFF) ? 1 : 0; 649 size_t found = 0; 650 651 const void* tail = (void*)&items[self->num_items]; 652 if (self->magic == kPreparedDictionaryMagic) { 653 source = (const uint8_t*)tail; 654 } else { 655 /* kLeanPreparedDictionaryMagic */ 656 source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail); 657 } 658 659 BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask); 660 661 while (item == 0) { 662 size_t offset; 663 size_t distance; 664 size_t limit; 665 size_t len; 666 item = *chain; 667 chain++; 668 offset = item & 0x7FFFFFFF; 669 item &= 0x80000000; 670 distance = distance_offset - offset; 671 limit = source_size - offset; 672 limit = (limit > max_length) ? max_length : limit; 673 if (distance > max_distance) continue; 674 if (cur_ix_masked + best_len > ring_buffer_mask || 675 best_len >= limit || 676 data[cur_ix_masked + best_len] != source[offset + best_len]) { 677 continue; 678 } 679 len = FindMatchLengthWithLimit( 680 &source[offset], &data[cur_ix_masked], limit); 681 if (len > best_len) { 682 best_len = len; 683 InitBackwardMatch(matches++, distance, len); 684 found++; 685 if (found == match_limit) break; 686 } 687 } 688 return found; 689 } 690 691 static BROTLI_INLINE void LookupCompoundDictionaryMatch( 692 const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data, 693 const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache, 694 const size_t cur_ix, const size_t max_length, 695 const size_t max_ring_buffer_distance, const size_t max_distance, 696 HasherSearchResult* sr) { 697 size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1; 698 size_t d; 699 for (d = 0; d < addon->num_chunks; ++d) { 700 /* Only one prepared dictionary type is currently supported. */ 701 FindCompoundDictionaryMatch( 702 (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask, 703 distance_cache, cur_ix, max_length, 704 base_offset - addon->chunk_offsets[d], max_distance, sr); 705 } 706 } 707 708 static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches( 709 const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data, 710 const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length, 711 const size_t max_length, const size_t max_ring_buffer_distance, 712 const size_t max_distance, BackwardMatch* matches, 713 size_t match_limit) { 714 size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1; 715 size_t d; 716 size_t total_found = 0; 717 for (d = 0; d < addon->num_chunks; ++d) { 718 /* Only one prepared dictionary type is currently supported. */ 719 total_found += FindAllCompoundDictionaryMatches( 720 (const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask, 721 cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d], 722 max_distance, matches + total_found, match_limit - total_found); 723 if (total_found == match_limit) break; 724 if (total_found > 0) { 725 min_length = BackwardMatchLength(&matches[total_found - 1]); 726 } 727 } 728 return total_found; 729 } 730 731 #if defined(__cplusplus) || defined(c_plusplus) 732 } /* extern "C" */ 733 #endif 734 735 #endif /* BROTLI_ENC_HASH_H_ */