compress_fragment.c (32875B)
1 /* Copyright 2015 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 /* Function for fast encoding of an input fragment, independently from the input 8 history. This function uses one-pass processing: when we find a backward 9 match, we immediately emit the corresponding command and literal codes to 10 the bit stream. 11 12 Adapted from the CompressFragment() function in 13 https://github.com/google/snappy/blob/master/snappy.cc */ 14 15 #include "compress_fragment.h" 16 17 #include "../common/constants.h" 18 #include "../common/platform.h" 19 #include "brotli_bit_stream.h" 20 #include "entropy_encode.h" 21 #include "fast_log.h" 22 #include "find_match_length.h" 23 #include "hash_base.h" 24 #include "write_bits.h" 25 26 #if defined(__cplusplus) || defined(c_plusplus) 27 extern "C" { 28 #endif 29 30 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18) 31 32 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) { 33 const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32; 34 return (uint32_t)(h >> shift); 35 } 36 37 static BROTLI_INLINE uint32_t HashBytesAtOffset( 38 uint64_t v, int offset, size_t shift) { 39 BROTLI_DCHECK(offset >= 0); 40 BROTLI_DCHECK(offset <= 3); 41 { 42 const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32; 43 return (uint32_t)(h >> shift); 44 } 45 } 46 47 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) { 48 return TO_BROTLI_BOOL( 49 BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) && 50 p1[4] == p2[4]); 51 } 52 53 /* Builds a literal prefix code into "depths" and "bits" based on the statistics 54 of the "input" string and stores it into the bit stream. 55 Note that the prefix code here is built from the pre-LZ77 input, therefore 56 we can only approximate the statistics of the actual literal stream. 57 Moreover, for long inputs we build a histogram from a sample of the input 58 and thus have to assign a non-zero depth for each literal. 59 Returns estimated compression ratio millibytes/char for encoding given input 60 with generated code. */ 61 static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s, 62 const uint8_t* input, 63 const size_t input_size, 64 uint8_t depths[256], 65 uint16_t bits[256], 66 size_t* storage_ix, 67 uint8_t* storage) { 68 uint32_t* BROTLI_RESTRICT const histogram = s->histogram; 69 size_t histogram_total; 70 size_t i; 71 memset(histogram, 0, sizeof(s->histogram)); 72 73 if (input_size < (1 << 15)) { 74 for (i = 0; i < input_size; ++i) { 75 ++histogram[input[i]]; 76 } 77 histogram_total = input_size; 78 for (i = 0; i < 256; ++i) { 79 /* We weigh the first 11 samples with weight 3 to account for the 80 balancing effect of the LZ77 phase on the histogram. */ 81 const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u); 82 histogram[i] += adjust; 83 histogram_total += adjust; 84 } 85 } else { 86 static const size_t kSampleRate = 29; 87 for (i = 0; i < input_size; i += kSampleRate) { 88 ++histogram[input[i]]; 89 } 90 histogram_total = (input_size + kSampleRate - 1) / kSampleRate; 91 for (i = 0; i < 256; ++i) { 92 /* We add 1 to each population count to avoid 0 bit depths (since this is 93 only a sample and we don't know if the symbol appears or not), and we 94 weigh the first 11 samples with weight 3 to account for the balancing 95 effect of the LZ77 phase on the histogram (more frequent symbols are 96 more likely to be in backward references instead as literals). */ 97 const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u); 98 histogram[i] += adjust; 99 histogram_total += adjust; 100 } 101 } 102 BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total, 103 /* max_bits = */ 8, 104 depths, bits, storage_ix, storage); 105 { 106 size_t literal_ratio = 0; 107 for (i = 0; i < 256; ++i) { 108 if (histogram[i]) literal_ratio += histogram[i] * depths[i]; 109 } 110 /* Estimated encoding ratio, millibytes per symbol. */ 111 return (literal_ratio * 125) / histogram_total; 112 } 113 } 114 115 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and 116 "bits" based on "histogram" and stores it into the bit stream. */ 117 static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s, 118 size_t* storage_ix, uint8_t* storage) { 119 const uint32_t* const histogram = s->cmd_histo; 120 uint8_t* const depth = s->cmd_depth; 121 uint16_t* const bits = s->cmd_bits; 122 uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth; 123 uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits; 124 /* TODO(eustas): do only once on initialization. */ 125 memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS); 126 127 BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth); 128 BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]); 129 /* We have to jump through a few hoops here in order to compute 130 the command bits because the symbols are in a different order than in 131 the full alphabet. This looks complicated, but having the symbols 132 in this order in the command bits saves a few branches in the Emit* 133 functions. */ 134 memcpy(tmp_depth, depth, 24); 135 memcpy(tmp_depth + 24, depth + 40, 8); 136 memcpy(tmp_depth + 32, depth + 24, 8); 137 memcpy(tmp_depth + 40, depth + 48, 8); 138 memcpy(tmp_depth + 48, depth + 32, 8); 139 memcpy(tmp_depth + 56, depth + 56, 8); 140 BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits); 141 memcpy(bits, tmp_bits, 48); 142 memcpy(bits + 24, tmp_bits + 32, 16); 143 memcpy(bits + 32, tmp_bits + 48, 16); 144 memcpy(bits + 40, tmp_bits + 24, 16); 145 memcpy(bits + 48, tmp_bits + 40, 16); 146 memcpy(bits + 56, tmp_bits + 56, 16); 147 BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]); 148 { 149 /* Create the bit length array for the full command alphabet. */ 150 size_t i; 151 memset(tmp_depth, 0, 64); /* only 64 first values were used */ 152 memcpy(tmp_depth, depth, 8); 153 memcpy(tmp_depth + 64, depth + 8, 8); 154 memcpy(tmp_depth + 128, depth + 16, 8); 155 memcpy(tmp_depth + 192, depth + 24, 8); 156 memcpy(tmp_depth + 384, depth + 32, 8); 157 for (i = 0; i < 8; ++i) { 158 tmp_depth[128 + 8 * i] = depth[40 + i]; 159 tmp_depth[256 + 8 * i] = depth[48 + i]; 160 tmp_depth[448 + 8 * i] = depth[56 + i]; 161 } 162 /* TODO(eustas): could/should full-length machinery be avoided? */ 163 BrotliStoreHuffmanTree( 164 tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage); 165 } 166 BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage); 167 } 168 169 /* REQUIRES: insertlen < 6210 */ 170 static BROTLI_INLINE void EmitInsertLen(size_t insertlen, 171 const uint8_t depth[128], 172 const uint16_t bits[128], 173 uint32_t histo[128], 174 size_t* storage_ix, 175 uint8_t* storage) { 176 if (insertlen < 6) { 177 const size_t code = insertlen + 40; 178 BrotliWriteBits(depth[code], bits[code], storage_ix, storage); 179 ++histo[code]; 180 } else if (insertlen < 130) { 181 const size_t tail = insertlen - 2; 182 const uint32_t nbits = Log2FloorNonZero(tail) - 1u; 183 const size_t prefix = tail >> nbits; 184 const size_t inscode = (nbits << 1) + prefix + 42; 185 BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage); 186 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage); 187 ++histo[inscode]; 188 } else if (insertlen < 2114) { 189 const size_t tail = insertlen - 66; 190 const uint32_t nbits = Log2FloorNonZero(tail); 191 const size_t code = nbits + 50; 192 BrotliWriteBits(depth[code], bits[code], storage_ix, storage); 193 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage); 194 ++histo[code]; 195 } else { 196 BrotliWriteBits(depth[61], bits[61], storage_ix, storage); 197 BrotliWriteBits(12, insertlen - 2114, storage_ix, storage); 198 ++histo[61]; 199 } 200 } 201 202 static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen, 203 const uint8_t depth[128], 204 const uint16_t bits[128], 205 uint32_t histo[128], 206 size_t* storage_ix, 207 uint8_t* storage) { 208 if (insertlen < 22594) { 209 BrotliWriteBits(depth[62], bits[62], storage_ix, storage); 210 BrotliWriteBits(14, insertlen - 6210, storage_ix, storage); 211 ++histo[62]; 212 } else { 213 BrotliWriteBits(depth[63], bits[63], storage_ix, storage); 214 BrotliWriteBits(24, insertlen - 22594, storage_ix, storage); 215 ++histo[63]; 216 } 217 } 218 219 static BROTLI_INLINE void EmitCopyLen(size_t copylen, 220 const uint8_t depth[128], 221 const uint16_t bits[128], 222 uint32_t histo[128], 223 size_t* storage_ix, 224 uint8_t* storage) { 225 if (copylen < 10) { 226 BrotliWriteBits( 227 depth[copylen + 14], bits[copylen + 14], storage_ix, storage); 228 ++histo[copylen + 14]; 229 } else if (copylen < 134) { 230 const size_t tail = copylen - 6; 231 const uint32_t nbits = Log2FloorNonZero(tail) - 1u; 232 const size_t prefix = tail >> nbits; 233 const size_t code = (nbits << 1) + prefix + 20; 234 BrotliWriteBits(depth[code], bits[code], storage_ix, storage); 235 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage); 236 ++histo[code]; 237 } else if (copylen < 2118) { 238 const size_t tail = copylen - 70; 239 const uint32_t nbits = Log2FloorNonZero(tail); 240 const size_t code = nbits + 28; 241 BrotliWriteBits(depth[code], bits[code], storage_ix, storage); 242 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage); 243 ++histo[code]; 244 } else { 245 BrotliWriteBits(depth[39], bits[39], storage_ix, storage); 246 BrotliWriteBits(24, copylen - 2118, storage_ix, storage); 247 ++histo[39]; 248 } 249 } 250 251 static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen, 252 const uint8_t depth[128], 253 const uint16_t bits[128], 254 uint32_t histo[128], 255 size_t* storage_ix, 256 uint8_t* storage) { 257 if (copylen < 12) { 258 BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage); 259 ++histo[copylen - 4]; 260 } else if (copylen < 72) { 261 const size_t tail = copylen - 8; 262 const uint32_t nbits = Log2FloorNonZero(tail) - 1; 263 const size_t prefix = tail >> nbits; 264 const size_t code = (nbits << 1) + prefix + 4; 265 BrotliWriteBits(depth[code], bits[code], storage_ix, storage); 266 BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage); 267 ++histo[code]; 268 } else if (copylen < 136) { 269 const size_t tail = copylen - 8; 270 const size_t code = (tail >> 5) + 30; 271 BrotliWriteBits(depth[code], bits[code], storage_ix, storage); 272 BrotliWriteBits(5, tail & 31, storage_ix, storage); 273 BrotliWriteBits(depth[64], bits[64], storage_ix, storage); 274 ++histo[code]; 275 ++histo[64]; 276 } else if (copylen < 2120) { 277 const size_t tail = copylen - 72; 278 const uint32_t nbits = Log2FloorNonZero(tail); 279 const size_t code = nbits + 28; 280 BrotliWriteBits(depth[code], bits[code], storage_ix, storage); 281 BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage); 282 BrotliWriteBits(depth[64], bits[64], storage_ix, storage); 283 ++histo[code]; 284 ++histo[64]; 285 } else { 286 BrotliWriteBits(depth[39], bits[39], storage_ix, storage); 287 BrotliWriteBits(24, copylen - 2120, storage_ix, storage); 288 BrotliWriteBits(depth[64], bits[64], storage_ix, storage); 289 ++histo[39]; 290 ++histo[64]; 291 } 292 } 293 294 static BROTLI_INLINE void EmitDistance(size_t distance, 295 const uint8_t depth[128], 296 const uint16_t bits[128], 297 uint32_t histo[128], 298 size_t* storage_ix, uint8_t* storage) { 299 const size_t d = distance + 3; 300 const uint32_t nbits = Log2FloorNonZero(d) - 1u; 301 const size_t prefix = (d >> nbits) & 1; 302 const size_t offset = (2 + prefix) << nbits; 303 const size_t distcode = 2 * (nbits - 1) + prefix + 80; 304 BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage); 305 BrotliWriteBits(nbits, d - offset, storage_ix, storage); 306 ++histo[distcode]; 307 } 308 309 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len, 310 const uint8_t depth[256], 311 const uint16_t bits[256], 312 size_t* storage_ix, uint8_t* storage) { 313 size_t j; 314 for (j = 0; j < len; j++) { 315 const uint8_t lit = input[j]; 316 BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage); 317 } 318 } 319 320 /* REQUIRES: len <= 1 << 24. */ 321 static void BrotliStoreMetaBlockHeader( 322 size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix, 323 uint8_t* storage) { 324 size_t nibbles = 6; 325 /* ISLAST */ 326 BrotliWriteBits(1, 0, storage_ix, storage); 327 if (len <= (1U << 16)) { 328 nibbles = 4; 329 } else if (len <= (1U << 20)) { 330 nibbles = 5; 331 } 332 BrotliWriteBits(2, nibbles - 4, storage_ix, storage); 333 BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage); 334 /* ISUNCOMPRESSED */ 335 BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage); 336 } 337 338 static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos, 339 uint8_t* array) { 340 while (n_bits > 0) { 341 size_t byte_pos = pos >> 3; 342 size_t n_unchanged_bits = pos & 7; 343 size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits); 344 size_t total_bits = n_unchanged_bits + n_changed_bits; 345 uint32_t mask = 346 (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u); 347 uint32_t unchanged_bits = array[byte_pos] & mask; 348 uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u); 349 array[byte_pos] = 350 (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits); 351 n_bits -= n_changed_bits; 352 bits >>= n_changed_bits; 353 pos += n_changed_bits; 354 } 355 } 356 357 static void RewindBitPosition(const size_t new_storage_ix, 358 size_t* storage_ix, uint8_t* storage) { 359 const size_t bitpos = new_storage_ix & 7; 360 const size_t mask = (1u << bitpos) - 1; 361 storage[new_storage_ix >> 3] &= (uint8_t)mask; 362 *storage_ix = new_storage_ix; 363 } 364 365 static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s, 366 const uint8_t* data, size_t len, const uint8_t* depths) { 367 uint32_t* BROTLI_RESTRICT const histo = s->histogram; 368 static const size_t kSampleRate = 43; 369 size_t i; 370 memset(histo, 0, sizeof(s->histogram)); 371 for (i = 0; i < len; i += kSampleRate) { 372 ++histo[data[i]]; 373 } 374 { 375 const size_t total = (len + kSampleRate - 1) / kSampleRate; 376 double r = (FastLog2(total) + 0.5) * (double)total + 200; 377 for (i = 0; i < 256; ++i) { 378 r -= (double)histo[i] * (depths[i] + FastLog2(histo[i])); 379 } 380 return TO_BROTLI_BOOL(r >= 0.0); 381 } 382 } 383 384 /* Acceptable loss for uncompressible speedup is 2% */ 385 #define MIN_RATIO 980 386 387 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode( 388 const uint8_t* metablock_start, const uint8_t* next_emit, 389 const size_t insertlen, const size_t literal_ratio) { 390 const size_t compressed = (size_t)(next_emit - metablock_start); 391 if (compressed * 50 > insertlen) { 392 return BROTLI_FALSE; 393 } else { 394 return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO); 395 } 396 } 397 398 static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end, 399 const size_t storage_ix_start, 400 size_t* storage_ix, uint8_t* storage) { 401 const size_t len = (size_t)(end - begin); 402 RewindBitPosition(storage_ix_start, storage_ix, storage); 403 BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage); 404 *storage_ix = (*storage_ix + 7u) & ~7u; 405 memcpy(&storage[*storage_ix >> 3], begin, len); 406 *storage_ix += len << 3; 407 storage[*storage_ix >> 3] = 0; 408 } 409 410 static BROTLI_MODEL("small") uint32_t kCmdHistoSeed[128] = { 411 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 412 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 413 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 414 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 415 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 416 1, 1, 1, 1, 0, 0, 0, 0, 417 }; 418 419 static BROTLI_INLINE void BrotliCompressFragmentFastImpl( 420 BrotliOnePassArena* s, const uint8_t* input, size_t input_size, 421 BROTLI_BOOL is_last, int* table, size_t table_bits, 422 size_t* storage_ix, uint8_t* storage) { 423 uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth; 424 uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits; 425 uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo; 426 uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth; 427 uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits; 428 const uint8_t* ip_end; 429 430 /* "next_emit" is a pointer to the first byte that is not covered by a 431 previous copy. Bytes between "next_emit" and the start of the next copy or 432 the end of the input will be emitted as literal bytes. */ 433 const uint8_t* next_emit = input; 434 /* Save the start of the first block for position and distance computations. 435 */ 436 const uint8_t* base_ip = input; 437 438 static const size_t kFirstBlockSize = 3 << 15; 439 static const size_t kMergeBlockSize = 1 << 16; 440 441 const size_t kInputMarginBytes = BROTLI_WINDOW_GAP; 442 const size_t kMinMatchLen = 5; 443 444 const uint8_t* metablock_start = input; 445 size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize); 446 size_t total_block_size = block_size; 447 /* Save the bit position of the MLEN field of the meta-block header, so that 448 we can update it later if we decide to extend this meta-block. */ 449 size_t mlen_storage_ix = *storage_ix + 3; 450 451 size_t literal_ratio; 452 453 const uint8_t* ip; 454 int last_distance; 455 456 const size_t shift = 64u - table_bits; 457 458 BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage); 459 /* No block splits, no contexts. */ 460 BrotliWriteBits(13, 0, storage_ix, storage); 461 462 literal_ratio = BuildAndStoreLiteralPrefixCode( 463 s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage); 464 465 { 466 /* Store the pre-compressed command and distance prefix codes. */ 467 size_t i; 468 for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) { 469 BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage); 470 } 471 } 472 BrotliWriteBits(s->cmd_code_numbits & 7, 473 s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage); 474 475 emit_commands: 476 /* Initialize the command and distance histograms. We will gather 477 statistics of command and distance codes during the processing 478 of this block and use it to update the command and distance 479 prefix codes for the next block. */ 480 memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed)); 481 482 /* "ip" is the input pointer. */ 483 ip = input; 484 last_distance = -1; 485 ip_end = input + block_size; 486 487 if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) { 488 /* For the last block, we need to keep a 16 bytes margin so that we can be 489 sure that all distances are at most window size - 16. 490 For all other blocks, we only need to keep a margin of 5 bytes so that 491 we don't go over the block size with a copy. */ 492 const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen, 493 input_size - kInputMarginBytes); 494 const uint8_t* ip_limit = input + len_limit; 495 496 uint32_t next_hash; 497 for (next_hash = Hash(++ip, shift); ; ) { 498 /* Step 1: Scan forward in the input looking for a 5-byte-long match. 499 If we get close to exhausting the input then goto emit_remainder. 500 501 Heuristic match skipping: If 32 bytes are scanned with no matches 502 found, start looking only at every other byte. If 32 more bytes are 503 scanned, look at every third byte, etc.. When a match is found, 504 immediately go back to looking at every byte. This is a small loss 505 (~5% performance, ~0.1% density) for compressible data due to more 506 bookkeeping, but for non-compressible data (such as JPEG) it's a huge 507 win since the compressor quickly "realizes" the data is incompressible 508 and doesn't bother looking for matches everywhere. 509 510 The "skip" variable keeps track of how many bytes there are since the 511 last match; dividing it by 32 (i.e. right-shifting by five) gives the 512 number of bytes to move ahead for each iteration. */ 513 uint32_t skip = 32; 514 515 const uint8_t* next_ip = ip; 516 const uint8_t* candidate; 517 BROTLI_DCHECK(next_emit < ip); 518 trawl: 519 do { 520 uint32_t hash = next_hash; 521 uint32_t bytes_between_hash_lookups = skip++ >> 5; 522 BROTLI_DCHECK(hash == Hash(next_ip, shift)); 523 ip = next_ip; 524 next_ip = ip + bytes_between_hash_lookups; 525 if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) { 526 goto emit_remainder; 527 } 528 next_hash = Hash(next_ip, shift); 529 candidate = ip - last_distance; 530 if (IsMatch(ip, candidate)) { 531 if (BROTLI_PREDICT_TRUE(candidate < ip)) { 532 table[hash] = (int)(ip - base_ip); 533 break; 534 } 535 } 536 candidate = base_ip + table[hash]; 537 BROTLI_DCHECK(candidate >= base_ip); 538 BROTLI_DCHECK(candidate < ip); 539 540 table[hash] = (int)(ip - base_ip); 541 } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate))); 542 543 /* Check copy distance. If candidate is not feasible, continue search. 544 Checking is done outside of hot loop to reduce overhead. */ 545 if (ip - candidate > MAX_DISTANCE) goto trawl; 546 547 /* Step 2: Emit the found match together with the literal bytes from 548 "next_emit" to the bit stream, and then see if we can find a next match 549 immediately afterwards. Repeat until we find no match for the input 550 without emitting some literal bytes. */ 551 552 { 553 /* We have a 5-byte match at ip, and we need to emit bytes in 554 [next_emit, ip). */ 555 const uint8_t* base = ip; 556 size_t matched = 5 + FindMatchLengthWithLimit( 557 candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5); 558 int distance = (int)(base - candidate); /* > 0 */ 559 size_t insert = (size_t)(base - next_emit); 560 ip += matched; 561 BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n", 562 (int)(next_emit - base_ip), (unsigned long)insert, 2)); 563 BROTLI_DCHECK(0 == memcmp(base, candidate, matched)); 564 if (BROTLI_PREDICT_TRUE(insert < 6210)) { 565 EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo, 566 storage_ix, storage); 567 } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert, 568 literal_ratio)) { 569 EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3, 570 storage_ix, storage); 571 input_size -= (size_t)(base - input); 572 input = base; 573 next_emit = input; 574 goto next_block; 575 } else { 576 EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo, 577 storage_ix, storage); 578 } 579 EmitLiterals(next_emit, insert, lit_depth, lit_bits, 580 storage_ix, storage); 581 if (distance == last_distance) { 582 BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage); 583 ++cmd_histo[64]; 584 } else { 585 EmitDistance((size_t)distance, cmd_depth, cmd_bits, 586 cmd_histo, storage_ix, storage); 587 last_distance = distance; 588 } 589 EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo, 590 storage_ix, storage); 591 BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n" 592 "[CompressFragment] pos = %d insert = %d copy = %d\n" 593 "[CompressFragment] pos = %d distance = %d\n", 594 (int)(base - base_ip), (int)distance, 595 (int)(base - base_ip) + 2, 0, (int)matched - 2, 596 (int)(base - base_ip) + 2, (int)distance)); 597 598 next_emit = ip; 599 if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) { 600 goto emit_remainder; 601 } 602 /* We could immediately start working at ip now, but to improve 603 compression we first update "table" with the hashes of some positions 604 within the last copy. */ 605 { 606 uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3); 607 uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift); 608 uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift); 609 table[prev_hash] = (int)(ip - base_ip - 3); 610 prev_hash = HashBytesAtOffset(input_bytes, 1, shift); 611 table[prev_hash] = (int)(ip - base_ip - 2); 612 prev_hash = HashBytesAtOffset(input_bytes, 2, shift); 613 table[prev_hash] = (int)(ip - base_ip - 1); 614 615 candidate = base_ip + table[cur_hash]; 616 table[cur_hash] = (int)(ip - base_ip); 617 } 618 } 619 620 while (IsMatch(ip, candidate)) { 621 /* We have a 5-byte match at ip, and no need to emit any literal bytes 622 prior to ip. */ 623 const uint8_t* base = ip; 624 size_t matched = 5 + FindMatchLengthWithLimit( 625 candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5); 626 if (ip - candidate > MAX_DISTANCE) break; 627 ip += matched; 628 last_distance = (int)(base - candidate); /* > 0 */ 629 BROTLI_DCHECK(0 == memcmp(base, candidate, matched)); 630 EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo, 631 storage_ix, storage); 632 EmitDistance((size_t)last_distance, cmd_depth, cmd_bits, 633 cmd_histo, storage_ix, storage); 634 BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n" 635 "[CompressFragment] pos = %d distance = %d\n", 636 (int)(base - base_ip), 0, (int)matched, 637 (int)(base - base_ip), (int)last_distance)); 638 639 next_emit = ip; 640 if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) { 641 goto emit_remainder; 642 } 643 /* We could immediately start working at ip now, but to improve 644 compression we first update "table" with the hashes of some positions 645 within the last copy. */ 646 { 647 uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3); 648 uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift); 649 uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift); 650 table[prev_hash] = (int)(ip - base_ip - 3); 651 prev_hash = HashBytesAtOffset(input_bytes, 1, shift); 652 table[prev_hash] = (int)(ip - base_ip - 2); 653 prev_hash = HashBytesAtOffset(input_bytes, 2, shift); 654 table[prev_hash] = (int)(ip - base_ip - 1); 655 656 candidate = base_ip + table[cur_hash]; 657 table[cur_hash] = (int)(ip - base_ip); 658 } 659 } 660 661 next_hash = Hash(++ip, shift); 662 } 663 } 664 665 emit_remainder: 666 BROTLI_DCHECK(next_emit <= ip_end); 667 input += block_size; 668 input_size -= block_size; 669 block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize); 670 671 /* Decide if we want to continue this meta-block instead of emitting the 672 last insert-only command. */ 673 if (input_size > 0 && 674 total_block_size + block_size <= (1 << 20) && 675 ShouldMergeBlock(s, input, block_size, lit_depth)) { 676 BROTLI_DCHECK(total_block_size > (1 << 16)); 677 /* Update the size of the current meta-block and continue emitting commands. 678 We can do this because the current size and the new size both have 5 679 nibbles. */ 680 total_block_size += block_size; 681 UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage); 682 goto emit_commands; 683 } 684 685 /* Emit the remaining bytes as literals. */ 686 if (next_emit < ip_end) { 687 const size_t insert = (size_t)(ip_end - next_emit); 688 BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n", 689 (int)(next_emit - base_ip), (unsigned long)insert, 2)); 690 if (BROTLI_PREDICT_TRUE(insert < 6210)) { 691 EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo, 692 storage_ix, storage); 693 EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage); 694 } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert, 695 literal_ratio)) { 696 EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3, 697 storage_ix, storage); 698 } else { 699 EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo, 700 storage_ix, storage); 701 EmitLiterals(next_emit, insert, lit_depth, lit_bits, 702 storage_ix, storage); 703 } 704 } 705 next_emit = ip_end; 706 707 next_block: 708 /* If we have more data, write a new meta-block header and prefix codes and 709 then continue emitting commands. */ 710 if (input_size > 0) { 711 metablock_start = input; 712 block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize); 713 total_block_size = block_size; 714 /* Save the bit position of the MLEN field of the meta-block header, so that 715 we can update it later if we decide to extend this meta-block. */ 716 mlen_storage_ix = *storage_ix + 3; 717 BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage); 718 /* No block splits, no contexts. */ 719 BrotliWriteBits(13, 0, storage_ix, storage); 720 literal_ratio = BuildAndStoreLiteralPrefixCode( 721 s, input, block_size, lit_depth, lit_bits, storage_ix, storage); 722 BuildAndStoreCommandPrefixCode(s, storage_ix, storage); 723 goto emit_commands; 724 } 725 726 if (!is_last) { 727 /* If this is not the last block, update the command and distance prefix 728 codes for the next block and store the compressed forms. */ 729 s->cmd_code[0] = 0; 730 s->cmd_code_numbits = 0; 731 BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code); 732 } 733 } 734 735 #define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15) 736 737 #define BAKE_METHOD_PARAM_(B) \ 738 static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \ 739 BrotliOnePassArena* s, const uint8_t* input, size_t input_size, \ 740 BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \ 741 BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B, \ 742 storage_ix, storage); \ 743 } 744 FOR_TABLE_BITS_(BAKE_METHOD_PARAM_) 745 #undef BAKE_METHOD_PARAM_ 746 747 void BrotliCompressFragmentFast( 748 BrotliOnePassArena* s, const uint8_t* input, size_t input_size, 749 BROTLI_BOOL is_last, int* table, size_t table_size, 750 size_t* storage_ix, uint8_t* storage) { 751 const size_t initial_storage_ix = *storage_ix; 752 const size_t table_bits = Log2FloorNonZero(table_size); 753 754 if (input_size == 0) { 755 BROTLI_DCHECK(is_last); 756 BrotliWriteBits(1, 1, storage_ix, storage); /* islast */ 757 BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */ 758 *storage_ix = (*storage_ix + 7u) & ~7u; 759 return; 760 } 761 762 switch (table_bits) { 763 #define CASE_(B) \ 764 case B: \ 765 BrotliCompressFragmentFastImpl ## B( \ 766 s, input, input_size, is_last, table, storage_ix, storage);\ 767 break; 768 FOR_TABLE_BITS_(CASE_) 769 #undef CASE_ 770 default: BROTLI_DCHECK(0); break; 771 } 772 773 /* If output is larger than single uncompressed block, rewrite it. */ 774 if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) { 775 EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix, 776 storage_ix, storage); 777 } 778 779 if (is_last) { 780 BrotliWriteBits(1, 1, storage_ix, storage); /* islast */ 781 BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */ 782 *storage_ix = (*storage_ix + 7u) & ~7u; 783 } 784 } 785 786 #undef FOR_TABLE_BITS_ 787 788 #if defined(__cplusplus) || defined(c_plusplus) 789 } /* extern "C" */ 790 #endif