tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

compress_fragment.c (32875B)


      1 /* Copyright 2015 Google Inc. All Rights Reserved.
      2 
      3   Distributed under MIT license.
      4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Function for fast encoding of an input fragment, independently from the input
      8   history. This function uses one-pass processing: when we find a backward
      9   match, we immediately emit the corresponding command and literal codes to
     10   the bit stream.
     11 
     12   Adapted from the CompressFragment() function in
     13   https://github.com/google/snappy/blob/master/snappy.cc */
     14 
     15 #include "compress_fragment.h"
     16 
     17 #include "../common/constants.h"
     18 #include "../common/platform.h"
     19 #include "brotli_bit_stream.h"
     20 #include "entropy_encode.h"
     21 #include "fast_log.h"
     22 #include "find_match_length.h"
     23 #include "hash_base.h"
     24 #include "write_bits.h"
     25 
     26 #if defined(__cplusplus) || defined(c_plusplus)
     27 extern "C" {
     28 #endif
     29 
     30 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
     31 
     32 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
     33  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
     34  return (uint32_t)(h >> shift);
     35 }
     36 
     37 static BROTLI_INLINE uint32_t HashBytesAtOffset(
     38    uint64_t v, int offset, size_t shift) {
     39  BROTLI_DCHECK(offset >= 0);
     40  BROTLI_DCHECK(offset <= 3);
     41  {
     42    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
     43    return (uint32_t)(h >> shift);
     44  }
     45 }
     46 
     47 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
     48  return TO_BROTLI_BOOL(
     49      BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
     50      p1[4] == p2[4]);
     51 }
     52 
     53 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
     54   of the "input" string and stores it into the bit stream.
     55   Note that the prefix code here is built from the pre-LZ77 input, therefore
     56   we can only approximate the statistics of the actual literal stream.
     57   Moreover, for long inputs we build a histogram from a sample of the input
     58   and thus have to assign a non-zero depth for each literal.
     59   Returns estimated compression ratio millibytes/char for encoding given input
     60   with generated code. */
     61 static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
     62                                             const uint8_t* input,
     63                                             const size_t input_size,
     64                                             uint8_t depths[256],
     65                                             uint16_t bits[256],
     66                                             size_t* storage_ix,
     67                                             uint8_t* storage) {
     68  uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
     69  size_t histogram_total;
     70  size_t i;
     71  memset(histogram, 0, sizeof(s->histogram));
     72 
     73  if (input_size < (1 << 15)) {
     74    for (i = 0; i < input_size; ++i) {
     75      ++histogram[input[i]];
     76    }
     77    histogram_total = input_size;
     78    for (i = 0; i < 256; ++i) {
     79      /* We weigh the first 11 samples with weight 3 to account for the
     80         balancing effect of the LZ77 phase on the histogram. */
     81      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
     82      histogram[i] += adjust;
     83      histogram_total += adjust;
     84    }
     85  } else {
     86    static const size_t kSampleRate = 29;
     87    for (i = 0; i < input_size; i += kSampleRate) {
     88      ++histogram[input[i]];
     89    }
     90    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
     91    for (i = 0; i < 256; ++i) {
     92      /* We add 1 to each population count to avoid 0 bit depths (since this is
     93         only a sample and we don't know if the symbol appears or not), and we
     94         weigh the first 11 samples with weight 3 to account for the balancing
     95         effect of the LZ77 phase on the histogram (more frequent symbols are
     96         more likely to be in backward references instead as literals). */
     97      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
     98      histogram[i] += adjust;
     99      histogram_total += adjust;
    100    }
    101  }
    102  BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
    103                                     /* max_bits = */ 8,
    104                                     depths, bits, storage_ix, storage);
    105  {
    106    size_t literal_ratio = 0;
    107    for (i = 0; i < 256; ++i) {
    108      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    109    }
    110    /* Estimated encoding ratio, millibytes per symbol. */
    111    return (literal_ratio * 125) / histogram_total;
    112  }
    113 }
    114 
    115 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
    116   "bits" based on "histogram" and stores it into the bit stream. */
    117 static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
    118    size_t* storage_ix, uint8_t* storage) {
    119  const uint32_t* const histogram = s->cmd_histo;
    120  uint8_t* const depth = s->cmd_depth;
    121  uint16_t* const bits = s->cmd_bits;
    122  uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
    123  uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
    124  /* TODO(eustas): do only once on initialization. */
    125  memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);
    126 
    127  BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);
    128  BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);
    129  /* We have to jump through a few hoops here in order to compute
    130     the command bits because the symbols are in a different order than in
    131     the full alphabet. This looks complicated, but having the symbols
    132     in this order in the command bits saves a few branches in the Emit*
    133     functions. */
    134  memcpy(tmp_depth, depth, 24);
    135  memcpy(tmp_depth + 24, depth + 40, 8);
    136  memcpy(tmp_depth + 32, depth + 24, 8);
    137  memcpy(tmp_depth + 40, depth + 48, 8);
    138  memcpy(tmp_depth + 48, depth + 32, 8);
    139  memcpy(tmp_depth + 56, depth + 56, 8);
    140  BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
    141  memcpy(bits, tmp_bits, 48);
    142  memcpy(bits + 24, tmp_bits + 32, 16);
    143  memcpy(bits + 32, tmp_bits + 48, 16);
    144  memcpy(bits + 40, tmp_bits + 24, 16);
    145  memcpy(bits + 48, tmp_bits + 40, 16);
    146  memcpy(bits + 56, tmp_bits + 56, 16);
    147  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
    148  {
    149    /* Create the bit length array for the full command alphabet. */
    150    size_t i;
    151    memset(tmp_depth, 0, 64);  /* only 64 first values were used */
    152    memcpy(tmp_depth, depth, 8);
    153    memcpy(tmp_depth + 64, depth + 8, 8);
    154    memcpy(tmp_depth + 128, depth + 16, 8);
    155    memcpy(tmp_depth + 192, depth + 24, 8);
    156    memcpy(tmp_depth + 384, depth + 32, 8);
    157    for (i = 0; i < 8; ++i) {
    158      tmp_depth[128 + 8 * i] = depth[40 + i];
    159      tmp_depth[256 + 8 * i] = depth[48 + i];
    160      tmp_depth[448 + 8 * i] = depth[56 + i];
    161    }
    162    /* TODO(eustas): could/should full-length machinery be avoided? */
    163    BrotliStoreHuffmanTree(
    164        tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
    165  }
    166  BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);
    167 }
    168 
    169 /* REQUIRES: insertlen < 6210 */
    170 static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
    171                                        const uint8_t depth[128],
    172                                        const uint16_t bits[128],
    173                                        uint32_t histo[128],
    174                                        size_t* storage_ix,
    175                                        uint8_t* storage) {
    176  if (insertlen < 6) {
    177    const size_t code = insertlen + 40;
    178    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    179    ++histo[code];
    180  } else if (insertlen < 130) {
    181    const size_t tail = insertlen - 2;
    182    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    183    const size_t prefix = tail >> nbits;
    184    const size_t inscode = (nbits << 1) + prefix + 42;
    185    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    186    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    187    ++histo[inscode];
    188  } else if (insertlen < 2114) {
    189    const size_t tail = insertlen - 66;
    190    const uint32_t nbits = Log2FloorNonZero(tail);
    191    const size_t code = nbits + 50;
    192    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    193    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    194    ++histo[code];
    195  } else {
    196    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    197    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    198    ++histo[61];
    199  }
    200 }
    201 
    202 static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
    203                                            const uint8_t depth[128],
    204                                            const uint16_t bits[128],
    205                                            uint32_t histo[128],
    206                                            size_t* storage_ix,
    207                                            uint8_t* storage) {
    208  if (insertlen < 22594) {
    209    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    210    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    211    ++histo[62];
    212  } else {
    213    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    214    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    215    ++histo[63];
    216  }
    217 }
    218 
    219 static BROTLI_INLINE void EmitCopyLen(size_t copylen,
    220                                      const uint8_t depth[128],
    221                                      const uint16_t bits[128],
    222                                      uint32_t histo[128],
    223                                      size_t* storage_ix,
    224                                      uint8_t* storage) {
    225  if (copylen < 10) {
    226    BrotliWriteBits(
    227        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    228    ++histo[copylen + 14];
    229  } else if (copylen < 134) {
    230    const size_t tail = copylen - 6;
    231    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    232    const size_t prefix = tail >> nbits;
    233    const size_t code = (nbits << 1) + prefix + 20;
    234    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    235    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    236    ++histo[code];
    237  } else if (copylen < 2118) {
    238    const size_t tail = copylen - 70;
    239    const uint32_t nbits = Log2FloorNonZero(tail);
    240    const size_t code = nbits + 28;
    241    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    242    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    243    ++histo[code];
    244  } else {
    245    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    246    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    247    ++histo[39];
    248  }
    249 }
    250 
    251 static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
    252                                                  const uint8_t depth[128],
    253                                                  const uint16_t bits[128],
    254                                                  uint32_t histo[128],
    255                                                  size_t* storage_ix,
    256                                                  uint8_t* storage) {
    257  if (copylen < 12) {
    258    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    259    ++histo[copylen - 4];
    260  } else if (copylen < 72) {
    261    const size_t tail = copylen - 8;
    262    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    263    const size_t prefix = tail >> nbits;
    264    const size_t code = (nbits << 1) + prefix + 4;
    265    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    266    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    267    ++histo[code];
    268  } else if (copylen < 136) {
    269    const size_t tail = copylen - 8;
    270    const size_t code = (tail >> 5) + 30;
    271    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    272    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    273    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    274    ++histo[code];
    275    ++histo[64];
    276  } else if (copylen < 2120) {
    277    const size_t tail = copylen - 72;
    278    const uint32_t nbits = Log2FloorNonZero(tail);
    279    const size_t code = nbits + 28;
    280    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    281    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    282    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    283    ++histo[code];
    284    ++histo[64];
    285  } else {
    286    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    287    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    288    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    289    ++histo[39];
    290    ++histo[64];
    291  }
    292 }
    293 
    294 static BROTLI_INLINE void EmitDistance(size_t distance,
    295                                       const uint8_t depth[128],
    296                                       const uint16_t bits[128],
    297                                       uint32_t histo[128],
    298                                       size_t* storage_ix, uint8_t* storage) {
    299  const size_t d = distance + 3;
    300  const uint32_t nbits = Log2FloorNonZero(d) - 1u;
    301  const size_t prefix = (d >> nbits) & 1;
    302  const size_t offset = (2 + prefix) << nbits;
    303  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
    304  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
    305  BrotliWriteBits(nbits, d - offset, storage_ix, storage);
    306  ++histo[distcode];
    307 }
    308 
    309 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
    310                                       const uint8_t depth[256],
    311                                       const uint16_t bits[256],
    312                                       size_t* storage_ix, uint8_t* storage) {
    313  size_t j;
    314  for (j = 0; j < len; j++) {
    315    const uint8_t lit = input[j];
    316    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
    317  }
    318 }
    319 
    320 /* REQUIRES: len <= 1 << 24. */
    321 static void BrotliStoreMetaBlockHeader(
    322    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    323    uint8_t* storage) {
    324  size_t nibbles = 6;
    325  /* ISLAST */
    326  BrotliWriteBits(1, 0, storage_ix, storage);
    327  if (len <= (1U << 16)) {
    328    nibbles = 4;
    329  } else if (len <= (1U << 20)) {
    330    nibbles = 5;
    331  }
    332  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
    333  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
    334  /* ISUNCOMPRESSED */
    335  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
    336 }
    337 
    338 static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    339    uint8_t* array) {
    340  while (n_bits > 0) {
    341    size_t byte_pos = pos >> 3;
    342    size_t n_unchanged_bits = pos & 7;
    343    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    344    size_t total_bits = n_unchanged_bits + n_changed_bits;
    345    uint32_t mask =
    346        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    347    uint32_t unchanged_bits = array[byte_pos] & mask;
    348    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    349    array[byte_pos] =
    350        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    351    n_bits -= n_changed_bits;
    352    bits >>= n_changed_bits;
    353    pos += n_changed_bits;
    354  }
    355 }
    356 
    357 static void RewindBitPosition(const size_t new_storage_ix,
    358                              size_t* storage_ix, uint8_t* storage) {
    359  const size_t bitpos = new_storage_ix & 7;
    360  const size_t mask = (1u << bitpos) - 1;
    361  storage[new_storage_ix >> 3] &= (uint8_t)mask;
    362  *storage_ix = new_storage_ix;
    363 }
    364 
    365 static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
    366    const uint8_t* data, size_t len, const uint8_t* depths) {
    367  uint32_t* BROTLI_RESTRICT const histo = s->histogram;
    368  static const size_t kSampleRate = 43;
    369  size_t i;
    370  memset(histo, 0, sizeof(s->histogram));
    371  for (i = 0; i < len; i += kSampleRate) {
    372    ++histo[data[i]];
    373  }
    374  {
    375    const size_t total = (len + kSampleRate - 1) / kSampleRate;
    376    double r = (FastLog2(total) + 0.5) * (double)total + 200;
    377    for (i = 0; i < 256; ++i) {
    378      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    379    }
    380    return TO_BROTLI_BOOL(r >= 0.0);
    381  }
    382 }
    383 
    384 /* Acceptable loss for uncompressible speedup is 2% */
    385 #define MIN_RATIO 980
    386 
    387 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    388    const uint8_t* metablock_start, const uint8_t* next_emit,
    389    const size_t insertlen, const size_t literal_ratio) {
    390  const size_t compressed = (size_t)(next_emit - metablock_start);
    391  if (compressed * 50 > insertlen) {
    392    return BROTLI_FALSE;
    393  } else {
    394    return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
    395  }
    396 }
    397 
    398 static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
    399                                      const size_t storage_ix_start,
    400                                      size_t* storage_ix, uint8_t* storage) {
    401  const size_t len = (size_t)(end - begin);
    402  RewindBitPosition(storage_ix_start, storage_ix, storage);
    403  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
    404  *storage_ix = (*storage_ix + 7u) & ~7u;
    405  memcpy(&storage[*storage_ix >> 3], begin, len);
    406  *storage_ix += len << 3;
    407  storage[*storage_ix >> 3] = 0;
    408 }
    409 
    410 static BROTLI_MODEL("small") uint32_t kCmdHistoSeed[128] = {
    411  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
    412  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
    413  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    414  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    415  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    416  1, 1, 1, 1, 0, 0, 0, 0,
    417 };
    418 
    419 static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    420    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
    421    BROTLI_BOOL is_last, int* table, size_t table_bits,
    422    size_t* storage_ix, uint8_t* storage) {
    423  uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
    424  uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
    425  uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
    426  uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
    427  uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
    428  const uint8_t* ip_end;
    429 
    430  /* "next_emit" is a pointer to the first byte that is not covered by a
    431     previous copy. Bytes between "next_emit" and the start of the next copy or
    432     the end of the input will be emitted as literal bytes. */
    433  const uint8_t* next_emit = input;
    434  /* Save the start of the first block for position and distance computations.
    435  */
    436  const uint8_t* base_ip = input;
    437 
    438  static const size_t kFirstBlockSize = 3 << 15;
    439  static const size_t kMergeBlockSize = 1 << 16;
    440 
    441  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
    442  const size_t kMinMatchLen = 5;
    443 
    444  const uint8_t* metablock_start = input;
    445  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    446  size_t total_block_size = block_size;
    447  /* Save the bit position of the MLEN field of the meta-block header, so that
    448     we can update it later if we decide to extend this meta-block. */
    449  size_t mlen_storage_ix = *storage_ix + 3;
    450 
    451  size_t literal_ratio;
    452 
    453  const uint8_t* ip;
    454  int last_distance;
    455 
    456  const size_t shift = 64u - table_bits;
    457 
    458  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    459  /* No block splits, no contexts. */
    460  BrotliWriteBits(13, 0, storage_ix, storage);
    461 
    462  literal_ratio = BuildAndStoreLiteralPrefixCode(
    463      s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);
    464 
    465  {
    466    /* Store the pre-compressed command and distance prefix codes. */
    467    size_t i;
    468    for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) {
    469      BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage);
    470    }
    471  }
    472  BrotliWriteBits(s->cmd_code_numbits & 7,
    473                  s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);
    474 
    475 emit_commands:
    476  /* Initialize the command and distance histograms. We will gather
    477     statistics of command and distance codes during the processing
    478     of this block and use it to update the command and distance
    479     prefix codes for the next block. */
    480  memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
    481 
    482  /* "ip" is the input pointer. */
    483  ip = input;
    484  last_distance = -1;
    485  ip_end = input + block_size;
    486 
    487  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    488    /* For the last block, we need to keep a 16 bytes margin so that we can be
    489       sure that all distances are at most window size - 16.
    490       For all other blocks, we only need to keep a margin of 5 bytes so that
    491       we don't go over the block size with a copy. */
    492    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
    493                                        input_size - kInputMarginBytes);
    494    const uint8_t* ip_limit = input + len_limit;
    495 
    496    uint32_t next_hash;
    497    for (next_hash = Hash(++ip, shift); ; ) {
    498      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
    499         If we get close to exhausting the input then goto emit_remainder.
    500 
    501         Heuristic match skipping: If 32 bytes are scanned with no matches
    502         found, start looking only at every other byte. If 32 more bytes are
    503         scanned, look at every third byte, etc.. When a match is found,
    504         immediately go back to looking at every byte. This is a small loss
    505         (~5% performance, ~0.1% density) for compressible data due to more
    506         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
    507         win since the compressor quickly "realizes" the data is incompressible
    508         and doesn't bother looking for matches everywhere.
    509 
    510         The "skip" variable keeps track of how many bytes there are since the
    511         last match; dividing it by 32 (i.e. right-shifting by five) gives the
    512         number of bytes to move ahead for each iteration. */
    513      uint32_t skip = 32;
    514 
    515      const uint8_t* next_ip = ip;
    516      const uint8_t* candidate;
    517      BROTLI_DCHECK(next_emit < ip);
    518 trawl:
    519      do {
    520        uint32_t hash = next_hash;
    521        uint32_t bytes_between_hash_lookups = skip++ >> 5;
    522        BROTLI_DCHECK(hash == Hash(next_ip, shift));
    523        ip = next_ip;
    524        next_ip = ip + bytes_between_hash_lookups;
    525        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
    526          goto emit_remainder;
    527        }
    528        next_hash = Hash(next_ip, shift);
    529        candidate = ip - last_distance;
    530        if (IsMatch(ip, candidate)) {
    531          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
    532            table[hash] = (int)(ip - base_ip);
    533            break;
    534          }
    535        }
    536        candidate = base_ip + table[hash];
    537        BROTLI_DCHECK(candidate >= base_ip);
    538        BROTLI_DCHECK(candidate < ip);
    539 
    540        table[hash] = (int)(ip - base_ip);
    541      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
    542 
    543      /* Check copy distance. If candidate is not feasible, continue search.
    544         Checking is done outside of hot loop to reduce overhead. */
    545      if (ip - candidate > MAX_DISTANCE) goto trawl;
    546 
    547      /* Step 2: Emit the found match together with the literal bytes from
    548         "next_emit" to the bit stream, and then see if we can find a next match
    549         immediately afterwards. Repeat until we find no match for the input
    550         without emitting some literal bytes. */
    551 
    552      {
    553        /* We have a 5-byte match at ip, and we need to emit bytes in
    554           [next_emit, ip). */
    555        const uint8_t* base = ip;
    556        size_t matched = 5 + FindMatchLengthWithLimit(
    557            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
    558        int distance = (int)(base - candidate);  /* > 0 */
    559        size_t insert = (size_t)(base - next_emit);
    560        ip += matched;
    561        BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
    562                    (int)(next_emit - base_ip), (unsigned long)insert, 2));
    563        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
    564        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
    565          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    566                        storage_ix, storage);
    567        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
    568                                             literal_ratio)) {
    569          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
    570                                    storage_ix, storage);
    571          input_size -= (size_t)(base - input);
    572          input = base;
    573          next_emit = input;
    574          goto next_block;
    575        } else {
    576          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    577                            storage_ix, storage);
    578        }
    579        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
    580                     storage_ix, storage);
    581        if (distance == last_distance) {
    582          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
    583          ++cmd_histo[64];
    584        } else {
    585          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
    586                       cmd_histo, storage_ix, storage);
    587          last_distance = distance;
    588        }
    589        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
    590                                storage_ix, storage);
    591        BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
    592                    "[CompressFragment] pos = %d insert = %d copy = %d\n"
    593                    "[CompressFragment] pos = %d distance = %d\n",
    594                    (int)(base - base_ip), (int)distance,
    595                    (int)(base - base_ip) + 2, 0, (int)matched - 2,
    596                    (int)(base - base_ip) + 2, (int)distance));
    597 
    598        next_emit = ip;
    599        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    600          goto emit_remainder;
    601        }
    602        /* We could immediately start working at ip now, but to improve
    603           compression we first update "table" with the hashes of some positions
    604           within the last copy. */
    605        {
    606          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
    607          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    608          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
    609          table[prev_hash] = (int)(ip - base_ip - 3);
    610          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    611          table[prev_hash] = (int)(ip - base_ip - 2);
    612          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    613          table[prev_hash] = (int)(ip - base_ip - 1);
    614 
    615          candidate = base_ip + table[cur_hash];
    616          table[cur_hash] = (int)(ip - base_ip);
    617        }
    618      }
    619 
    620      while (IsMatch(ip, candidate)) {
    621        /* We have a 5-byte match at ip, and no need to emit any literal bytes
    622           prior to ip. */
    623        const uint8_t* base = ip;
    624        size_t matched = 5 + FindMatchLengthWithLimit(
    625            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
    626        if (ip - candidate > MAX_DISTANCE) break;
    627        ip += matched;
    628        last_distance = (int)(base - candidate);  /* > 0 */
    629        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
    630        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
    631                    storage_ix, storage);
    632        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
    633                     cmd_histo, storage_ix, storage);
    634        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
    635                    "[CompressFragment] pos = %d distance = %d\n",
    636                    (int)(base - base_ip), 0, (int)matched,
    637                    (int)(base - base_ip), (int)last_distance));
    638 
    639        next_emit = ip;
    640        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    641          goto emit_remainder;
    642        }
    643        /* We could immediately start working at ip now, but to improve
    644           compression we first update "table" with the hashes of some positions
    645           within the last copy. */
    646        {
    647          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
    648          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    649          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
    650          table[prev_hash] = (int)(ip - base_ip - 3);
    651          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    652          table[prev_hash] = (int)(ip - base_ip - 2);
    653          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    654          table[prev_hash] = (int)(ip - base_ip - 1);
    655 
    656          candidate = base_ip + table[cur_hash];
    657          table[cur_hash] = (int)(ip - base_ip);
    658        }
    659      }
    660 
    661      next_hash = Hash(++ip, shift);
    662    }
    663  }
    664 
    665 emit_remainder:
    666  BROTLI_DCHECK(next_emit <= ip_end);
    667  input += block_size;
    668  input_size -= block_size;
    669  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
    670 
    671  /* Decide if we want to continue this meta-block instead of emitting the
    672     last insert-only command. */
    673  if (input_size > 0 &&
    674      total_block_size + block_size <= (1 << 20) &&
    675      ShouldMergeBlock(s, input, block_size, lit_depth)) {
    676    BROTLI_DCHECK(total_block_size > (1 << 16));
    677    /* Update the size of the current meta-block and continue emitting commands.
    678       We can do this because the current size and the new size both have 5
    679       nibbles. */
    680    total_block_size += block_size;
    681    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    682    goto emit_commands;
    683  }
    684 
    685  /* Emit the remaining bytes as literals. */
    686  if (next_emit < ip_end) {
    687    const size_t insert = (size_t)(ip_end - next_emit);
    688    BROTLI_LOG(("[CompressFragment] pos = %d insert = %lu copy = %d\n",
    689                (int)(next_emit - base_ip), (unsigned long)insert, 2));
    690    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
    691      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    692                    storage_ix, storage);
    693      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    694    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
    695                                         literal_ratio)) {
    696      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
    697                                storage_ix, storage);
    698    } else {
    699      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    700                        storage_ix, storage);
    701      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
    702                   storage_ix, storage);
    703    }
    704  }
    705  next_emit = ip_end;
    706 
    707 next_block:
    708  /* If we have more data, write a new meta-block header and prefix codes and
    709     then continue emitting commands. */
    710  if (input_size > 0) {
    711    metablock_start = input;
    712    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    713    total_block_size = block_size;
    714    /* Save the bit position of the MLEN field of the meta-block header, so that
    715       we can update it later if we decide to extend this meta-block. */
    716    mlen_storage_ix = *storage_ix + 3;
    717    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    718    /* No block splits, no contexts. */
    719    BrotliWriteBits(13, 0, storage_ix, storage);
    720    literal_ratio = BuildAndStoreLiteralPrefixCode(
    721        s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    722    BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
    723    goto emit_commands;
    724  }
    725 
    726  if (!is_last) {
    727    /* If this is not the last block, update the command and distance prefix
    728       codes for the next block and store the compressed forms. */
    729    s->cmd_code[0] = 0;
    730    s->cmd_code_numbits = 0;
    731    BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);
    732  }
    733 }
    734 
    735 #define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
    736 
    737 #define BAKE_METHOD_PARAM_(B) \
    738 static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
    739    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,          \
    740    BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
    741  BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B,    \
    742      storage_ix, storage);                                                  \
    743 }
    744 FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
    745 #undef BAKE_METHOD_PARAM_
    746 
    747 void BrotliCompressFragmentFast(
    748    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
    749    BROTLI_BOOL is_last, int* table, size_t table_size,
    750    size_t* storage_ix, uint8_t* storage) {
    751  const size_t initial_storage_ix = *storage_ix;
    752  const size_t table_bits = Log2FloorNonZero(table_size);
    753 
    754  if (input_size == 0) {
    755    BROTLI_DCHECK(is_last);
    756    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    757    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    758    *storage_ix = (*storage_ix + 7u) & ~7u;
    759    return;
    760  }
    761 
    762  switch (table_bits) {
    763 #define CASE_(B)                                                     \
    764    case B:                                                          \
    765      BrotliCompressFragmentFastImpl ## B(                           \
    766          s, input, input_size, is_last, table, storage_ix, storage);\
    767      break;
    768    FOR_TABLE_BITS_(CASE_)
    769 #undef CASE_
    770    default: BROTLI_DCHECK(0); break;
    771  }
    772 
    773  /* If output is larger than single uncompressed block, rewrite it. */
    774  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    775    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
    776                              storage_ix, storage);
    777  }
    778 
    779  if (is_last) {
    780    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    781    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    782    *storage_ix = (*storage_ix + 7u) & ~7u;
    783  }
    784 }
    785 
    786 #undef FOR_TABLE_BITS_
    787 
    788 #if defined(__cplusplus) || defined(c_plusplus)
    789 }  /* extern "C" */
    790 #endif