tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

compress_fragment_two_pass.c (26806B)


      1 /* Copyright 2015 Google Inc. All Rights Reserved.
      2 
      3   Distributed under MIT license.
      4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Function for fast encoding of an input fragment, independently from the input
      8   history. This function uses two-pass processing: in the first pass we save
      9   the found backward matches and literal bytes into a buffer, and in the
     10   second pass we emit them into the bit stream using prefix codes built based
     11   on the actual command and literal byte histograms. */
     12 
     13 #include "compress_fragment_two_pass.h"
     14 
     15 #include "../common/constants.h"
     16 #include "../common/platform.h"
     17 #include "bit_cost.h"
     18 #include "brotli_bit_stream.h"
     19 #include "entropy_encode.h"
     20 #include "fast_log.h"
     21 #include "find_match_length.h"
     22 #include "hash_base.h"
     23 #include "write_bits.h"
     24 
     25 #if defined(__cplusplus) || defined(c_plusplus)
     26 extern "C" {
     27 #endif
     28 
     29 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
     30 
     31 static BROTLI_INLINE uint32_t Hash(const uint8_t* p,
     32    size_t shift, size_t length) {
     33  const uint64_t h =
     34      (BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8)) * kHashMul32;
     35  return (uint32_t)(h >> shift);
     36 }
     37 
     38 static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset,
     39    size_t shift, size_t length) {
     40  BROTLI_DCHECK(offset <= 8 - length);
     41  {
     42    const uint64_t h = ((v >> (8 * offset)) << ((8 - length) * 8)) * kHashMul32;
     43    return (uint32_t)(h >> shift);
     44  }
     45 }
     46 
     47 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2,
     48    size_t length) {
     49  if (BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2)) {
     50    if (length == 4) return BROTLI_TRUE;
     51    return TO_BROTLI_BOOL(p1[4] == p2[4] && p1[5] == p2[5]);
     52  }
     53  return BROTLI_FALSE;
     54 }
     55 
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream.
   The first 64 entries of s->cmd_histo cover the insert/copy command symbols
   used by the Emit* helpers; the second 64 cover distance symbols 64..127. */
static void BuildAndStoreCommandPrefixCode(BrotliTwoPassArena* s,
                                           size_t* storage_ix,
                                           uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  /* TODO(eustas): initialize once. */
  memset(s->tmp_depth, 0, sizeof(s->tmp_depth));
  /* Build a separate Huffman code for each half of the alphabet. */
  BrotliCreateHuffmanTree(s->cmd_histo, 64, 15, s->tmp_tree, s->cmd_depth);
  BrotliCreateHuffmanTree(&s->cmd_histo[64], 64, 14, s->tmp_tree,
                          &s->cmd_depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  /* Permute the depths into canonical symbol order, derive the bit patterns,
     then permute the bit patterns back into Emit* order. Offsets are
     order-critical and must stay exactly as written. */
  memcpy(s->tmp_depth, s->cmd_depth + 24, 24);
  memcpy(s->tmp_depth + 24, s->cmd_depth, 8);
  memcpy(s->tmp_depth + 32, s->cmd_depth + 48, 8);
  memcpy(s->tmp_depth + 40, s->cmd_depth + 8, 8);
  memcpy(s->tmp_depth + 48, s->cmd_depth + 56, 8);
  memcpy(s->tmp_depth + 56, s->cmd_depth + 16, 8);
  BrotliConvertBitDepthsToSymbols(s->tmp_depth, 64, s->tmp_bits);
  memcpy(s->cmd_bits, s->tmp_bits + 24, 16);
  memcpy(s->cmd_bits + 8, s->tmp_bits + 40, 16);
  memcpy(s->cmd_bits + 16, s->tmp_bits + 56, 16);
  memcpy(s->cmd_bits + 24, s->tmp_bits, 48);
  memcpy(s->cmd_bits + 48, s->tmp_bits + 32, 16);
  memcpy(s->cmd_bits + 56, s->tmp_bits + 48, 16);
  BrotliConvertBitDepthsToSymbols(&s->cmd_depth[64], 64, &s->cmd_bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(s->tmp_depth, 0, 64); /* only 64 first values were used */
    /* Spread the 64 local symbols over the full BROTLI_NUM_COMMAND_SYMBOLS
       alphabet positions expected by the stored Huffman tree. */
    memcpy(s->tmp_depth, s->cmd_depth + 24, 8);
    memcpy(s->tmp_depth + 64, s->cmd_depth + 32, 8);
    memcpy(s->tmp_depth + 128, s->cmd_depth + 40, 8);
    memcpy(s->tmp_depth + 192, s->cmd_depth + 48, 8);
    memcpy(s->tmp_depth + 384, s->cmd_depth + 56, 8);
    for (i = 0; i < 8; ++i) {
      s->tmp_depth[128 + 8 * i] = s->cmd_depth[i];
      s->tmp_depth[256 + 8 * i] = s->cmd_depth[8 + i];
      s->tmp_depth[448 + 8 * i] = s->cmd_depth[16 + i];
    }
    BrotliStoreHuffmanTree(s->tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS,
                           s->tmp_tree, storage_ix, storage);
  }
  /* Store the distance-code tree for the second half. */
  BrotliStoreHuffmanTree(&s->cmd_depth[64], 64, s->tmp_tree, storage_ix,
                         storage);
}
    106 
    107 static BROTLI_INLINE void EmitInsertLen(
    108    uint32_t insertlen, uint32_t** commands) {
    109  if (insertlen < 6) {
    110    **commands = insertlen;
    111  } else if (insertlen < 130) {
    112    const uint32_t tail = insertlen - 2;
    113    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    114    const uint32_t prefix = tail >> nbits;
    115    const uint32_t inscode = (nbits << 1) + prefix + 2;
    116    const uint32_t extra = tail - (prefix << nbits);
    117    **commands = inscode | (extra << 8);
    118  } else if (insertlen < 2114) {
    119    const uint32_t tail = insertlen - 66;
    120    const uint32_t nbits = Log2FloorNonZero(tail);
    121    const uint32_t code = nbits + 10;
    122    const uint32_t extra = tail - (1u << nbits);
    123    **commands = code | (extra << 8);
    124  } else if (insertlen < 6210) {
    125    const uint32_t extra = insertlen - 2114;
    126    **commands = 21 | (extra << 8);
    127  } else if (insertlen < 22594) {
    128    const uint32_t extra = insertlen - 6210;
    129    **commands = 22 | (extra << 8);
    130  } else {
    131    const uint32_t extra = insertlen - 22594;
    132    **commands = 23 | (extra << 8);
    133  }
    134  ++(*commands);
    135 }
    136 
    137 static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
    138  if (copylen < 10) {
    139    **commands = (uint32_t)(copylen + 38);
    140  } else if (copylen < 134) {
    141    const size_t tail = copylen - 6;
    142    const size_t nbits = Log2FloorNonZero(tail) - 1;
    143    const size_t prefix = tail >> nbits;
    144    const size_t code = (nbits << 1) + prefix + 44;
    145    const size_t extra = tail - (prefix << nbits);
    146    **commands = (uint32_t)(code | (extra << 8));
    147  } else if (copylen < 2118) {
    148    const size_t tail = copylen - 70;
    149    const size_t nbits = Log2FloorNonZero(tail);
    150    const size_t code = nbits + 52;
    151    const size_t extra = tail - ((size_t)1 << nbits);
    152    **commands = (uint32_t)(code | (extra << 8));
    153  } else {
    154    const size_t extra = copylen - 2118;
    155    **commands = (uint32_t)(63 | (extra << 8));
    156  }
    157  ++(*commands);
    158 }
    159 
    160 static BROTLI_INLINE void EmitCopyLenLastDistance(
    161    size_t copylen, uint32_t** commands) {
    162  if (copylen < 12) {
    163    **commands = (uint32_t)(copylen + 20);
    164    ++(*commands);
    165  } else if (copylen < 72) {
    166    const size_t tail = copylen - 8;
    167    const size_t nbits = Log2FloorNonZero(tail) - 1;
    168    const size_t prefix = tail >> nbits;
    169    const size_t code = (nbits << 1) + prefix + 28;
    170    const size_t extra = tail - (prefix << nbits);
    171    **commands = (uint32_t)(code | (extra << 8));
    172    ++(*commands);
    173  } else if (copylen < 136) {
    174    const size_t tail = copylen - 8;
    175    const size_t code = (tail >> 5) + 54;
    176    const size_t extra = tail & 31;
    177    **commands = (uint32_t)(code | (extra << 8));
    178    ++(*commands);
    179    **commands = 64;
    180    ++(*commands);
    181  } else if (copylen < 2120) {
    182    const size_t tail = copylen - 72;
    183    const size_t nbits = Log2FloorNonZero(tail);
    184    const size_t code = nbits + 52;
    185    const size_t extra = tail - ((size_t)1 << nbits);
    186    **commands = (uint32_t)(code | (extra << 8));
    187    ++(*commands);
    188    **commands = 64;
    189    ++(*commands);
    190  } else {
    191    const size_t extra = copylen - 2120;
    192    **commands = (uint32_t)(63 | (extra << 8));
    193    ++(*commands);
    194    **commands = 64;
    195    ++(*commands);
    196  }
    197 }
    198 
    199 static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
    200  uint32_t d = distance + 3;
    201  uint32_t nbits = Log2FloorNonZero(d) - 1;
    202  const uint32_t prefix = (d >> nbits) & 1;
    203  const uint32_t offset = (2 + prefix) << nbits;
    204  const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
    205  uint32_t extra = d - offset;
    206  **commands = distcode | (extra << 8);
    207  ++(*commands);
    208 }
    209 
    210 /* REQUIRES: len <= 1 << 24. */
    211 static void BrotliStoreMetaBlockHeader(
    212    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    213    uint8_t* storage) {
    214  size_t nibbles = 6;
    215  /* ISLAST */
    216  BrotliWriteBits(1, 0, storage_ix, storage);
    217  if (len <= (1U << 16)) {
    218    nibbles = 4;
    219  } else if (len <= (1U << 20)) {
    220    nibbles = 5;
    221  }
    222  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
    223  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
    224  /* ISUNCOMPRESSED */
    225  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
    226 }
    227 
    228 static BROTLI_INLINE void CreateCommands(const uint8_t* input,
    229    size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
    230    size_t table_bits, size_t min_match,
    231    uint8_t** literals, uint32_t** commands) {
    232  /* "ip" is the input pointer. */
    233  const uint8_t* ip = input;
    234  const size_t shift = 64u - table_bits;
    235  const uint8_t* ip_end = input + block_size;
    236  /* "next_emit" is a pointer to the first byte that is not covered by a
    237     previous copy. Bytes between "next_emit" and the start of the next copy or
    238     the end of the input will be emitted as literal bytes. */
    239  const uint8_t* next_emit = input;
    240 
    241  int last_distance = -1;
    242  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
    243 
    244  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    245    /* For the last block, we need to keep a 16 bytes margin so that we can be
    246       sure that all distances are at most window size - 16.
    247       For all other blocks, we only need to keep a margin of 5 bytes so that
    248       we don't go over the block size with a copy. */
    249    const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
    250                                        input_size - kInputMarginBytes);
    251    const uint8_t* ip_limit = input + len_limit;
    252 
    253    uint32_t next_hash;
    254    for (next_hash = Hash(++ip, shift, min_match); ; ) {
    255      /* Step 1: Scan forward in the input looking for a 6-byte-long match.
    256         If we get close to exhausting the input then goto emit_remainder.
    257 
    258         Heuristic match skipping: If 32 bytes are scanned with no matches
    259         found, start looking only at every other byte. If 32 more bytes are
    260         scanned, look at every third byte, etc.. When a match is found,
    261         immediately go back to looking at every byte. This is a small loss
    262         (~5% performance, ~0.1% density) for compressible data due to more
    263         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
    264         win since the compressor quickly "realizes" the data is incompressible
    265         and doesn't bother looking for matches everywhere.
    266 
    267         The "skip" variable keeps track of how many bytes there are since the
    268         last match; dividing it by 32 (ie. right-shifting by five) gives the
    269         number of bytes to move ahead for each iteration. */
    270      uint32_t skip = 32;
    271 
    272      const uint8_t* next_ip = ip;
    273      const uint8_t* candidate;
    274 
    275      BROTLI_DCHECK(next_emit < ip);
    276 trawl:
    277      do {
    278        uint32_t hash = next_hash;
    279        uint32_t bytes_between_hash_lookups = skip++ >> 5;
    280        ip = next_ip;
    281        BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
    282        next_ip = ip + bytes_between_hash_lookups;
    283        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
    284          goto emit_remainder;
    285        }
    286        next_hash = Hash(next_ip, shift, min_match);
    287        candidate = ip - last_distance;
    288        if (IsMatch(ip, candidate, min_match)) {
    289          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
    290            table[hash] = (int)(ip - base_ip);
    291            break;
    292          }
    293        }
    294        candidate = base_ip + table[hash];
    295        BROTLI_DCHECK(candidate >= base_ip);
    296        BROTLI_DCHECK(candidate < ip);
    297 
    298        table[hash] = (int)(ip - base_ip);
    299      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));
    300 
    301      /* Check copy distance. If candidate is not feasible, continue search.
    302         Checking is done outside of hot loop to reduce overhead. */
    303      if (ip - candidate > MAX_DISTANCE) goto trawl;
    304 
    305      /* Step 2: Emit the found match together with the literal bytes from
    306         "next_emit", and then see if we can find a next match immediately
    307         afterwards. Repeat until we find no match for the input
    308         without emitting some literal bytes. */
    309 
    310      {
    311        /* We have a 6-byte match at ip, and we need to emit bytes in
    312           [next_emit, ip). */
    313        const uint8_t* base = ip;
    314        size_t matched = min_match + FindMatchLengthWithLimit(
    315            candidate + min_match, ip + min_match,
    316            (size_t)(ip_end - ip) - min_match);
    317        int distance = (int)(base - candidate);  /* > 0 */
    318        int insert = (int)(base - next_emit);
    319        ip += matched;
    320        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
    321        EmitInsertLen((uint32_t)insert, commands);
    322        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
    323                    (int)(next_emit - base_ip), insert, 2));
    324        memcpy(*literals, next_emit, (size_t)insert);
    325        *literals += insert;
    326        if (distance == last_distance) {
    327          **commands = 64;
    328          ++(*commands);
    329        } else {
    330          EmitDistance((uint32_t)distance, commands);
    331          last_distance = distance;
    332        }
    333        EmitCopyLenLastDistance(matched, commands);
    334        BROTLI_LOG(("[CompressFragment] pos = %d distance = %d\n"
    335                    "[CompressFragment] pos = %d insert = %d copy = %d\n"
    336                    "[CompressFragment] pos = %d distance = %d\n",
    337                    (int)(base - base_ip), (int)distance,
    338                    (int)(base - base_ip) + 2, 0, (int)matched - 2,
    339                    (int)(base - base_ip) + 2, (int)distance));
    340 
    341        next_emit = ip;
    342        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    343          goto emit_remainder;
    344        }
    345        {
    346          /* We could immediately start working at ip now, but to improve
    347             compression we first update "table" with the hashes of some
    348             positions within the last copy. */
    349          uint64_t input_bytes;
    350          uint32_t cur_hash;
    351          uint32_t prev_hash;
    352          if (min_match == 4) {
    353            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
    354            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
    355            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    356            table[prev_hash] = (int)(ip - base_ip - 3);
    357            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    358            table[prev_hash] = (int)(ip - base_ip - 2);
    359            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    360            table[prev_hash] = (int)(ip - base_ip - 1);
    361          } else {
    362            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
    363            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    364            table[prev_hash] = (int)(ip - base_ip - 5);
    365            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    366            table[prev_hash] = (int)(ip - base_ip - 4);
    367            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    368            table[prev_hash] = (int)(ip - base_ip - 3);
    369            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
    370            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    371            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    372            table[prev_hash] = (int)(ip - base_ip - 2);
    373            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    374            table[prev_hash] = (int)(ip - base_ip - 1);
    375          }
    376 
    377          candidate = base_ip + table[cur_hash];
    378          table[cur_hash] = (int)(ip - base_ip);
    379        }
    380      }
    381 
    382      while (ip - candidate <= MAX_DISTANCE &&
    383          IsMatch(ip, candidate, min_match)) {
    384        /* We have a 6-byte match at ip, and no need to emit any
    385           literal bytes prior to ip. */
    386        const uint8_t* base = ip;
    387        size_t matched = min_match + FindMatchLengthWithLimit(
    388            candidate + min_match, ip + min_match,
    389            (size_t)(ip_end - ip) - min_match);
    390        ip += matched;
    391        last_distance = (int)(base - candidate);  /* > 0 */
    392        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
    393        EmitCopyLen(matched, commands);
    394        EmitDistance((uint32_t)last_distance, commands);
    395        BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n"
    396                    "[CompressFragment] pos = %d distance = %d\n",
    397                    (int)(base - base_ip), 0, (int)matched,
    398                    (int)(base - base_ip), (int)last_distance));
    399 
    400        next_emit = ip;
    401        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    402          goto emit_remainder;
    403        }
    404        {
    405          /* We could immediately start working at ip now, but to improve
    406             compression we first update "table" with the hashes of some
    407             positions within the last copy. */
    408          uint64_t input_bytes;
    409          uint32_t cur_hash;
    410          uint32_t prev_hash;
    411          if (min_match == 4) {
    412            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
    413            cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
    414            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    415            table[prev_hash] = (int)(ip - base_ip - 3);
    416            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    417            table[prev_hash] = (int)(ip - base_ip - 2);
    418            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    419            table[prev_hash] = (int)(ip - base_ip - 1);
    420          } else {
    421            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
    422            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    423            table[prev_hash] = (int)(ip - base_ip - 5);
    424            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    425            table[prev_hash] = (int)(ip - base_ip - 4);
    426            prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    427            table[prev_hash] = (int)(ip - base_ip - 3);
    428            input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
    429            cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
    430            prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
    431            table[prev_hash] = (int)(ip - base_ip - 2);
    432            prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
    433            table[prev_hash] = (int)(ip - base_ip - 1);
    434          }
    435 
    436          candidate = base_ip + table[cur_hash];
    437          table[cur_hash] = (int)(ip - base_ip);
    438        }
    439      }
    440 
    441      next_hash = Hash(++ip, shift, min_match);
    442    }
    443  }
    444 
    445 emit_remainder:
    446  BROTLI_DCHECK(next_emit <= ip_end);
    447  /* Emit the remaining bytes as literals. */
    448  if (next_emit < ip_end) {
    449    const uint32_t insert = (uint32_t)(ip_end - next_emit);
    450    EmitInsertLen(insert, commands);
    451    BROTLI_LOG(("[CompressFragment] pos = %d insert = %d copy = %d\n",
    452                (int)(next_emit - base_ip), insert, 2));
    453    memcpy(*literals, next_emit, insert);
    454    *literals += insert;
    455  }
    456 }
    457 
    458 static void StoreCommands(BrotliTwoPassArena* s,
    459                          const uint8_t* literals, const size_t num_literals,
    460                          const uint32_t* commands, const size_t num_commands,
    461                          size_t* storage_ix, uint8_t* storage) {
    462  static const BROTLI_MODEL("small") uint32_t kNumExtraBits[128] = {
    463      0,  0,  0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,
    464      6,  7,  8,  9,  10, 12, 14, 24, 0,  0,  0,  0,  0,  0,  0,  0,
    465      1,  1,  2,  2,  3,  3,  4,  4,  0,  0,  0,  0,  0,  0,  0,  0,
    466      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  7,  8,  9,  10, 24,
    467      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    468      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
    469      9,  9,  10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    470      17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
    471  };
    472  static const BROTLI_MODEL("small") uint32_t kInsertOffset[24] = {
    473      0,  1,  2,  3,  4,   5,   6,   8,   10,   14,   18,   26,
    474      34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594,
    475  };
    476 
    477  size_t i;
    478  memset(s->lit_histo, 0, sizeof(s->lit_histo));
    479  /* TODO(eustas): is that necessary? */
    480  memset(s->cmd_depth, 0, sizeof(s->cmd_depth));
    481  /* TODO(eustas): is that necessary? */
    482  memset(s->cmd_bits, 0, sizeof(s->cmd_bits));
    483  memset(s->cmd_histo, 0, sizeof(s->cmd_histo));
    484  for (i = 0; i < num_literals; ++i) {
    485    ++s->lit_histo[literals[i]];
    486  }
    487  BrotliBuildAndStoreHuffmanTreeFast(s->tmp_tree, s->lit_histo, num_literals,
    488                                     /* max_bits = */ 8, s->lit_depth,
    489                                     s->lit_bits, storage_ix, storage);
    490 
    491  for (i = 0; i < num_commands; ++i) {
    492    const uint32_t code = commands[i] & 0xFF;
    493    BROTLI_DCHECK(code < 128);
    494    ++s->cmd_histo[code];
    495  }
    496  s->cmd_histo[1] += 1;
    497  s->cmd_histo[2] += 1;
    498  s->cmd_histo[64] += 1;
    499  s->cmd_histo[84] += 1;
    500  BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
    501 
    502  for (i = 0; i < num_commands; ++i) {
    503    const uint32_t cmd = commands[i];
    504    const uint32_t code = cmd & 0xFF;
    505    const uint32_t extra = cmd >> 8;
    506    BROTLI_DCHECK(code < 128);
    507    BrotliWriteBits(s->cmd_depth[code], s->cmd_bits[code], storage_ix, storage);
    508    BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
    509    if (code < 24) {
    510      const uint32_t insert = kInsertOffset[code] + extra;
    511      uint32_t j;
    512      for (j = 0; j < insert; ++j) {
    513        const uint8_t lit = *literals;
    514        BrotliWriteBits(s->lit_depth[lit], s->lit_bits[lit], storage_ix,
    515                        storage);
    516        ++literals;
    517      }
    518    }
    519  }
    520 }
    521 
    522 /* Acceptable loss for uncompressible speedup is 2% */
    523 #define MIN_RATIO 0.98
    524 #define SAMPLE_RATE 43
    525 
    526 static BROTLI_BOOL ShouldCompress(BrotliTwoPassArena* s,
    527    const uint8_t* input, size_t input_size, size_t num_literals) {
    528  double corpus_size = (double)input_size;
    529  if ((double)num_literals < MIN_RATIO * corpus_size) {
    530    return BROTLI_TRUE;
    531  } else {
    532    const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
    533    size_t i;
    534    memset(s->lit_histo, 0, sizeof(s->lit_histo));
    535    for (i = 0; i < input_size; i += SAMPLE_RATE) {
    536      ++s->lit_histo[input[i]];
    537    }
    538    return TO_BROTLI_BOOL(
    539        BrotliBitsEntropy(s->lit_histo, 256) < max_total_bit_cost);
    540  }
    541 }
    542 
    543 static void RewindBitPosition(const size_t new_storage_ix,
    544                              size_t* storage_ix, uint8_t* storage) {
    545  const size_t bitpos = new_storage_ix & 7;
    546  const size_t mask = (1u << bitpos) - 1;
    547  storage[new_storage_ix >> 3] &= (uint8_t)mask;
    548  *storage_ix = new_storage_ix;
    549 }
    550 
    551 static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
    552                                      size_t* storage_ix, uint8_t* storage) {
    553  BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
    554  *storage_ix = (*storage_ix + 7u) & ~7u;
    555  memcpy(&storage[*storage_ix >> 3], input, input_size);
    556  *storage_ix += input_size << 3;
    557  storage[*storage_ix >> 3] = 0;
    558 }
    559 
    560 static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
    561    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
    562    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
    563    int* table, size_t table_bits, size_t min_match,
    564    size_t* storage_ix, uint8_t* storage) {
    565  /* Save the start of the first block for position and distance computations.
    566  */
    567  const uint8_t* base_ip = input;
    568  BROTLI_UNUSED(is_last);
    569 
    570  while (input_size > 0) {
    571    size_t block_size =
    572        BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
    573    uint32_t* commands = command_buf;
    574    uint8_t* literals = literal_buf;
    575    size_t num_literals;
    576    CreateCommands(input, block_size, input_size, base_ip, table,
    577                   table_bits, min_match, &literals, &commands);
    578    num_literals = (size_t)(literals - literal_buf);
    579    if (ShouldCompress(s, input, block_size, num_literals)) {
    580      const size_t num_commands = (size_t)(commands - command_buf);
    581      BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    582      /* No block splits, no contexts. */
    583      BrotliWriteBits(13, 0, storage_ix, storage);
    584      StoreCommands(s, literal_buf, num_literals, command_buf, num_commands,
    585                    storage_ix, storage);
    586    } else {
    587      /* Since we did not find many backward references and the entropy of
    588         the data is close to 8 bits, we can simply emit an uncompressed block.
    589         This makes compression speed of uncompressible data about 3x faster. */
    590      EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
    591    }
    592    input += block_size;
    593    input_size -= block_size;
    594  }
    595 }
    596 
/* The supported hash-table sizes, expressed as log2 of the entry count. */
#define FOR_TABLE_BITS_(X) \
  X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)

/* Instantiates one non-inlined specialization of the compressor per table
   size, baking "table_bits" in as a compile-time constant; tables up to
   2^15 hash 4 bytes, the larger ones hash 6. */
#define BAKE_METHOD_PARAM_(B)                                                  \
static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B(            \
    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,            \
    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,          \
    int* table, size_t* storage_ix, uint8_t* storage) {                        \
  size_t min_match = (B <= 15) ? 4 : 6;                                        \
  BrotliCompressFragmentTwoPassImpl(s, input, input_size, is_last, command_buf,\
      literal_buf, table, B, min_match, storage_ix, storage);                  \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
    611 
    612 void BrotliCompressFragmentTwoPass(
    613    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
    614    BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
    615    int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
    616  const size_t initial_storage_ix = *storage_ix;
    617  const size_t table_bits = Log2FloorNonZero(table_size);
    618  switch (table_bits) {
    619 #define CASE_(B)                                      \
    620    case B:                                           \
    621      BrotliCompressFragmentTwoPassImpl ## B(         \
    622          s, input, input_size, is_last, command_buf, \
    623          literal_buf, table, storage_ix, storage);   \
    624      break;
    625    FOR_TABLE_BITS_(CASE_)
    626 #undef CASE_
    627    default: BROTLI_DCHECK(0); break;
    628  }
    629 
    630  /* If output is larger than single uncompressed block, rewrite it. */
    631  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    632    RewindBitPosition(initial_storage_ix, storage_ix, storage);
    633    EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
    634  }
    635 
    636  if (is_last) {
    637    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    638    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    639    *storage_ix = (*storage_ix + 7u) & ~7u;
    640  }
    641 }
    642 
    643 #undef FOR_TABLE_BITS_
    644 
    645 #if defined(__cplusplus) || defined(c_plusplus)
    646 }  /* extern "C" */
    647 #endif