tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

huffman.cc (11964B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jpegli/huffman.h"
      7 
      8 #include <limits>
      9 #include <vector>
     10 
     11 #include "lib/jpegli/common.h"
     12 #include "lib/jpegli/error.h"
     13 #include "lib/jxl/base/status.h"
     14 
     15 namespace jpegli {
     16 
     17 // Returns the table width of the next 2nd level table, count is the histogram
     18 // of bit lengths for the remaining symbols, len is the code length of the next
     19 // processed symbol.
     20 static inline int NextTableBitSize(const int* count, int len) {
     21  int left = 1 << (len - kJpegHuffmanRootTableBits);
     22  while (len < static_cast<int>(kJpegHuffmanMaxBitLength)) {
     23    left -= count[len];
     24    if (left <= 0) break;
     25    ++len;
     26    left <<= 1;
     27  }
     28  return len - kJpegHuffmanRootTableBits;
     29 }
     30 
     31 void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
     32                           HuffmanTableEntry* lut) {
     33  HuffmanTableEntry code;    // current table entry
     34  HuffmanTableEntry* table;  // next available space in table
     35  int len;                   // current code length
     36  int idx;                   // symbol index
     37  int key;                   // prefix code
     38  int reps;                  // number of replicate key values in current table
     39  int low;                   // low bits for current root entry
     40  int table_bits;            // key length of current table
     41  int table_size;            // size of current table
     42 
     43  // Make a local copy of the input bit length histogram.
     44  int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0};
     45  int total_count = 0;
     46  for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
     47    tmp_count[len] = count[len];
     48    total_count += tmp_count[len];
     49  }
     50 
     51  table = lut;
     52  table_bits = kJpegHuffmanRootTableBits;
     53  table_size = 1 << table_bits;
     54 
     55  // Special case code with only one value.
     56  if (total_count == 1) {
     57    code.bits = 0;
     58    code.value = symbols[0];
     59    for (key = 0; key < table_size; ++key) {
     60      table[key] = code;
     61    }
     62    return;
     63  }
     64 
     65  // Fill in root table.
     66  key = 0;
     67  idx = 0;
     68  for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) {
     69    for (; tmp_count[len] > 0; --tmp_count[len]) {
     70      code.bits = len;
     71      code.value = symbols[idx++];
     72      reps = 1 << (kJpegHuffmanRootTableBits - len);
     73      while (reps--) {
     74        table[key++] = code;
     75      }
     76    }
     77  }
     78 
     79  // Fill in 2nd level tables and add pointers to root table.
     80  table += table_size;
     81  table_size = 0;
     82  low = 0;
     83  for (len = kJpegHuffmanRootTableBits + 1;
     84       len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
     85    for (; tmp_count[len] > 0; --tmp_count[len]) {
     86      // Start a new sub-table if the previous one is full.
     87      if (low >= table_size) {
     88        table += table_size;
     89        table_bits = NextTableBitSize(tmp_count, len);
     90        table_size = 1 << table_bits;
     91        low = 0;
     92        lut[key].bits = table_bits + kJpegHuffmanRootTableBits;
     93        lut[key].value = (table - lut) - key;
     94        ++key;
     95      }
     96      code.bits = len - kJpegHuffmanRootTableBits;
     97      code.value = symbols[idx++];
     98      reps = 1 << (table_bits - code.bits);
     99      while (reps--) {
    100        table[low++] = code;
    101      }
    102    }
    103  }
    104 }
    105 
    106 // A node of a Huffman tree.
    107 struct HuffmanTree {
    108  HuffmanTree(uint32_t count, int16_t left, int16_t right)
    109      : total_count(count), index_left(left), index_right_or_value(right) {}
    110  uint32_t total_count;
    111  int16_t index_left;
    112  int16_t index_right_or_value;
    113 };
    114 
    115 void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
    116              uint8_t level) {
    117  if (p.index_left >= 0) {
    118    ++level;
    119    SetDepth(pool[p.index_left], pool, depth, level);
    120    SetDepth(pool[p.index_right_or_value], pool, depth, level);
    121  } else {
    122    depth[p.index_right_or_value] = level;
    123  }
    124 }
    125 
    126 // Sort the root nodes, least popular first.
    127 static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) {
    128  return v0.total_count < v1.total_count;
    129 }
    130 
    131 // This function will create a Huffman tree.
    132 //
    133 // The catch here is that the tree cannot be arbitrarily deep.
    134 // Brotli specifies a maximum depth of 15 bits for "code trees"
    135 // and 7 bits for "code length code trees."
    136 //
    137 // count_limit is the value that is to be faked as the minimum value
    138 // and this minimum value is raised until the tree matches the
    139 // maximum length requirement.
    140 //
    141 // This algorithm is not of excellent performance for very long data blocks,
    142 // especially when population counts are longer than 2**tree_limit, but
    143 // we are not planning to use this with extremely long blocks.
    144 //
    145 // See http://en.wikipedia.org/wiki/Huffman_coding
    146 void CreateHuffmanTree(const uint32_t* data, const size_t length,
    147                       const int tree_limit, uint8_t* depth) {
    148  // For block sizes below 64 kB, we never need to do a second iteration
    149  // of this loop. Probably all of our block sizes will be smaller than
    150  // that, so this loop is mostly of academic interest. If we actually
    151  // would need this, we would be better off with the Katajainen algorithm.
    152  for (uint32_t count_limit = 1;; count_limit *= 2) {
    153    std::vector<HuffmanTree> tree;
    154    tree.reserve(2 * length + 1);
    155 
    156    for (size_t i = length; i != 0;) {
    157      --i;
    158      if (data[i]) {
    159        const uint32_t count = std::max(data[i], count_limit - 1);
    160        tree.emplace_back(count, -1, static_cast<int16_t>(i));
    161      }
    162    }
    163 
    164    const size_t n = tree.size();
    165    if (n == 1) {
    166      // Fake value; will be fixed on upper level.
    167      depth[tree[0].index_right_or_value] = 1;
    168      break;
    169    }
    170 
    171    std::stable_sort(tree.begin(), tree.end(), Compare);
    172 
    173    // The nodes are:
    174    // [0, n): the sorted leaf nodes that we start with.
    175    // [n]: we add a sentinel here.
    176    // [n + 1, 2n): new parent nodes are added here, starting from
    177    //              (n+1). These are naturally in ascending order.
    178    // [2n]: we add a sentinel at the end as well.
    179    // There will be (2n+1) elements at the end.
    180    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
    181    tree.push_back(sentinel);
    182    tree.push_back(sentinel);
    183 
    184    size_t i = 0;      // Points to the next leaf node.
    185    size_t j = n + 1;  // Points to the next non-leaf node.
    186    for (size_t k = n - 1; k != 0; --k) {
    187      size_t left;
    188      size_t right;
    189      if (tree[i].total_count <= tree[j].total_count) {
    190        left = i;
    191        ++i;
    192      } else {
    193        left = j;
    194        ++j;
    195      }
    196      if (tree[i].total_count <= tree[j].total_count) {
    197        right = i;
    198        ++i;
    199      } else {
    200        right = j;
    201        ++j;
    202      }
    203 
    204      // The sentinel node becomes the parent node.
    205      size_t j_end = tree.size() - 1;
    206      tree[j_end].total_count =
    207          tree[left].total_count + tree[right].total_count;
    208      tree[j_end].index_left = static_cast<int16_t>(left);
    209      tree[j_end].index_right_or_value = static_cast<int16_t>(right);
    210 
    211      // Add back the last sentinel node.
    212      tree.push_back(sentinel);
    213    }
    214    JXL_DASSERT(tree.size() == 2 * n + 1);
    215    SetDepth(tree[2 * n - 1], tree.data(), depth, 0);
    216 
    217    // We need to pack the Huffman tree in tree_limit bits.
    218    // If this was not successful, add fake entities to the lowest values
    219    // and retry.
    220    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
    221      break;
    222    }
    223  }
    224 }
    225 
    226 void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
    227                          bool is_dc) {
    228  size_t total_symbols = 0;
    229  size_t total_p = 0;
    230  size_t max_depth = 0;
    231  for (size_t d = 1; d <= kJpegHuffmanMaxBitLength; ++d) {
    232    uint8_t count = table->bits[d];
    233    if (count) {
    234      total_symbols += count;
    235      total_p += (1u << (kJpegHuffmanMaxBitLength - d)) * count;
    236      max_depth = d;
    237    }
    238  }
    239  total_p += 1u << (kJpegHuffmanMaxBitLength - max_depth);  // sentinel symbol
    240  if (total_symbols == 0) {
    241    JPEGLI_ERROR("Empty Huffman table");
    242  }
    243  if (total_symbols > kJpegHuffmanAlphabetSize) {
    244    JPEGLI_ERROR("Too many symbols in Huffman table");
    245  }
    246  if (total_p != (1u << kJpegHuffmanMaxBitLength)) {
    247    JPEGLI_ERROR("Invalid bit length distribution");
    248  }
    249  uint8_t symbol_seen[kJpegHuffmanAlphabetSize] = {};
    250  for (size_t i = 0; i < total_symbols; ++i) {
    251    uint8_t symbol = table->huffval[i];
    252    if (symbol_seen[symbol]) {
    253      JPEGLI_ERROR("Duplicate symbol %d in Huffman table", symbol);
    254    }
    255    symbol_seen[symbol] = 1;
    256  }
    257 }
    258 
    259 void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc) {
    260  // Huffman tables from the JPEG standard.
    261  static constexpr JHUFF_TBL kStandardDCTables[2] = {
    262      // DC luma
    263      {{0, 0, 1, 5, 1, 1, 1, 1, 1, 1},
    264       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
    265       FALSE},
    266      // DC chroma
    267      {{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1},
    268       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
    269       FALSE}};
    270  static constexpr JHUFF_TBL kStandardACTables[2] = {
    271      // AC luma
    272      {{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125},
    273       {0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06,
    274        0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    275        0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72,
    276        0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    277        0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45,
    278        0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    279        0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75,
    280        0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    281        0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3,
    282        0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    283        0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9,
    284        0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    285        0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4,
    286        0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
    287       FALSE},
    288      // AC chroma
    289      {{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119},
    290       {0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41,
    291        0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    292        0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1,
    293        0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    294        0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44,
    295        0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    296        0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74,
    297        0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    298        0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a,
    299        0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    300        0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    301        0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    302        0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4,
    303        0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
    304       FALSE}};
    305  const JHUFF_TBL* std_tables = is_dc ? kStandardDCTables : kStandardACTables;
    306  JHUFF_TBL** tables;
    307  if (cinfo->is_decompressor) {
    308    j_decompress_ptr cinfo_d = reinterpret_cast<j_decompress_ptr>(cinfo);
    309    tables = is_dc ? cinfo_d->dc_huff_tbl_ptrs : cinfo_d->ac_huff_tbl_ptrs;
    310  } else {
    311    j_compress_ptr cinfo_c = reinterpret_cast<j_compress_ptr>(cinfo);
    312    tables = is_dc ? cinfo_c->dc_huff_tbl_ptrs : cinfo_c->ac_huff_tbl_ptrs;
    313  }
    314  for (int i = 0; i < 2; ++i) {
    315    if (tables[i] == nullptr) {
    316      tables[i] = jpegli_alloc_huff_table(cinfo);
    317      memcpy(tables[i], &std_tables[i], sizeof(JHUFF_TBL));
    318      ValidateHuffmanTable(cinfo, tables[i], is_dc);
    319    }
    320  }
    321 }
    322 
    323 }  // namespace jpegli