tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

enc_huffman_tree.cc (10319B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jxl/enc_huffman_tree.h"
      7 
      8 // Suppress any -Wdeprecated-declarations warning that might be emitted by
      9 // GCC or Clang by std::stable_sort in C++17 or later mode
     10 #ifdef __clang__
     11 #pragma clang diagnostic push
     12 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
     13 #elif defined(__GNUC__)
     14 #pragma GCC push_options
     15 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
     16 #endif
     17 
     18 #include <algorithm>
     19 
     20 #ifdef __clang__
     21 #pragma clang diagnostic pop
     22 #elif defined(__GNUC__)
     23 #pragma GCC pop_options
     24 #endif
     25 
     26 #include <limits>
     27 #include <vector>
     28 
     29 #include "lib/jxl/base/status.h"
     30 
     31 namespace jxl {
     32 
     33 void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
     34              uint8_t level) {
     35  if (p.index_left >= 0) {
     36    ++level;
     37    SetDepth(pool[p.index_left], pool, depth, level);
     38    SetDepth(pool[p.index_right_or_value], pool, depth, level);
     39  } else {
     40    depth[p.index_right_or_value] = level;
     41  }
     42 }
     43 
     44 // Sort the root nodes, least popular first.
     45 static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) {
     46  return v0.total_count < v1.total_count;
     47 }
     48 
     49 // This function will create a Huffman tree.
     50 //
     51 // The catch here is that the tree cannot be arbitrarily deep.
     52 // Brotli specifies a maximum depth of 15 bits for "code trees"
     53 // and 7 bits for "code length code trees."
     54 //
     55 // count_limit is the value that is to be faked as the minimum value
     56 // and this minimum value is raised until the tree matches the
     57 // maximum length requirement.
     58 //
     59 // This algorithm is not of excellent performance for very long data blocks,
     60 // especially when population counts are longer than 2**tree_limit, but
     61 // we are not planning to use this with extremely long blocks.
     62 //
     63 // See http://en.wikipedia.org/wiki/Huffman_coding
     64 void CreateHuffmanTree(const uint32_t* data, const size_t length,
     65                       const int tree_limit, uint8_t* depth) {
     66  // For block sizes below 64 kB, we never need to do a second iteration
     67  // of this loop. Probably all of our block sizes will be smaller than
     68  // that, so this loop is mostly of academic interest. If we actually
     69  // would need this, we would be better off with the Katajainen algorithm.
     70  for (uint32_t count_limit = 1;; count_limit *= 2) {
     71    std::vector<HuffmanTree> tree;
     72    tree.reserve(2 * length + 1);
     73 
     74    for (size_t i = length; i != 0;) {
     75      --i;
     76      if (data[i]) {
     77        const uint32_t count = std::max(data[i], count_limit - 1);
     78        tree.emplace_back(count, -1, static_cast<int16_t>(i));
     79      }
     80    }
     81 
     82    const size_t n = tree.size();
     83    if (n == 1) {
     84      // Fake value; will be fixed on upper level.
     85      depth[tree[0].index_right_or_value] = 1;
     86      break;
     87    }
     88 
     89    std::stable_sort(tree.begin(), tree.end(), Compare);
     90 
     91    // The nodes are:
     92    // [0, n): the sorted leaf nodes that we start with.
     93    // [n]: we add a sentinel here.
     94    // [n + 1, 2n): new parent nodes are added here, starting from
     95    //              (n+1). These are naturally in ascending order.
     96    // [2n]: we add a sentinel at the end as well.
     97    // There will be (2n+1) elements at the end.
     98    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
     99    tree.push_back(sentinel);
    100    tree.push_back(sentinel);
    101 
    102    size_t i = 0;      // Points to the next leaf node.
    103    size_t j = n + 1;  // Points to the next non-leaf node.
    104    for (size_t k = n - 1; k != 0; --k) {
    105      size_t left;
    106      size_t right;
    107      if (tree[i].total_count <= tree[j].total_count) {
    108        left = i;
    109        ++i;
    110      } else {
    111        left = j;
    112        ++j;
    113      }
    114      if (tree[i].total_count <= tree[j].total_count) {
    115        right = i;
    116        ++i;
    117      } else {
    118        right = j;
    119        ++j;
    120      }
    121 
    122      // The sentinel node becomes the parent node.
    123      size_t j_end = tree.size() - 1;
    124      tree[j_end].total_count =
    125          tree[left].total_count + tree[right].total_count;
    126      tree[j_end].index_left = static_cast<int16_t>(left);
    127      tree[j_end].index_right_or_value = static_cast<int16_t>(right);
    128 
    129      // Add back the last sentinel node.
    130      tree.push_back(sentinel);
    131    }
    132    JXL_DASSERT(tree.size() == 2 * n + 1);
    133    SetDepth(tree[2 * n - 1], tree.data(), depth, 0);
    134 
    135    // We need to pack the Huffman tree in tree_limit bits.
    136    // If this was not successful, add fake entities to the lowest values
    137    // and retry.
    138    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
    139      break;
    140    }
    141  }
    142 }
    143 
    144 void Reverse(uint8_t* v, size_t start, size_t end) {
    145  --end;
    146  while (start < end) {
    147    uint8_t tmp = v[start];
    148    v[start] = v[end];
    149    v[end] = tmp;
    150    ++start;
    151    --end;
    152  }
    153 }
    154 
    155 void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
    156                                 const uint8_t value, size_t repetitions,
    157                                 size_t* tree_size, uint8_t* tree,
    158                                 uint8_t* extra_bits_data) {
    159  JXL_DASSERT(repetitions > 0);
    160  if (previous_value != value) {
    161    tree[*tree_size] = value;
    162    extra_bits_data[*tree_size] = 0;
    163    ++(*tree_size);
    164    --repetitions;
    165  }
    166  if (repetitions == 7) {
    167    tree[*tree_size] = value;
    168    extra_bits_data[*tree_size] = 0;
    169    ++(*tree_size);
    170    --repetitions;
    171  }
    172  if (repetitions < 3) {
    173    for (size_t i = 0; i < repetitions; ++i) {
    174      tree[*tree_size] = value;
    175      extra_bits_data[*tree_size] = 0;
    176      ++(*tree_size);
    177    }
    178  } else {
    179    repetitions -= 3;
    180    size_t start = *tree_size;
    181    while (true) {
    182      tree[*tree_size] = 16;
    183      extra_bits_data[*tree_size] = repetitions & 0x3;
    184      ++(*tree_size);
    185      repetitions >>= 2;
    186      if (repetitions == 0) {
    187        break;
    188      }
    189      --repetitions;
    190    }
    191    Reverse(tree, start, *tree_size);
    192    Reverse(extra_bits_data, start, *tree_size);
    193  }
    194 }
    195 
    196 void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
    197                                      uint8_t* tree, uint8_t* extra_bits_data) {
    198  if (repetitions == 11) {
    199    tree[*tree_size] = 0;
    200    extra_bits_data[*tree_size] = 0;
    201    ++(*tree_size);
    202    --repetitions;
    203  }
    204  if (repetitions < 3) {
    205    for (size_t i = 0; i < repetitions; ++i) {
    206      tree[*tree_size] = 0;
    207      extra_bits_data[*tree_size] = 0;
    208      ++(*tree_size);
    209    }
    210  } else {
    211    repetitions -= 3;
    212    size_t start = *tree_size;
    213    while (true) {
    214      tree[*tree_size] = 17;
    215      extra_bits_data[*tree_size] = repetitions & 0x7;
    216      ++(*tree_size);
    217      repetitions >>= 3;
    218      if (repetitions == 0) {
    219        break;
    220      }
    221      --repetitions;
    222    }
    223    Reverse(tree, start, *tree_size);
    224    Reverse(extra_bits_data, start, *tree_size);
    225  }
    226 }
    227 
    228 static void DecideOverRleUse(const uint8_t* depth, const size_t length,
    229                             bool* use_rle_for_non_zero,
    230                             bool* use_rle_for_zero) {
    231  size_t total_reps_zero = 0;
    232  size_t total_reps_non_zero = 0;
    233  size_t count_reps_zero = 1;
    234  size_t count_reps_non_zero = 1;
    235  for (size_t i = 0; i < length;) {
    236    const uint8_t value = depth[i];
    237    size_t reps = 1;
    238    for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
    239      ++reps;
    240    }
    241    if (reps >= 3 && value == 0) {
    242      total_reps_zero += reps;
    243      ++count_reps_zero;
    244    }
    245    if (reps >= 4 && value != 0) {
    246      total_reps_non_zero += reps;
    247      ++count_reps_non_zero;
    248    }
    249    i += reps;
    250  }
    251  *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
    252  *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
    253 }
    254 
    255 void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
    256                      uint8_t* tree, uint8_t* extra_bits_data) {
    257  uint8_t previous_value = 8;
    258 
    259  // Throw away trailing zeros.
    260  size_t new_length = length;
    261  for (size_t i = 0; i < length; ++i) {
    262    if (depth[length - i - 1] == 0) {
    263      --new_length;
    264    } else {
    265      break;
    266    }
    267  }
    268 
    269  // First gather statistics on if it is a good idea to do rle.
    270  bool use_rle_for_non_zero = false;
    271  bool use_rle_for_zero = false;
    272  if (length > 50) {
    273    // Find rle coding for longer codes.
    274    // Shorter codes seem not to benefit from rle.
    275    DecideOverRleUse(depth, new_length, &use_rle_for_non_zero,
    276                     &use_rle_for_zero);
    277  }
    278 
    279  // Actual rle coding.
    280  for (size_t i = 0; i < new_length;) {
    281    const uint8_t value = depth[i];
    282    size_t reps = 1;
    283    if ((value != 0 && use_rle_for_non_zero) ||
    284        (value == 0 && use_rle_for_zero)) {
    285      for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
    286        ++reps;
    287      }
    288    }
    289    if (value == 0) {
    290      WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
    291    } else {
    292      WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree,
    293                                  extra_bits_data);
    294      previous_value = value;
    295    }
    296    i += reps;
    297  }
    298 }
    299 
    300 namespace {
    301 
    302 uint16_t ReverseBits(int num_bits, uint16_t bits) {
    303  static const size_t kLut[16] = {// Pre-reversed 4-bit values.
    304                                  0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    305                                  0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
    306  size_t retval = kLut[bits & 0xf];
    307  for (int i = 4; i < num_bits; i += 4) {
    308    retval <<= 4;
    309    bits = static_cast<uint16_t>(bits >> 4);
    310    retval |= kLut[bits & 0xf];
    311  }
    312  retval >>= (-num_bits & 0x3);
    313  return static_cast<uint16_t>(retval);
    314 }
    315 
    316 }  // namespace
    317 
    318 void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
    319                               uint16_t* bits) {
    320  // In Brotli, all bit depths are [1..15]
    321  // 0 bit depth means that the symbol does not exist.
    322  const int kMaxBits = 16;  // 0..15 are values for bits
    323  uint16_t bl_count[kMaxBits] = {0};
    324  {
    325    for (size_t i = 0; i < len; ++i) {
    326      ++bl_count[depth[i]];
    327    }
    328    bl_count[0] = 0;
    329  }
    330  uint16_t next_code[kMaxBits];
    331  next_code[0] = 0;
    332  {
    333    int code = 0;
    334    for (size_t i = 1; i < kMaxBits; ++i) {
    335      code = (code + bl_count[i - 1]) << 1;
    336      next_code[i] = static_cast<uint16_t>(code);
    337    }
    338  }
    339  for (size_t i = 0; i < len; ++i) {
    340    if (depth[i]) {
    341      bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
    342    }
    343  }
    344 }
    345 
    346 }  // namespace jxl