tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

enc_coeff_order.cc (12602B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include <jxl/memory_manager.h>
      7 
      8 #include "lib/jxl/base/status.h"
      9 #include "lib/jxl/memory_manager_internal.h"
     10 
     11 // Suppress any -Wdeprecated-declarations warning that might be emitted by
     12 // GCC or Clang by std::stable_sort in C++17 or later mode
     13 #ifdef __clang__
     14 #pragma clang diagnostic push
     15 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
     16 #elif defined(__GNUC__)
     17 #pragma GCC push_options
     18 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
     19 #endif
     20 
     21 #include <algorithm>
     22 
     23 #ifdef __clang__
     24 #pragma clang diagnostic pop
     25 #elif defined(__GNUC__)
     26 #pragma GCC pop_options
     27 #endif
     28 
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <utility>
#include <vector>
     32 
     33 #include "lib/jxl/ac_strategy.h"
     34 #include "lib/jxl/base/rect.h"
     35 #include "lib/jxl/coeff_order.h"
     36 #include "lib/jxl/coeff_order_fwd.h"
     37 #include "lib/jxl/dct_util.h"
     38 #include "lib/jxl/enc_ans.h"
     39 #include "lib/jxl/enc_bit_writer.h"
     40 #include "lib/jxl/lehmer_code.h"
     41 
     42 namespace jxl {
     43 
     44 struct AuxOut;
     45 enum class LayerType : uint8_t;
     46 
     47 std::pair<uint32_t, uint32_t> ComputeUsedOrders(
     48    const SpeedTier speed, const AcStrategyImage& ac_strategy,
     49    const Rect& rect) {
     50  // No coefficient reordering in Falcon or faster.
     51  // Only uses DCT8 = 0, so bitfield = 1.
     52  if (speed >= SpeedTier::kFalcon) return {1, 1};
     53 
     54  uint32_t ret = 0;
     55  uint32_t ret_customize = 0;
     56  size_t xsize_blocks = rect.xsize();
     57  size_t ysize_blocks = rect.ysize();
     58  // TODO(veluca): precompute when doing DCT.
     59  for (size_t by = 0; by < ysize_blocks; ++by) {
     60    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
     61    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
     62      int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
     63      // Do not customize coefficient orders for blocks bigger than 32x32.
     64      ret |= 1u << ord;
     65      if (ord > 6) {
     66        continue;
     67      }
     68      ret_customize |= 1u << ord;
     69    }
     70  }
     71  // Use default orders for small images.
     72  if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0};
     73  return {ret, ret_customize};
     74 }
     75 
// Computes, for each transform order used in the frame, a custom coefficient
// order sorted by how often each position holds a zero coefficient (rarely
// zero positions come first), writing results into `order`. Orders already
// committed in earlier passes (`prev_used_acs`, `all_used_orders`) are
// skipped; orders that end up identical to the natural order are removed from
// `current_used_orders`, which is then merged into `all_used_orders`.
Status ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
                         const AcStrategyImage& ac_strategy,
                         const FrameDimensions& frame_dim,
                         uint32_t& all_used_orders, uint32_t prev_used_acs,
                         uint32_t current_used_acs,
                         uint32_t current_used_orders,
                         coeff_order_t* JXL_RESTRICT order) {
  JxlMemoryManager* memory_manager = ac_strategy.memory_manager();
  // Zero-coefficient counts, laid out per (order, channel, position) via
  // CoeffOrderOffset below.
  std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);
  // If compressing at high speed and only using 8x8 DCTs, only consider a
  // subset of blocks.
  double block_fraction = 1.0f;
  // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
  // encoding significantly less dense.
  if (speed >= SpeedTier::kSquirrel && current_used_orders == 1) {
    block_fraction = 0.5f;
  }
  // No need to compute number of zero coefficients if all orders are the
  // default.
  if (current_used_orders != 0) {
    // A block is sampled when the PRNG's top 32 bits fall at or below this
    // threshold, i.e. with probability ~block_fraction.
    uint64_t threshold =
        (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
    uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
                     static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
    // Xorshift128+ adapted from xorshift128+-inl.h
    auto use_sample = [&]() {
      auto s1 = s[0];
      const auto s0 = s[1];
      const auto bits = s1 + s0;  // b, c
      s[0] = s0;
      s1 ^= s1 << 23;
      s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
      s[1] = s1;
      return (bits >> 32) <= threshold;
    };

    // Count number of zero coefficients, separately for each DCT band.
    // TODO(veluca): precompute when doing DCT.
    for (size_t group_index = 0; group_index < frame_dim.num_groups;
         group_index++) {
      const size_t gx = group_index % frame_dim.xsize_groups;
      const size_t gy = group_index / frame_dim.xsize_groups;
      const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
                      kGroupDimInBlocks, kGroupDimInBlocks,
                      frame_dim.xsize_blocks, frame_dim.ysize_blocks);
      ConstACPtr rows[3];
      ACType type = acs.Type();
      for (size_t c = 0; c < 3; c++) {
        rows[c] = acs.PlaneRow(c, group_index, 0);
      }
      // Running offset into the per-group coefficient rows; advanced by each
      // covered block, even for blocks skipped by sampling.
      size_t ac_offset = 0;

      // TODO(veluca): SIMDfy.
      for (size_t by = 0; by < rect.ysize(); ++by) {
        AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
        for (size_t bx = 0; bx < rect.xsize(); ++bx) {
          AcStrategy acs = acs_row[bx];
          if (!acs.IsFirstBlock()) continue;
          if (!use_sample()) continue;
          size_t size = kDCTBlockSize << acs.log2_covered_blocks();
          for (size_t c = 0; c < 3; ++c) {
            const size_t order_offset =
                CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
            // Two branches, one per storage width of the AC coefficients.
            if (type == ACType::k16) {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            } else {
              for (size_t k = 0; k < size; k++) {
                bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
                num_zeros[order_offset + k] += is_zero ? 1 : 0;
              }
            }
            // Ensure LLFs are first in the order.
            // -1 sorts before any real count in the stable sort below.
            size_t cx = acs.covered_blocks_x();
            size_t cy = acs.covered_blocks_y();
            CoefficientLayout(&cy, &cx);
            for (size_t iy = 0; iy < cy; iy++) {
              for (size_t ix = 0; ix < cx; ix++) {
                num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
              }
            }
          }
          ac_offset += size;
        }
      }
    }
  }
  struct PosAndCount {
    uint32_t pos;    // coefficient position in the block
    uint32_t count;  // quantized zero count used as sort key
  };
  // Scratch buffer for one transform's (position, count) pairs.
  size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(PosAndCount);
  JXL_ASSIGN_OR_RETURN(auto mem,
                       AlignedMemory::Create(memory_manager, mem_bytes));

  std::vector<coeff_order_t> natural_order_buffer;

  uint16_t computed = 0;
  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    uint8_t ord = kStrategyOrder[o];
    // Several strategies may share an order; process each order only once.
    if (computed & (1 << ord)) continue;
    computed |= 1 << ord;
    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();

    // Do nothing for transforms that don't appear.
    if ((1 << ord) & ~current_used_acs) continue;

    // Do nothing if we already committed to this custom order previously.
    if ((1 << ord) & prev_used_acs) continue;
    if ((1 << ord) & all_used_orders) continue;

    if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
    acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());

    // Ensure natural coefficient order is not permuted if the order is
    // not transmitted.
    if ((1 << ord) & ~current_used_orders) {
      for (size_t c = 0; c < 3; c++) {
        size_t offset = CoeffOrderOffset(ord, c);
        JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz);
        memcpy(&order[offset], natural_order_buffer.data(),
               sz * sizeof(*order));
      }
      continue;
    }

    bool is_nondefault = false;
    for (uint8_t c = 0; c < 3; c++) {
      // Apply zig-zag order.
      PosAndCount* pos_and_val = mem.address<PosAndCount>();
      size_t offset = CoeffOrderOffset(ord, c);
      JXL_ENSURE(CoeffOrderOffset(ord, c + 1) - offset == sz);
      float inv_sqrt_sz = 1.0f / std::sqrt(sz);
      for (size_t i = 0; i < sz; ++i) {
        size_t pos = natural_order_buffer[i];
        pos_and_val[i].pos = pos;
        // We don't care for the exact number -> quantize number of zeros,
        // to get less permuted order.
        pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
      }

      // Stable-sort -> elements with same number of zeros will preserve their
      // order.
      auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
        return a.count < b.count;
      };
      std::stable_sort(pos_and_val, pos_and_val + sz, comparator);

      // Grab indices.
      for (size_t i = 0; i < sz; ++i) {
        order[offset + i] = pos_and_val[i].pos;
        is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
      }
    }
    // Sorting left the natural order intact for all channels: no need to
    // transmit a custom order for this transform after all.
    if (!is_nondefault) {
      current_used_orders &= ~(1 << ord);
    }
  }
  all_used_orders |= current_used_orders;
  return true;
}
    240 
    241 namespace {
    242 
    243 Status TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
    244                           size_t size, std::vector<Token>* tokens) {
    245  std::vector<LehmerT> lehmer(size);
    246  std::vector<uint32_t> temp(size + 1);
    247  JXL_RETURN_IF_ERROR(
    248      ComputeLehmerCode(order, temp.data(), size, lehmer.data()));
    249  size_t end = size;
    250  while (end > skip && lehmer[end - 1] == 0) {
    251    --end;
    252  }
    253  tokens->emplace_back(CoeffOrderContext(size), end - skip);
    254  uint32_t last = 0;
    255  for (size_t i = skip; i < end; ++i) {
    256    tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
    257    last = lehmer[i];
    258  }
    259  return true;
    260 }
    261 
    262 }  // namespace
    263 
    264 Status EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
    265                         size_t size, BitWriter* writer, LayerType layer,
    266                         AuxOut* aux_out) {
    267  JxlMemoryManager* memory_manager = writer->memory_manager();
    268  std::vector<std::vector<Token>> tokens(1);
    269  JXL_RETURN_IF_ERROR(TokenizePermutation(order, skip, size, tokens.data()));
    270  std::vector<uint8_t> context_map;
    271  EntropyEncodingData codes;
    272  JXL_ASSIGN_OR_RETURN(
    273      size_t cost, BuildAndEncodeHistograms(
    274                       memory_manager, HistogramParams(), kPermutationContexts,
    275                       tokens, &codes, &context_map, writer, layer, aux_out));
    276  (void)cost;
    277  JXL_RETURN_IF_ERROR(
    278      WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out));
    279  return true;
    280 }
    281 
    282 namespace {
    283 Status EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
    284                        std::vector<Token>* tokens, coeff_order_t* order_zigzag,
    285                        std::vector<coeff_order_t>& natural_order_lut) {
    286  const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
    287  const size_t size = kDCTBlockSize * llf;
    288  for (size_t i = 0; i < size; ++i) {
    289    order_zigzag[i] = natural_order_lut[order[i]];
    290  }
    291  JXL_RETURN_IF_ERROR(TokenizePermutation(order_zigzag, llf, size, tokens));
    292  return true;
    293 }
    294 }  // namespace
    295 
    296 Status EncodeCoeffOrders(uint16_t used_orders,
    297                         const coeff_order_t* JXL_RESTRICT order,
    298                         BitWriter* writer, LayerType layer,
    299                         AuxOut* JXL_RESTRICT aux_out) {
    300  JxlMemoryManager* memory_manager = writer->memory_manager();
    301  size_t mem_bytes = AcStrategy::kMaxCoeffArea * sizeof(coeff_order_t);
    302  JXL_ASSIGN_OR_RETURN(auto mem,
    303                       AlignedMemory::Create(memory_manager, mem_bytes));
    304  uint16_t computed = 0;
    305  std::vector<std::vector<Token>> tokens(1);
    306  std::vector<coeff_order_t> natural_order_lut;
    307  for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
    308    uint8_t ord = kStrategyOrder[o];
    309    if (computed & (1 << ord)) continue;
    310    computed |= 1 << ord;
    311    if ((used_orders & (1 << ord)) == 0) continue;
    312    AcStrategy acs = AcStrategy::FromRawStrategy(o);
    313    const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
    314    const size_t size = kDCTBlockSize * llf;
    315    if (natural_order_lut.size() < size) natural_order_lut.resize(size);
    316    acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data());
    317    for (size_t c = 0; c < 3; c++) {
    318      JXL_RETURN_IF_ERROR(
    319          EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, tokens.data(),
    320                           mem.address<coeff_order_t>(), natural_order_lut));
    321    }
    322  }
    323  // Do not write anything if no order is used.
    324  if (used_orders != 0) {
    325    std::vector<uint8_t> context_map;
    326    EntropyEncodingData codes;
    327    JXL_ASSIGN_OR_RETURN(
    328        size_t cost,
    329        BuildAndEncodeHistograms(memory_manager, HistogramParams(),
    330                                 kPermutationContexts, tokens, &codes,
    331                                 &context_map, writer, layer, aux_out));
    332    (void)cost;
    333    JXL_RETURN_IF_ERROR(
    334        WriteTokens(tokens[0], codes, context_map, 0, writer, layer, aux_out));
    335  }
    336  return true;
    337 }
    338 
    339 }  // namespace jxl