enc_entropy_coder.cc (10443B)
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/enc_entropy_coder.h"

#include <cstddef>
#include <cstdint>
#include <vector>

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>

#include "lib/jxl/ac_context.h"
#include "lib/jxl/ac_strategy.h"
#include "lib/jxl/base/bits.h"
#include "lib/jxl/base/compiler_specific.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/coeff_order.h"
#include "lib/jxl/coeff_order_fwd.h"
#include "lib/jxl/entropy_coder.h"
#include "lib/jxl/image.h"
#include "lib/jxl/pack_signed.h"

HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {

// These templates are not found via ADL.
using hwy::HWY_NAMESPACE::Add;
using hwy::HWY_NAMESPACE::AndNot;
using hwy::HWY_NAMESPACE::Eq;
using hwy::HWY_NAMESPACE::GetLane;

// Returns the number of non-zero coefficients (but skips the LLF).
// We cannot rely on block[] being all-zero bits, so first truncate to integer.
// Also writes the per-8x8-block nzeros starting at nzeros_pos.
int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
                            const AcStrategy acs, const size_t covered_blocks,
                            const size_t log2_covered_blocks,
                            const int32_t* JXL_RESTRICT block,
                            const size_t nzeros_stride,
                            int32_t* JXL_RESTRICT nzeros_pos) {
  const HWY_CAPPED(int32_t, kBlockDim) di;

  const auto zero = Zero(di);
  // Add FF..FF for every zero coefficient, negate to get #zeros.
  auto neg_sum_zero = zero;

  {
    // Mask sufficient for one row of coefficients.
    HWY_ALIGN const int32_t
        llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
            -1, -1, -1, -1};
    // First cx=1,2,4 elements are FF..FF, others 0.
    const int32_t* llf_mask_pos =
        llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;

    // Rows with LLF: mask out the LLF
    for (size_t y = 0; y < cy; y++) {
      for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
        const auto llf_mask = LoadU(di, llf_mask_pos + x);

        // LLF counts as zero so we don't include it in nzeros.
        const auto coef =
            AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));

        neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
      }
    }
  }

  // Remaining rows: no mask
  for (size_t y = cy; y < cy * kBlockDim; y++) {
    for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
      const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // We want area - sum_zero, add because neg_sum_zero is already negated.
  const int32_t nzeros = static_cast<int32_t>(cx * cy * kDCTBlockSize) +
                         GetLane(SumOfLanes(di, neg_sum_zero));

  const int32_t shifted_nzeros = static_cast<int32_t>(
      (nzeros + covered_blocks - 1) >> log2_covered_blocks);
  // Need non-canonicalized dimensions!
  for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
    for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
      nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
    }
  }

  return nzeros;
}
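
// Editor's note: a minimal scalar sketch of what the vector loop above
// computes (illustrative only; not part of the original file and not used by
// the encoder). Eq(coef, zero) yields an all-ones lane (-1 as int32) per zero
// coefficient, so summing those masks and adding the block area gives the
// nonzero count; the cx*cy LLF values in the top-left corner are masked to
// zero first so they never count as nonzero.
//
//   int32_t ScalarNumNonZeroExceptLLF(size_t cx, size_t cy,
//                                     const int32_t* block) {
//     int32_t nzeros = 0;
//     for (size_t y = 0; y < cy * kBlockDim; y++) {
//       for (size_t x = 0; x < cx * kBlockDim; x++) {
//         const bool is_llf = (y < cy && x < cx);
//         if (!is_llf && block[y * cx * kBlockDim + x] != 0) nzeros++;
//       }
//     }
//     return nzeros;
//   }
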
// Specialization for 8x8, where only top-left is LLF/DC.
// About 1% overall speedup vs. NumNonZeroExceptLLF.
int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
                              int32_t* JXL_RESTRICT nzeros_pos) {
  const HWY_CAPPED(int32_t, kBlockDim) di;

  const auto zero = Zero(di);
  // Add FF..FF for every zero coefficient, negate to get #zeros.
  auto neg_sum_zero = zero;

  {
    // First row has DC, so mask
    const size_t y = 0;
    HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};

    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
      const auto dc_mask = Load(di, dc_mask_lanes + x);

      // DC counts as zero so we don't include it in nzeros.
      const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));

      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // Remaining rows: no mask
  for (size_t y = 1; y < kBlockDim; y++) {
    for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
      const auto coef = Load(di, &block[y * kBlockDim + x]);
      neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
    }
  }

  // We want 64 - sum_zero, add because neg_sum_zero is already negated.
  const int32_t nzeros = static_cast<int32_t>(kDCTBlockSize) +
                         GetLane(SumOfLanes(di, neg_sum_zero));

  *nzeros_pos = nzeros;

  return nzeros;
}
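
// Editor's note (illustrative, not in the original file): worked example of
// the counting trick used by both functions above. For an 8x8 block with 13
// nonzero AC coefficients, the other 50 AC values plus the masked-out DC give
// 51 zero lanes; each contributes -1 via VecFromMask(Eq(coef, zero)), so
// neg_sum_zero sums to -51 and nzeros = 64 + (-51) = 13.
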
// The number of nonzeros of each block is predicted from the top and the left
// blocks, with appropriate scaling to account for the number of 8x8 blocks
// each strategy covers. The predicted number of nonzeros divided by two is
// used as a context; if this number is above 63, a specific context is used.
// If the number of nonzeros of a strategy is above 63, it is written directly
// using a fixed number of bits (that depends on the size of the strategy).
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
                            const Rect& rect,
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
                            const AcStrategyImage& ac_strategy,
                            const YCbCrChromaSubsampling& cs,
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
                            std::vector<Token>* JXL_RESTRICT output,
                            const ImageB& qdc, const ImageI& qf,
                            const BlockCtxMap& block_ctx_map) {
  const size_t xsize_blocks = rect.xsize();
  const size_t ysize_blocks = rect.ysize();
  output->clear();
  // TODO(user): update the estimate: usually fewer coefficients are used.
  output->reserve(3 * xsize_blocks * ysize_blocks * kDCTBlockSize);

  size_t offset[3] = {};
  const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
  for (size_t by = 0; by < ysize_blocks; ++by) {
    size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
                     by >> cs.VShift(2)};
    int32_t* JXL_RESTRICT row_nzeros[3] = {
        tmp_num_nzeroes->PlaneRow(0, sby[0]),
        tmp_num_nzeroes->PlaneRow(1, sby[1]),
        tmp_num_nzeroes->PlaneRow(2, sby[2]),
    };
    const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
        sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
        sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
        sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
    };
    const uint8_t* JXL_RESTRICT row_qdc =
        qdc.ConstRow(rect.y0() + by) + rect.x0();
    const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
    AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
    for (size_t bx = 0; bx < xsize_blocks; ++bx) {
      AcStrategy acs = acs_row[bx];
      if (!acs.IsFirstBlock()) continue;
      size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
                       bx >> cs.HShift(2)};
      size_t cx = acs.covered_blocks_x();
      size_t cy = acs.covered_blocks_y();
      const size_t covered_blocks = cx * cy;  // = #LLF coefficients
      const size_t log2_covered_blocks =
          Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
      const size_t size = covered_blocks * kDCTBlockSize;

      CoefficientLayout(&cy, &cx);  // swap cx/cy to canonical order

      for (int c : {1, 0, 2}) {
        if (sbx[c] << cs.HShift(c) != bx) continue;
        if (sby[c] << cs.VShift(c) != by) continue;
        const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];

        int32_t nzeros =
            (covered_blocks == 1)
                ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
                : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
                                      log2_covered_blocks, block,
                                      nzeros_stride, row_nzeros[c] + sbx[c]);

        int ord = kStrategyOrder[acs.RawStrategy()];
        const coeff_order_t* JXL_RESTRICT order =
            &orders[CoeffOrderOffset(ord, c)];

        int32_t predicted_nzeros =
            PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
        size_t block_ctx =
            block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
        const int32_t nzero_ctx =
            block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);

        output->emplace_back(nzero_ctx, nzeros);
        const size_t histo_offset =
            block_ctx_map.ZeroDensityContextsOffset(block_ctx);
        // Skip LLF.
        size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
        for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
          int32_t coeff = block[order[k]];
          size_t ctx =
              histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
                                                log2_covered_blocks, prev);
          uint32_t u_coeff = PackSigned(coeff);
          output->emplace_back(ctx, u_coeff);
          prev = (coeff != 0) ? 1 : 0;
          nzeros -= prev;
        }
        JXL_ENSURE(nzeros == 0);
        offset[c] += size;
      }
    }
  }
  return true;
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace jxl
HWY_AFTER_NAMESPACE();

#if HWY_ONCE
namespace jxl {
HWY_EXPORT(TokenizeCoefficients);
Status TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
                            const Rect& rect,
                            const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
                            const AcStrategyImage& ac_strategy,
                            const YCbCrChromaSubsampling& cs,
                            Image3I* JXL_RESTRICT tmp_num_nzeroes,
                            std::vector<Token>* JXL_RESTRICT output,
                            const ImageB& qdc, const ImageI& qf,
                            const BlockCtxMap& block_ctx_map) {
  return HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
      orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
      block_ctx_map);
}

}  // namespace jxl
#endif  // HWY_ONCE
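
// Editor's note (illustrative, not part of the original file): for each block
// and channel, TokenizeCoefficients emits one token carrying the nonzero
// count in a context derived from the predicted count, followed by one token
// per coefficient position in the given scan order (LLF positions skipped),
// stopping once all nonzeros have been emitted. Coefficient values are mapped
// to unsigned symbols with PackSigned (lib/jxl/pack_signed.h), which maps
// small signed values to small unsigned codes in the usual zigzag fashion:
//
//   value:       0  -1   1  -2   2  -3   3 ...
//   PackSigned:  0   1   2   3   4   5   6 ...
//
// A decoder reverses this mapping with the matching UnpackSigned.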