tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

encode_finish.cc (7589B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jpegli/encode_finish.h"
      7 
      8 #include <cmath>
      9 #include <limits>
     10 
     11 #include "lib/jpegli/error.h"
     12 #include "lib/jpegli/memory_manager.h"
     13 #include "lib/jpegli/quant.h"
     14 
     15 #undef HWY_TARGET_INCLUDE
     16 #define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc"
     17 #include <hwy/foreach_target.h>
     18 #include <hwy/highway.h>
     19 
     20 #include "lib/jpegli/dct-inl.h"
     21 
     22 HWY_BEFORE_NAMESPACE();
     23 namespace jpegli {
     24 namespace HWY_NAMESPACE {
     25 
     26 // These templates are not found via ADL.
     27 using hwy::HWY_NAMESPACE::GetLane;
     28 
     29 using D = HWY_FULL(float);
     30 using DI = HWY_FULL(int32_t);
     31 using DI16 = Rebind<int16_t, HWY_FULL(int32_t)>;
     32 
     33 void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength,
     34                     const float* zero_bias_offset,
     35                     const float* zero_bias_mul) {
     36  D d;
     37  DI di;
     38  DI16 di16;
     39  const auto aq_mul = Set(d, aq_strength);
     40  for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
     41    const auto in = Load(di16, block + k);
     42    const auto val = ConvertTo(d, PromoteTo(di, in));
     43    const auto q = Load(d, qmc + k);
     44    const auto qval = Mul(val, q);
     45    const auto zb_offset = Load(d, zero_bias_offset + k);
     46    const auto zb_mul = Load(d, zero_bias_mul + k);
     47    const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
     48    const auto nzero_mask = Ge(Abs(qval), threshold);
     49    const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
     50    Store(DemoteTo(di16, ConvertTo(di, iqval)), di16, block + k);
     51  }
     52 }
     53 
     54 float BlockError(const int16_t* block, const float* qmc, const float* iqmc,
     55                 const float aq_strength, const float* zero_bias_offset,
     56                 const float* zero_bias_mul) {
     57  D d;
     58  DI di;
     59  DI16 di16;
     60  auto err = Zero(d);
     61  const auto scale = Set(d, 1.0 / 16);
     62  const auto aq_mul = Set(d, aq_strength);
     63  for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
     64    const auto in = Load(di16, block + k);
     65    const auto val = ConvertTo(d, PromoteTo(di, in));
     66    const auto q = Load(d, qmc + k);
     67    const auto qval = Mul(val, q);
     68    const auto zb_offset = Load(d, zero_bias_offset + k);
     69    const auto zb_mul = Load(d, zero_bias_mul + k);
     70    const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
     71    const auto nzero_mask = Ge(Abs(qval), threshold);
     72    const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
     73    const auto invq = Load(d, iqmc + k);
     74    const auto rval = Mul(iqval, invq);
     75    const auto diff = Mul(Sub(val, rval), scale);
     76    err = Add(err, Mul(diff, diff));
     77  }
     78  return GetLane(SumOfLanes(d, err));
     79 }
     80 
     81 void ComputeInverseWeights(const float* qmc, float* iqmc) {
     82  for (int k = 0; k < 64; ++k) {
     83    iqmc[k] = 1.0f / qmc[k];
     84  }
     85 }
     86 
     87 float ComputePSNR(j_compress_ptr cinfo, int sampling) {
     88  jpeg_comp_master* m = cinfo->master;
     89  InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
     90  double error = 0.0;
     91  size_t num = 0;
     92  for (int c = 0; c < cinfo->num_components; ++c) {
     93    jpeg_component_info* comp = &cinfo->comp_info[c];
     94    const float* qmc = m->quant_mul[c];
     95    const int h_factor = m->h_factor[c];
     96    const int v_factor = m->v_factor[c];
     97    const float* zero_bias_offset = m->zero_bias_offset[c];
     98    const float* zero_bias_mul = m->zero_bias_mul[c];
     99    HWY_ALIGN float iqmc[64];
    100    ComputeInverseWeights(qmc, iqmc);
    101    for (JDIMENSION by = 0; by < comp->height_in_blocks; by += sampling) {
    102      JBLOCKARRAY blocks = GetBlockRow(cinfo, c, by);
    103      const float* qf = m->quant_field.Row(by * v_factor);
    104      for (JDIMENSION bx = 0; bx < comp->width_in_blocks; bx += sampling) {
    105        error += BlockError(&blocks[0][bx][0], qmc, iqmc, qf[bx * h_factor],
    106                            zero_bias_offset, zero_bias_mul);
    107        num += DCTSIZE2;
    108      }
    109    }
    110  }
    111  return 4.3429448f * log(num / (error / 255. / 255.));
    112 }
    113 
    114 void ReQuantizeCoeffs(j_compress_ptr cinfo) {
    115  jpeg_comp_master* m = cinfo->master;
    116  InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
    117  for (int c = 0; c < cinfo->num_components; ++c) {
    118    jpeg_component_info* comp = &cinfo->comp_info[c];
    119    const float* qmc = m->quant_mul[c];
    120    const int h_factor = m->h_factor[c];
    121    const int v_factor = m->v_factor[c];
    122    const float* zero_bias_offset = m->zero_bias_offset[c];
    123    const float* zero_bias_mul = m->zero_bias_mul[c];
    124    for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
    125      JBLOCKARRAY block = GetBlockRow(cinfo, c, by);
    126      const float* qf = m->quant_field.Row(by * v_factor);
    127      for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
    128        ReQuantizeBlock(&block[0][bx][0], qmc, qf[bx * h_factor],
    129                        zero_bias_offset, zero_bias_mul);
    130      }
    131    }
    132  }
    133 }
    134 
    135 // NOLINTNEXTLINE(google-readability-namespace-comments)
    136 }  // namespace HWY_NAMESPACE
    137 }  // namespace jpegli
    138 HWY_AFTER_NAMESPACE();
    139 
    140 #if HWY_ONCE
    141 namespace jpegli {
    142 namespace {
    143 HWY_EXPORT(ComputePSNR);
    144 HWY_EXPORT(ReQuantizeCoeffs);
    145 
    146 void ReQuantizeCoeffs(j_compress_ptr cinfo) {
    147  HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo);
    148 }
    149 
    150 float ComputePSNR(j_compress_ptr cinfo, int sampling) {
    151  return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling);
    152 }
    153 
    154 void UpdateDistance(j_compress_ptr cinfo, float distance) {
    155  float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
    156  SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true);
    157 }
    158 
    159 float Clamp(float val, float minval, float maxval) {
    160  return std::max(minval, std::min(maxval, val));
    161 }
    162 
    163 #define PSNR_SEARCH_DBG 0
    164 
    165 float FindDistanceForPSNR(j_compress_ptr cinfo) {
    166  constexpr int kMaxIters = 20;
    167  const float psnr_target = cinfo->master->psnr_target;
    168  const float tolerance = cinfo->master->psnr_tolerance;
    169  const float min_dist = cinfo->master->min_distance;
    170  const float max_dist = cinfo->master->max_distance;
    171  float d = Clamp(1.0f, min_dist, max_dist);
    172  for (int sampling : {4, 1}) {
    173    float best_diff = std::numeric_limits<float>::max();
    174    float best_distance = 0.0f;
    175    float best_psnr = 0.0;
    176    float dmin = min_dist;
    177    float dmax = max_dist;
    178    bool found_lower_bound = false;
    179    bool found_upper_bound = false;
    180    for (int i = 0; i < kMaxIters; ++i) {
    181      UpdateDistance(cinfo, d);
    182      float psnr = ComputePSNR(cinfo, sampling);
    183      if (psnr > psnr_target) {
    184        dmin = d;
    185        found_lower_bound = true;
    186      } else {
    187        dmax = d;
    188        found_upper_bound = true;
    189      }
    190 #if (PSNR_SEARCH_DBG > 1)
    191      printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr);
    192      if (found_upper_bound && found_lower_bound) {
    193        printf("    d-interval: [ %7.4f .. %7.4f ]", dmin, dmax);
    194      }
    195      printf("\n");
    196 #endif
    197      float diff = std::abs(psnr - psnr_target);
    198      if (diff < best_diff) {
    199        best_diff = diff;
    200        best_distance = d;
    201        best_psnr = psnr;
    202      }
    203      if (diff < tolerance * psnr_target || dmin == dmax) {
    204        break;
    205      }
    206      if (!found_lower_bound || !found_upper_bound) {
    207        d *= std::exp(0.15f * (psnr - psnr_target));
    208      } else {
    209        d = 0.5f * (dmin + dmax);
    210      }
    211      d = Clamp(d, min_dist, max_dist);
    212    }
    213    d = best_distance;
    214    if (sampling == 1 && PSNR_SEARCH_DBG) {
    215      printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d);
    216    } else {
    217      (void)best_psnr;
    218    }
    219  }
    220  return d;
    221 }
    222 
    223 }  // namespace
    224 
    225 void QuantizetoPSNR(j_compress_ptr cinfo) {
    226  float distance = FindDistanceForPSNR(cinfo);
    227  UpdateDistance(cinfo, distance);
    228  ReQuantizeCoeffs(cinfo);
    229 }
    230 
    231 }  // namespace jpegli
    232 #endif  // HWY_ONCE