tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

dec_noise.cc (5361B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "lib/jxl/dec_noise.h"
      7 
      8 #include <cstdint>
      9 #include <cstdlib>
     10 #include <utility>
     11 
     12 #undef HWY_TARGET_INCLUDE
     13 #define HWY_TARGET_INCLUDE "lib/jxl/dec_noise.cc"
     14 #include <hwy/foreach_target.h>
     15 #include <hwy/highway.h>
     16 
     17 #include "lib/jxl/base/compiler_specific.h"
     18 #include "lib/jxl/base/rect.h"
     19 #include "lib/jxl/frame_dimensions.h"
     20 #include "lib/jxl/image.h"
     21 #include "lib/jxl/xorshift128plus-inl.h"
     22 
     23 HWY_BEFORE_NAMESPACE();
     24 namespace jxl {
     25 namespace HWY_NAMESPACE {
     26 
     27 // These templates are not found via ADL.
     28 using hwy::HWY_NAMESPACE::Or;
     29 using hwy::HWY_NAMESPACE::ShiftRight;
     30 using hwy::HWY_NAMESPACE::Vec;
     31 
     32 using D = HWY_CAPPED(float, kBlockDim);
     33 using DI = hwy::HWY_NAMESPACE::Rebind<int, D>;
     34 using DI8 = hwy::HWY_NAMESPACE::Repartition<uint8_t, D>;
     35 
     36 // Converts one vector's worth of random bits to floats in [1, 2).
     37 // NOTE: as the convolution kernel sums to 0, it doesn't matter if inputs are in
     38 // [0, 1) or in [1, 2).
     39 void BitsToFloat(const uint32_t* JXL_RESTRICT random_bits,
     40                 float* JXL_RESTRICT floats) {
     41  const HWY_FULL(float) df;
     42  const HWY_FULL(uint32_t) du;
     43 
     44  const auto bits = Load(du, random_bits);
     45  // 1.0 + 23 random mantissa bits = [1, 2)
     46  const auto rand12 = BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000)));
     47  Store(rand12, df, floats);
     48 }
     49 
     50 void RandomImage(Xorshift128Plus* rng, const Rect& rect,
     51                 ImageF* JXL_RESTRICT noise) {
     52  const size_t xsize = rect.xsize();
     53  const size_t ysize = rect.ysize();
     54 
     55  // May exceed the vector size, hence we have two loops over x below.
     56  constexpr size_t kFloatsPerBatch =
     57      Xorshift128Plus::N * sizeof(uint64_t) / sizeof(float);
     58  HWY_ALIGN uint64_t batch[Xorshift128Plus::N] = {};
     59 
     60  const HWY_FULL(float) df;
     61  const size_t N = Lanes(df);
     62 
     63  for (size_t y = 0; y < ysize; ++y) {
     64    float* JXL_RESTRICT row = rect.Row(noise, y);
     65 
     66    size_t x = 0;
     67    // Only entire batches (avoids exceeding the image padding).
     68    for (; x + kFloatsPerBatch < xsize; x += kFloatsPerBatch) {
     69      rng->Fill(batch);
     70      for (size_t i = 0; i < kFloatsPerBatch; i += Lanes(df)) {
     71        BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + i, row + x + i);
     72      }
     73    }
     74 
     75    // Any remaining pixels, rounded up to vectors (safe due to padding).
     76    rng->Fill(batch);
     77    size_t batch_pos = 0;  // < kFloatsPerBatch
     78    for (; x < xsize; x += N) {
     79      BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + batch_pos,
     80                  row + x);
     81      batch_pos += N;
     82    }
     83  }
     84 }
     85 void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index,
     86                   size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0,
     87                   const std::pair<ImageF*, Rect>& plane1,
     88                   const std::pair<ImageF*, Rect>& plane2) {
     89  HWY_ALIGN Xorshift128Plus rng(visible_frame_index, nonvisible_frame_index, x0,
     90                                y0);
     91  RandomImage(&rng, plane0.second, plane0.first);
     92  RandomImage(&rng, plane1.second, plane1.first);
     93  RandomImage(&rng, plane2.second, plane2.first);
     94 }
     95 
     96 // NOLINTNEXTLINE(google-readability-namespace-comments)
     97 }  // namespace HWY_NAMESPACE
     98 }  // namespace jxl
     99 HWY_AFTER_NAMESPACE();
    100 
    101 #if HWY_ONCE
    102 namespace jxl {
    103 
    104 namespace {
    105 HWY_EXPORT(Random3Planes);
    106 }  // namespace
    107 
    108 void PrepareNoiseInput(const PassesDecoderState& dec_state,
    109                       const FrameDimensions& frame_dim,
    110                       const FrameHeader& frame_header, size_t group_index,
    111                       size_t thread) {
    112  size_t group_dim = frame_dim.group_dim;
    113  const size_t gx = group_index % frame_dim.xsize_groups;
    114  const size_t gy = group_index / frame_dim.xsize_groups;
    115  RenderPipelineInput input =
    116      dec_state.render_pipeline->GetInputBuffers(group_index, thread);
    117  size_t noise_c_start =
    118      3 + frame_header.nonserialized_metadata->m.num_extra_channels;
    119  // When the color channels are downsampled, we need to generate more noise
    120  // input for the current group than just the group dimensions.
    121  std::pair<ImageF*, Rect> rects[3];
    122  for (size_t iy = 0; iy < frame_header.upsampling; iy++) {
    123    for (size_t ix = 0; ix < frame_header.upsampling; ix++) {
    124      for (size_t c = 0; c < 3; c++) {
    125        auto r = input.GetBuffer(noise_c_start + c);
    126        rects[c].first = r.first;
    127        size_t x1 = r.second.x0() + r.second.xsize();
    128        size_t y1 = r.second.y0() + r.second.ysize();
    129        rects[c].second =
    130            Rect(r.second.x0() + ix * group_dim, r.second.y0() + iy * group_dim,
    131                 group_dim, group_dim, x1, y1);
    132      }
    133      HWY_DYNAMIC_DISPATCH(Random3Planes)
    134      (dec_state.visible_frame_index, dec_state.nonvisible_frame_index,
    135       (gx * frame_header.upsampling + ix) * group_dim,
    136       (gy * frame_header.upsampling + iy) * group_dim, rects[0], rects[1],
    137       rects[2]);
    138    }
    139  }
    140 }
    141 
    142 void DecodeFloatParam(float precision, float* val, BitReader* br) {
    143  const int absval_quant = br->ReadFixedBits<10>();
    144  *val = absval_quant / precision;
    145 }
    146 
    147 Status DecodeNoise(BitReader* br, NoiseParams* noise_params) {
    148  for (float& i : noise_params->lut) {
    149    DecodeFloatParam(kNoisePrecision, &i, br);
    150  }
    151  return true;
    152 }
    153 
    154 }  // namespace jxl
    155 #endif  // HWY_ONCE