tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

convolve_slow.cc (7480B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdlib>

#include "lib/jxl/base/rect.h"
#include "lib/jxl/base/status.h"
#include "lib/jxl/convolve-inl.h"
#include "lib/jxl/convolve.h"
     12 
     13 namespace jxl {
     14 
     15 //------------------------------------------------------------------------------
     16 // Kernels
     17 
     18 // 4 instances of a given literal value, useful as input to LoadDup128.
     19 #define JXL_REP4(literal) literal, literal, literal, literal
     20 
     21 // Concentrates energy in low-frequency components (e.g. for antialiasing).
     22 const WeightsSymmetric3& WeightsSymmetric3Lowpass() {
     23  // Computed by research/convolve_weights.py's cubic spline approximations of
     24  // prolate spheroidal wave functions.
     25  constexpr float w0 = 0.36208932f;
     26  constexpr float w1 = 0.12820096f;
     27  constexpr float w2 = 0.03127668f;
     28  static constexpr WeightsSymmetric3 weights = {
     29      {JXL_REP4(w0)}, {JXL_REP4(w1)}, {JXL_REP4(w2)}};
     30  return weights;
     31 }
     32 
     33 const WeightsSeparable5& WeightsSeparable5Lowpass() {
     34  constexpr float w0 = 0.41714928f;
     35  constexpr float w1 = 0.25539268f;
     36  constexpr float w2 = 0.03603267f;
     37  static constexpr WeightsSeparable5 weights = {
     38      {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
     39      {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
     40  return weights;
     41 }
     42 
     43 const WeightsSymmetric5& WeightsSymmetric5Lowpass() {
     44  static constexpr WeightsSymmetric5 weights = {
     45      {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)},
     46      {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}};
     47  return weights;
     48 }
     49 
     50 const WeightsSeparable5& WeightsSeparable5Gaussian1() {
     51  constexpr float w0 = 0.38774f;
     52  constexpr float w1 = 0.24477f;
     53  constexpr float w2 = 0.06136f;
     54  static constexpr WeightsSeparable5 weights = {
     55      {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
     56      {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
     57  return weights;
     58 }
     59 
     60 const WeightsSeparable5& WeightsSeparable5Gaussian2() {
     61  constexpr float w0 = 0.250301f;
     62  constexpr float w1 = 0.221461f;
     63  constexpr float w2 = 0.153388f;
     64  static constexpr WeightsSeparable5 weights = {
     65      {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
     66      {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
     67  return weights;
     68 }
     69 
     70 #undef JXL_REP4
     71 
     72 //------------------------------------------------------------------------------
     73 // Slow
     74 
     75 namespace {
     76 
     77 template <class WrapX, class WrapY>
     78 float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy,
     79                          const int64_t xsize, const int64_t ysize,
     80                          const WeightsSymmetric3& weights) {
     81  float sum = 0.0f;
     82 
     83  // ix: image; kx: kernel
     84  for (int64_t ky = -1; ky <= 1; ky++) {
     85    const int64_t y = WrapY()(iy + ky, ysize);
     86    const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y));
     87 
     88    const float wc = ky == 0 ? weights.c[0] : weights.r[0];
     89    const float wlr = ky == 0 ? weights.r[0] : weights.d[0];
     90 
     91    const int64_t xm1 = WrapX()(ix - 1, xsize);
     92    const int64_t xp1 = WrapX()(ix + 1, xsize);
     93    sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr;
     94  }
     95  return sum;
     96 }
     97 
     98 template <class WrapY>
     99 void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize,
    100                       const int64_t ysize, const WeightsSymmetric3& weights,
    101                       float* JXL_RESTRICT row_out) {
    102  row_out[0] =
    103      SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights);
    104  for (int64_t ix = 1; ix < xsize - 1; ix++) {
    105    row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize,
    106                                                            ysize, weights);
    107  }
    108  {
    109    const int64_t ix = xsize - 1;
    110    row_out[ix] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, ix, iy, xsize,
    111                                                         ysize, weights);
    112  }
    113 }
    114 
    115 }  // namespace
    116 
    117 Status SlowSymmetric3(const ImageF& in, const Rect& rect,
    118                      const WeightsSymmetric3& weights, ThreadPool* pool,
    119                      ImageF* JXL_RESTRICT out) {
    120  const int64_t xsize = static_cast<int64_t>(rect.xsize());
    121  const int64_t ysize = static_cast<int64_t>(rect.ysize());
    122  const int64_t kRadius = 1;
    123 
    124  const auto process_row = [&](const uint32_t task,
    125                               size_t /*thread*/) -> Status {
    126    const int64_t iy = task;
    127    float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy));
    128 
    129    if (iy < kRadius || iy >= ysize - kRadius) {
    130      SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row);
    131    } else {
    132      SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights, out_row);
    133    }
    134    return true;
    135  };
    136  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
    137                                ThreadPool::NoInit, process_row,
    138                                "SlowSymmetric3"));
    139  return true;
    140 }
    141 
    142 namespace {
    143 
    144 // Separable kernels, any radius.
    145 StatusOr<float> SlowSeparablePixel(const ImageF& in, const Rect& rect,
    146                                   const int64_t x, const int64_t y,
    147                                   const int64_t radius,
    148                                   const float* JXL_RESTRICT horz_weights,
    149                                   const float* JXL_RESTRICT vert_weights) {
    150  const size_t xsize = in.xsize();
    151  const size_t ysize = in.ysize();
    152  const WrapMirror wrap;
    153 
    154  float mul = 0.0f;
    155  for (int dy = -radius; dy <= radius; ++dy) {
    156    const float wy = vert_weights[std::abs(dy) * 4];
    157    const size_t sy = wrap(rect.y0() + y + dy, ysize);
    158    JXL_ENSURE(sy < ysize);
    159    const float* const JXL_RESTRICT row = in.ConstRow(sy);
    160    for (int dx = -radius; dx <= radius; ++dx) {
    161      const float wx = horz_weights[std::abs(dx) * 4];
    162      const size_t sx = wrap(rect.x0() + x + dx, xsize);
    163      JXL_ENSURE(sx < xsize);
    164      mul += row[sx] * wx * wy;
    165    }
    166  }
    167  return mul;
    168 }
    169 
    170 template <int R, typename Weights>
    171 Status SlowSeparable(const ImageF& in, const Rect& in_rect,
    172                     const Weights& weights, ThreadPool* pool, ImageF* out,
    173                     const Rect& out_rect) {
    174  JXL_ENSURE(in_rect.xsize() == out_rect.xsize());
    175  JXL_ENSURE(in_rect.ysize() == out_rect.ysize());
    176  JXL_ENSURE(in_rect.IsInside(Rect(in)));
    177  JXL_ENSURE(out_rect.IsInside(Rect(*out)));
    178  const float* horz_weights = &weights.horz[0];
    179  const float* vert_weights = &weights.vert[0];
    180 
    181  const auto process_row = [&](const uint32_t task,
    182                               size_t /*thread*/) -> Status {
    183    const int64_t y = task;
    184 
    185    float* const JXL_RESTRICT row_out = out_rect.Row(out, y);
    186    for (size_t x = 0; x < in_rect.xsize(); ++x) {
    187      JXL_ASSIGN_OR_RETURN(row_out[x],
    188                           SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R,
    189                                              horz_weights, vert_weights));
    190    }
    191    return true;
    192  };
    193  const size_t ysize = in_rect.ysize();
    194  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
    195                                ThreadPool::NoInit, process_row,
    196                                "SlowSeparable"));
    197  return true;
    198 }
    199 
    200 }  // namespace
    201 
    202 Status SlowSeparable5(const ImageF& in, const Rect& in_rect,
    203                      const WeightsSeparable5& weights, ThreadPool* pool,
    204                      ImageF* out, const Rect& out_rect) {
    205  return SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect);
    206 }
    207 
    208 }  // namespace jxl