convolve_slow.cc (7480B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include <atomic> 7 8 #include "lib/jxl/base/rect.h" 9 #include "lib/jxl/base/status.h" 10 #include "lib/jxl/convolve-inl.h" 11 #include "lib/jxl/convolve.h" 12 13 namespace jxl { 14 15 //------------------------------------------------------------------------------ 16 // Kernels 17 18 // 4 instances of a given literal value, useful as input to LoadDup128. 19 #define JXL_REP4(literal) literal, literal, literal, literal 20 21 // Concentrates energy in low-frequency components (e.g. for antialiasing). 22 const WeightsSymmetric3& WeightsSymmetric3Lowpass() { 23 // Computed by research/convolve_weights.py's cubic spline approximations of 24 // prolate spheroidal wave functions. 25 constexpr float w0 = 0.36208932f; 26 constexpr float w1 = 0.12820096f; 27 constexpr float w2 = 0.03127668f; 28 static constexpr WeightsSymmetric3 weights = { 29 {JXL_REP4(w0)}, {JXL_REP4(w1)}, {JXL_REP4(w2)}}; 30 return weights; 31 } 32 33 const WeightsSeparable5& WeightsSeparable5Lowpass() { 34 constexpr float w0 = 0.41714928f; 35 constexpr float w1 = 0.25539268f; 36 constexpr float w2 = 0.03603267f; 37 static constexpr WeightsSeparable5 weights = { 38 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, 39 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; 40 return weights; 41 } 42 43 const WeightsSymmetric5& WeightsSymmetric5Lowpass() { 44 static constexpr WeightsSymmetric5 weights = { 45 {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)}, 46 {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}}; 47 return weights; 48 } 49 50 const WeightsSeparable5& WeightsSeparable5Gaussian1() { 51 constexpr float w0 = 0.38774f; 52 constexpr float w1 = 0.24477f; 53 constexpr float w2 = 0.06136f; 54 static constexpr WeightsSeparable5 weights = { 55 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, 56 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; 57 return weights; 58 } 59 60 const WeightsSeparable5& WeightsSeparable5Gaussian2() { 61 constexpr float w0 = 0.250301f; 62 constexpr float w1 = 0.221461f; 63 constexpr float w2 = 0.153388f; 64 static constexpr WeightsSeparable5 weights = { 65 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, 66 {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; 67 return weights; 68 } 69 70 #undef JXL_REP4 71 72 //------------------------------------------------------------------------------ 73 // Slow 74 75 namespace { 76 77 template <class WrapX, class WrapY> 78 float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy, 79 const int64_t xsize, const int64_t ysize, 80 const WeightsSymmetric3& weights) { 81 float sum = 0.0f; 82 83 // ix: image; kx: kernel 84 for (int64_t ky = -1; ky <= 1; ky++) { 85 const int64_t y = WrapY()(iy + ky, ysize); 86 const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y)); 87 88 const float wc = ky == 0 ? weights.c[0] : weights.r[0]; 89 const float wlr = ky == 0 ? weights.r[0] : weights.d[0]; 90 91 const int64_t xm1 = WrapX()(ix - 1, xsize); 92 const int64_t xp1 = WrapX()(ix + 1, xsize); 93 sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr; 94 } 95 return sum; 96 } 97 98 template <class WrapY> 99 void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize, 100 const int64_t ysize, const WeightsSymmetric3& weights, 101 float* JXL_RESTRICT row_out) { 102 row_out[0] = 103 SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights); 104 for (int64_t ix = 1; ix < xsize - 1; ix++) { 105 row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize, 106 ysize, weights); 107 } 108 { 109 const int64_t ix = xsize - 1; 110 row_out[ix] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, ix, iy, xsize, 111 ysize, weights); 112 } 113 } 114 115 } // namespace 116 117 Status SlowSymmetric3(const ImageF& in, const Rect& rect, 118 const WeightsSymmetric3& weights, ThreadPool* pool, 119 ImageF* JXL_RESTRICT out) { 120 const int64_t xsize = static_cast<int64_t>(rect.xsize()); 121 const int64_t ysize = static_cast<int64_t>(rect.ysize()); 122 const int64_t kRadius = 1; 123 124 const auto process_row = [&](const uint32_t task, 125 size_t /*thread*/) -> Status { 126 const int64_t iy = task; 127 float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy)); 128 129 if (iy < kRadius || iy >= ysize - kRadius) { 130 SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row); 131 } else { 132 SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights, out_row); 133 } 134 return true; 135 }; 136 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 137 ThreadPool::NoInit, process_row, 138 "SlowSymmetric3")); 139 return true; 140 } 141 142 namespace { 143 144 // Separable kernels, any radius. 145 StatusOr<float> SlowSeparablePixel(const ImageF& in, const Rect& rect, 146 const int64_t x, const int64_t y, 147 const int64_t radius, 148 const float* JXL_RESTRICT horz_weights, 149 const float* JXL_RESTRICT vert_weights) { 150 const size_t xsize = in.xsize(); 151 const size_t ysize = in.ysize(); 152 const WrapMirror wrap; 153 154 float mul = 0.0f; 155 for (int dy = -radius; dy <= radius; ++dy) { 156 const float wy = vert_weights[std::abs(dy) * 4]; 157 const size_t sy = wrap(rect.y0() + y + dy, ysize); 158 JXL_ENSURE(sy < ysize); 159 const float* const JXL_RESTRICT row = in.ConstRow(sy); 160 for (int dx = -radius; dx <= radius; ++dx) { 161 const float wx = horz_weights[std::abs(dx) * 4]; 162 const size_t sx = wrap(rect.x0() + x + dx, xsize); 163 JXL_ENSURE(sx < xsize); 164 mul += row[sx] * wx * wy; 165 } 166 } 167 return mul; 168 } 169 170 template <int R, typename Weights> 171 Status SlowSeparable(const ImageF& in, const Rect& in_rect, 172 const Weights& weights, ThreadPool* pool, ImageF* out, 173 const Rect& out_rect) { 174 JXL_ENSURE(in_rect.xsize() == out_rect.xsize()); 175 JXL_ENSURE(in_rect.ysize() == out_rect.ysize()); 176 JXL_ENSURE(in_rect.IsInside(Rect(in))); 177 JXL_ENSURE(out_rect.IsInside(Rect(*out))); 178 const float* horz_weights = &weights.horz[0]; 179 const float* vert_weights = &weights.vert[0]; 180 181 const auto process_row = [&](const uint32_t task, 182 size_t /*thread*/) -> Status { 183 const int64_t y = task; 184 185 float* const JXL_RESTRICT row_out = out_rect.Row(out, y); 186 for (size_t x = 0; x < in_rect.xsize(); ++x) { 187 JXL_ASSIGN_OR_RETURN(row_out[x], 188 SlowSeparablePixel(in, in_rect, x, y, /*radius=*/R, 189 horz_weights, vert_weights)); 190 } 191 return true; 192 }; 193 const size_t ysize = in_rect.ysize(); 194 JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize), 195 ThreadPool::NoInit, process_row, 196 "SlowSeparable")); 197 return true; 198 } 199 200 } // namespace 201 202 Status SlowSeparable5(const ImageF& in, const Rect& in_rect, 203 const WeightsSeparable5& weights, ThreadPool* pool, 204 ImageF* out, const Rect& out_rect) { 205 return SlowSeparable<2>(in, in_rect, weights, pool, out, out_rect); 206 } 207 208 } // namespace jxl