butteraugli.cc (81907B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 // 6 // Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com) 7 // 8 // The physical architecture of butteraugli is based on the following naming 9 // convention: 10 // * Opsin - dynamics of the photosensitive chemicals in the retina 11 // with their immediate electrical processing 12 // * Xyb - hybrid opponent/trichromatic color space 13 // x is roughly red-subtract-green. 14 // y is yellow. 15 // b is blue. 16 // Xyb values are computed from Opsin mixing, not directly from rgb. 17 // * Mask - for visual masking 18 // * Hf - color modeling for spatially high-frequency features 19 // * Lf - color modeling for spatially low-frequency features 20 // * Diffmap - to cluster and build an image of error between the images 21 // * Blur - to hold the smoothing code 22 23 #include "lib/jxl/butteraugli/butteraugli.h" 24 25 #include <jxl/memory_manager.h> 26 27 #include <algorithm> 28 #include <cmath> 29 #include <cstdint> 30 #include <cstdio> 31 #include <cstdlib> 32 #include <cstring> 33 #include <memory> 34 #include <vector> 35 36 #include "lib/jxl/image.h" 37 38 #undef HWY_TARGET_INCLUDE 39 #define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc" 40 #include <hwy/foreach_target.h> 41 42 #include "lib/jxl/base/fast_math-inl.h" 43 #include "lib/jxl/base/rect.h" 44 #include "lib/jxl/base/status.h" 45 #include "lib/jxl/convolve.h" 46 #include "lib/jxl/image_ops.h" 47 48 #if BUTTERAUGLI_ENABLE_CHECKS 49 #include "lib/jxl/base/printf_macros.h" 50 #endif 51 52 #ifndef JXL_BUTTERAUGLI_ONCE 53 #define JXL_BUTTERAUGLI_ONCE 54 55 namespace jxl { 56 57 static const double wMfMalta = 37.0819870399; 58 static const double norm1Mf = 130262059.556; 59 static const double wMfMaltaX = 8246.75321353; 60 static const double norm1MfX = 1009002.70582; 61 static const double wHfMalta = 18.7237414387; 62 static const double norm1Hf = 4498534.45232; 63 static const double wHfMaltaX = 6923.99476109; 64 static const double norm1HfX = 8051.15833247; 65 static const double wUhfMalta = 1.10039032555; 66 static const double norm1Uhf = 71.7800275169; 67 static const double wUhfMaltaX = 173.5; 68 static const double norm1UhfX = 5.0; 69 static const double wmul[9] = { 70 400.0, 1.50815703118, 0, 71 2150.0, 10.6195433239, 16.2176043152, 72 29.2353797994, 0.844626970982, 0.703646627719, 73 }; 74 75 std::vector<float> ComputeKernel(float sigma) { 76 const float m = 2.25; // Accuracy increases when m is increased. 77 const double scaler = -1.0 / (2.0 * sigma * sigma); 78 const int diff = std::max<int>(1, m * std::fabs(sigma)); 79 std::vector<float> kernel(2 * diff + 1); 80 for (int i = -diff; i <= diff; ++i) { 81 kernel[i + diff] = std::exp(scaler * i * i); 82 } 83 return kernel; 84 } 85 86 void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel, 87 const size_t x, float* BUTTERAUGLI_RESTRICT row_out) { 88 const size_t offset = kernel.size() / 2; 89 int minx = x < offset ? 0 : x - offset; 90 int maxx = std::min<int>(in.xsize() - 1, x + offset); 91 float weight = 0.0f; 92 for (int j = minx; j <= maxx; ++j) { 93 weight += kernel[j - x + offset]; 94 } 95 float scale = 1.0f / weight; 96 for (size_t y = 0; y < in.ysize(); ++y) { 97 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y); 98 float sum = 0.0f; 99 for (int j = minx; j <= maxx; ++j) { 100 sum += row_in[j] * kernel[j - x + offset]; 101 } 102 row_out[y] = sum * scale; 103 } 104 } 105 106 // Computes a horizontal convolution and transposes the result. 107 Status ConvolutionWithTranspose(const ImageF& in, 108 const std::vector<float>& kernel, 109 ImageF* BUTTERAUGLI_RESTRICT out) { 110 JXL_ENSURE(out->xsize() == in.ysize()); 111 JXL_ENSURE(out->ysize() == in.xsize()); 112 const size_t len = kernel.size(); 113 const size_t offset = len / 2; 114 float weight_no_border = 0.0f; 115 for (size_t j = 0; j < len; ++j) { 116 weight_no_border += kernel[j]; 117 } 118 const float scale_no_border = 1.0f / weight_no_border; 119 const size_t border1 = std::min(in.xsize(), offset); 120 const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0; 121 std::vector<float> scaled_kernel(len / 2 + 1); 122 for (size_t i = 0; i <= len / 2; ++i) { 123 scaled_kernel[i] = kernel[i] * scale_no_border; 124 } 125 126 // middle 127 switch (len) { 128 case 7: { 129 const float sk0 = scaled_kernel[0]; 130 const float sk1 = scaled_kernel[1]; 131 const float sk2 = scaled_kernel[2]; 132 const float sk3 = scaled_kernel[3]; 133 for (size_t y = 0; y < in.ysize(); ++y) { 134 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 135 for (size_t x = border1; x < border2; ++x, ++row_in) { 136 const float sum0 = (row_in[0] + row_in[6]) * sk0; 137 const float sum1 = (row_in[1] + row_in[5]) * sk1; 138 const float sum2 = (row_in[2] + row_in[4]) * sk2; 139 const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2; 140 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 141 row_out[y] = sum; 142 } 143 } 144 } break; 145 case 13: { 146 for (size_t y = 0; y < in.ysize(); ++y) { 147 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 148 for (size_t x = border1; x < border2; ++x, ++row_in) { 149 float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0]; 150 float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1]; 151 float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2]; 152 float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3]; 153 sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4]; 154 sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5]; 155 const float sum = (row_in[6]) * scaled_kernel[6]; 156 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 157 row_out[y] = sum + sum0 + sum1 + sum2 + sum3; 158 } 159 } 160 break; 161 } 162 case 15: { 163 for (size_t y = 0; y < in.ysize(); ++y) { 164 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 165 for (size_t x = border1; x < border2; ++x, ++row_in) { 166 float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0]; 167 float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1]; 168 float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2]; 169 float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3]; 170 sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4]; 171 sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5]; 172 sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6]; 173 const float sum = (row_in[7]) * scaled_kernel[7]; 174 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 175 row_out[y] = sum + sum0 + sum1 + sum2 + sum3; 176 } 177 } 178 break; 179 } 180 case 33: { 181 for (size_t y = 0; y < in.ysize(); ++y) { 182 const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; 183 for (size_t x = border1; x < border2; ++x, ++row_in) { 184 float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0]; 185 float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1]; 186 float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2]; 187 float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3]; 188 sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4]; 189 sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5]; 190 sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6]; 191 sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7]; 192 sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8]; 193 sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9]; 194 sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10]; 195 sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11]; 196 sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12]; 197 sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13]; 198 sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14]; 199 sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15]; 200 const float sum = (row_in[16]) * scaled_kernel[16]; 201 float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); 202 row_out[y] = sum + sum0 + sum1 + sum2 + sum3; 203 } 204 } 205 break; 206 } 207 default: 208 return JXL_UNREACHABLE("kernel size %d not implemented", 209 static_cast<int>(len)); 210 } 211 // left border 212 for (size_t x = 0; x < border1; ++x) { 213 ConvolveBorderColumn(in, kernel, x, out->Row(x)); 214 } 215 216 // right border 217 for (size_t x = border2; x < in.xsize(); ++x) { 218 ConvolveBorderColumn(in, kernel, x, out->Row(x)); 219 } 220 return true; 221 } 222 223 // A blur somewhat similar to a 2D Gaussian blur. 224 // See: https://en.wikipedia.org/wiki/Gaussian_blur 225 // 226 // This is a bottleneck because the sigma can be quite large (>7). We can use 227 // gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated 228 // Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are 229 // zero-valued. This leads to noticeable differences at the edges of diffmaps. 230 // We retain a special case for 5x5 kernels (even faster than gauss_blur), 231 // optionally use gauss_blur followed by fixup of the borders for large images, 232 // or fall back to the previous truncated FIR followed by a transpose. 233 Status Blur(const ImageF& in, float sigma, const ButteraugliParams& params, 234 BlurTemp* temp, ImageF* out) { 235 std::vector<float> kernel = ComputeKernel(sigma); 236 // Separable5 does an in-place convolution, so this fast path is not safe if 237 // in aliases out. 238 if (kernel.size() == 5 && &in != out) { 239 float sum_weights = 0.0f; 240 for (const float w : kernel) { 241 sum_weights += w; 242 } 243 const float scale = 1.0f / sum_weights; 244 const float w0 = kernel[2] * scale; 245 const float w1 = kernel[1] * scale; 246 const float w2 = kernel[0] * scale; 247 const WeightsSeparable5 weights = { 248 {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, 249 {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, 250 }; 251 JXL_RETURN_IF_ERROR( 252 Separable5(in, Rect(in), weights, /*pool=*/nullptr, out)); 253 return true; 254 } 255 256 ImageF* temp_t; 257 JXL_RETURN_IF_ERROR(temp->GetTransposed(in, &temp_t)); 258 JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(in, kernel, temp_t)); 259 JXL_RETURN_IF_ERROR(ConvolutionWithTranspose(*temp_t, kernel, out)); 260 return true; 261 } 262 263 // Allows PaddedMaltaUnit to call either function via overloading. 264 struct MaltaTagLF {}; 265 struct MaltaTag {}; 266 267 } // namespace jxl 268 269 #endif // JXL_BUTTERAUGLI_ONCE 270 271 #include <hwy/highway.h> 272 HWY_BEFORE_NAMESPACE(); 273 namespace jxl { 274 namespace HWY_NAMESPACE { 275 276 // These templates are not found via ADL. 277 using hwy::HWY_NAMESPACE::Abs; 278 using hwy::HWY_NAMESPACE::Div; 279 using hwy::HWY_NAMESPACE::Gt; 280 using hwy::HWY_NAMESPACE::IfThenElse; 281 using hwy::HWY_NAMESPACE::IfThenElseZero; 282 using hwy::HWY_NAMESPACE::Lt; 283 using hwy::HWY_NAMESPACE::Max; 284 using hwy::HWY_NAMESPACE::Mul; 285 using hwy::HWY_NAMESPACE::MulAdd; 286 using hwy::HWY_NAMESPACE::MulSub; 287 using hwy::HWY_NAMESPACE::Neg; 288 using hwy::HWY_NAMESPACE::Sub; 289 using hwy::HWY_NAMESPACE::Vec; 290 using hwy::HWY_NAMESPACE::ZeroIfNegative; 291 292 template <class D, class V> 293 HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) { 294 static const double kMul = 0.724216145665; 295 const V mul = Set(d, kMul); 296 const V maxval = Set(d, kMaxVal); 297 // If greater than maxval or less than -maxval, replace with if_*. 298 const V if_pos = MulAdd(Sub(v, maxval), mul, maxval); 299 const V if_neg = MulSub(Add(v, maxval), mul, maxval); 300 const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v); 301 return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v); 302 } 303 304 // Make area around zero less important (remove it). 305 template <class D, class V> 306 HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) { 307 const auto w = Set(d, kw); 308 return IfThenElse(Gt(x, w), Sub(x, w), 309 IfThenElseZero(Lt(x, Neg(w)), Add(x, w))); 310 } 311 312 // Make area around zero more important (2x it until the limit). 313 template <class D, class V> 314 HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) { 315 const auto w = Set(d, kw); 316 return IfThenElse(Gt(x, w), Add(x, w), 317 IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x))); 318 } 319 320 // XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space. 321 // Vals space can be converted to L2-norm space (Euclidean and normalized) 322 // through visual masking. 323 template <class D, class V> 324 HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y, 325 const V& b_arg, V* HWY_RESTRICT valx, 326 V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) { 327 static const double xmul_scalar = 33.832837186260; 328 static const double ymul_scalar = 14.458268100570; 329 static const double bmul_scalar = 49.87984651440; 330 static const double y_to_b_mul_scalar = -0.362267051518; 331 const V xmul = Set(d, xmul_scalar); 332 const V ymul = Set(d, ymul_scalar); 333 const V bmul = Set(d, bmul_scalar); 334 const V y_to_b_mul = Set(d, y_to_b_mul_scalar); 335 const V b = MulAdd(y_to_b_mul, y, b_arg); 336 *valb = Mul(b, bmul); 337 *valx = Mul(x, xmul); 338 *valy = Mul(y, ymul); 339 } 340 341 void XybLowFreqToVals(Image3F* xyb_lf) { 342 // Modify range around zero code only concerns the high frequency 343 // planes and only the X and Y channels. 344 // Convert low freq xyb to vals space so that we can do a simple squared sum 345 // diff on the low frequencies later. 346 const HWY_FULL(float) d; 347 for (size_t y = 0; y < xyb_lf->ysize(); ++y) { 348 float* BUTTERAUGLI_RESTRICT row_x = xyb_lf->PlaneRow(0, y); 349 float* BUTTERAUGLI_RESTRICT row_y = xyb_lf->PlaneRow(1, y); 350 float* BUTTERAUGLI_RESTRICT row_b = xyb_lf->PlaneRow(2, y); 351 for (size_t x = 0; x < xyb_lf->xsize(); x += Lanes(d)) { 352 auto valx = Undefined(d); 353 auto valy = Undefined(d); 354 auto valb = Undefined(d); 355 XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x), 356 Load(d, row_b + x), &valx, &valy, &valb); 357 Store(valx, d, row_x + x); 358 Store(valy, d, row_y + x); 359 Store(valb, d, row_b + x); 360 } 361 } 362 } 363 364 Status SuppressXByY(const ImageF& in_y, ImageF* HWY_RESTRICT inout_x) { 365 JXL_ENSURE(SameSize(*inout_x, in_y)); 366 const size_t xsize = in_y.xsize(); 367 const size_t ysize = in_y.ysize(); 368 const HWY_FULL(float) d; 369 static const double suppress = 46.0; 370 static const double s = 0.653020556257; 371 const auto sv = Set(d, s); 372 const auto one_minus_s = Set(d, 1.0 - s); 373 const auto ywv = Set(d, suppress); 374 375 for (size_t y = 0; y < ysize; ++y) { 376 const float* HWY_RESTRICT row_y = in_y.ConstRow(y); 377 float* HWY_RESTRICT row_x = inout_x->Row(y); 378 for (size_t x = 0; x < xsize; x += Lanes(d)) { 379 const auto vx = Load(d, row_x + x); 380 const auto vy = Load(d, row_y + x); 381 const auto scaler = 382 MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv); 383 Store(Mul(scaler, vx), d, row_x + x); 384 } 385 } 386 return true; 387 } 388 389 void Subtract(const ImageF& a, const ImageF& b, ImageF* c) { 390 const HWY_FULL(float) d; 391 for (size_t y = 0; y < a.ysize(); ++y) { 392 const float* row_a = a.ConstRow(y); 393 const float* row_b = b.ConstRow(y); 394 float* row_c = c->Row(y); 395 for (size_t x = 0; x < a.xsize(); x += Lanes(d)) { 396 Store(Sub(Load(d, row_a + x), Load(d, row_b + x)), d, row_c + x); 397 } 398 } 399 } 400 401 Status SeparateLFAndMF(const ButteraugliParams& params, const Image3F& xyb, 402 Image3F* lf, Image3F* mf, BlurTemp* blur_temp) { 403 static const double kSigmaLf = 7.15593339443; 404 for (int i = 0; i < 3; ++i) { 405 // Extract lf ... 406 JXL_RETURN_IF_ERROR( 407 Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &lf->Plane(i))); 408 // ... and keep everything else in mf. 409 Subtract(xyb.Plane(i), lf->Plane(i), &mf->Plane(i)); 410 } 411 XybLowFreqToVals(lf); 412 return true; 413 } 414 415 Status SeparateMFAndHF(const ButteraugliParams& params, Image3F* mf, ImageF* hf, 416 BlurTemp* blur_temp) { 417 const HWY_FULL(float) d; 418 static const double kSigmaHf = 3.22489901262; 419 const size_t xsize = mf->xsize(); 420 const size_t ysize = mf->ysize(); 421 JxlMemoryManager* memory_manager = mf[0].memory_manager(); 422 JXL_ASSIGN_OR_RETURN(hf[0], ImageF::Create(memory_manager, xsize, ysize)); 423 JXL_ASSIGN_OR_RETURN(hf[1], ImageF::Create(memory_manager, xsize, ysize)); 424 for (int i = 0; i < 3; ++i) { 425 if (i == 2) { 426 JXL_RETURN_IF_ERROR( 427 Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); 428 break; 429 } 430 for (size_t y = 0; y < ysize; ++y) { 431 float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(i, y); 432 float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); 433 for (size_t x = 0; x < xsize; x += Lanes(d)) { 434 Store(Load(d, row_mf + x), d, row_hf + x); 435 } 436 } 437 JXL_RETURN_IF_ERROR( 438 Blur(mf->Plane(i), kSigmaHf, params, blur_temp, &mf->Plane(i))); 439 static const double kRemoveMfRange = 0.29; 440 static const double kAddMfRange = 0.1; 441 if (i == 0) { 442 for (size_t y = 0; y < ysize; ++y) { 443 float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(0, y); 444 float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); 445 for (size_t x = 0; x < xsize; x += Lanes(d)) { 446 auto mf = Load(d, row_mf + x); 447 auto hf = Sub(Load(d, row_hf + x), mf); 448 mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf); 449 Store(mf, d, row_mf + x); 450 Store(hf, d, row_hf + x); 451 } 452 } 453 } else { 454 for (size_t y = 0; y < ysize; ++y) { 455 float* BUTTERAUGLI_RESTRICT row_mf = mf->PlaneRow(1, y); 456 float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); 457 for (size_t x = 0; x < xsize; x += Lanes(d)) { 458 auto mf = Load(d, row_mf + x); 459 auto hf = Sub(Load(d, row_hf + x), mf); 460 461 mf = AmplifyRangeAroundZero(d, kAddMfRange, mf); 462 Store(mf, d, row_mf + x); 463 Store(hf, d, row_hf + x); 464 } 465 } 466 } 467 } 468 // Suppress red-green by intensity change in the high freq channels. 469 JXL_RETURN_IF_ERROR(SuppressXByY(hf[1], &hf[0])); 470 return true; 471 } 472 473 Status SeparateHFAndUHF(const ButteraugliParams& params, ImageF* hf, 474 ImageF* uhf, BlurTemp* blur_temp) { 475 const HWY_FULL(float) d; 476 const size_t xsize = hf[0].xsize(); 477 const size_t ysize = hf[0].ysize(); 478 JxlMemoryManager* memory_manager = hf[0].memory_manager(); 479 static const double kSigmaUhf = 1.56416327805; 480 JXL_ASSIGN_OR_RETURN(uhf[0], ImageF::Create(memory_manager, xsize, ysize)); 481 JXL_ASSIGN_OR_RETURN(uhf[1], ImageF::Create(memory_manager, xsize, ysize)); 482 for (int i = 0; i < 2; ++i) { 483 // Divide hf into hf and uhf. 484 for (size_t y = 0; y < ysize; ++y) { 485 float* BUTTERAUGLI_RESTRICT row_uhf = uhf[i].Row(y); 486 float* BUTTERAUGLI_RESTRICT row_hf = hf[i].Row(y); 487 for (size_t x = 0; x < xsize; ++x) { 488 row_uhf[x] = row_hf[x]; 489 } 490 } 491 JXL_RETURN_IF_ERROR(Blur(hf[i], kSigmaUhf, params, blur_temp, &hf[i])); 492 static const double kRemoveHfRange = 1.5; 493 static const double kAddHfRange = 0.132; 494 static const double kRemoveUhfRange = 0.04; 495 static const double kMaxclampHf = 28.4691806922; 496 static const double kMaxclampUhf = 5.19175294647; 497 static double kMulYHf = 2.155; 498 static double kMulYUhf = 2.69313763794; 499 if (i == 0) { 500 for (size_t y = 0; y < ysize; ++y) { 501 float* BUTTERAUGLI_RESTRICT row_uhf = uhf[0].Row(y); 502 float* BUTTERAUGLI_RESTRICT row_hf = hf[0].Row(y); 503 for (size_t x = 0; x < xsize; x += Lanes(d)) { 504 auto hf = Load(d, row_hf + x); 505 auto uhf = Sub(Load(d, row_uhf + x), hf); 506 hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf); 507 uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf); 508 Store(hf, d, row_hf + x); 509 Store(uhf, d, row_uhf + x); 510 } 511 } 512 } else { 513 for (size_t y = 0; y < ysize; ++y) { 514 float* BUTTERAUGLI_RESTRICT row_uhf = uhf[1].Row(y); 515 float* BUTTERAUGLI_RESTRICT row_hf = hf[1].Row(y); 516 for (size_t x = 0; x < xsize; x += Lanes(d)) { 517 auto hf = Load(d, row_hf + x); 518 hf = MaximumClamp(d, hf, kMaxclampHf); 519 520 auto uhf = Sub(Load(d, row_uhf + x), hf); 521 uhf = MaximumClamp(d, uhf, kMaxclampUhf); 522 uhf = Mul(uhf, Set(d, kMulYUhf)); 523 Store(uhf, d, row_uhf + x); 524 525 hf = Mul(hf, Set(d, kMulYHf)); 526 hf = AmplifyRangeAroundZero(d, kAddHfRange, hf); 527 Store(hf, d, row_hf + x); 528 } 529 } 530 } 531 } 532 return true; 533 } 534 535 void DeallocateHFAndUHF(ImageF* hf, ImageF* uhf) { 536 for (int i = 0; i < 2; ++i) { 537 hf[i] = ImageF(); 538 uhf[i] = ImageF(); 539 } 540 } 541 542 Status SeparateFrequencies(size_t xsize, size_t ysize, 543 const ButteraugliParams& params, BlurTemp* blur_temp, 544 const Image3F& xyb, PsychoImage& ps) { 545 JxlMemoryManager* memory_manager = xyb.memory_manager(); 546 JXL_ASSIGN_OR_RETURN( 547 ps.lf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize())); 548 JXL_ASSIGN_OR_RETURN( 549 ps.mf, Image3F::Create(memory_manager, xyb.xsize(), xyb.ysize())); 550 JXL_RETURN_IF_ERROR(SeparateLFAndMF(params, xyb, &ps.lf, &ps.mf, blur_temp)); 551 JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &ps.mf, &ps.hf[0], blur_temp)); 552 JXL_RETURN_IF_ERROR( 553 SeparateHFAndUHF(params, &ps.hf[0], &ps.uhf[0], blur_temp)); 554 return true; 555 } 556 557 namespace { 558 template <typename V> 559 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) { 560 return Add(Add(a, b), Add(c, d)); 561 } 562 template <typename V> 563 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) { 564 return Sum(a, b, c, Add(d, e)); 565 } 566 template <typename V> 567 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) { 568 return Sum(a, b, c, Sum(d, e, f, g)); 569 } 570 template <typename V> 571 BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) { 572 return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i); 573 } 574 } // namespace 575 576 template <class D> 577 Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df, 578 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { 579 const intptr_t xs3 = 3 * xs; 580 581 const auto center = LoadU(df, d); 582 583 // x grows, y constant 584 const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center, 585 LoadU(df, d + 2), LoadU(df, d + 4)); 586 // Will return this, sum of all line kernels 587 auto retval = Mul(sum_yconst, sum_yconst); 588 { 589 // y grows, x constant 590 auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center, 591 LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs)); 592 retval = MulAdd(sum, sum, retval); 593 } 594 { 595 // both grow 596 auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center, 597 LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); 598 retval = MulAdd(sum, sum, retval); 599 } 600 { 601 // y grows, x shrinks 602 auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center, 603 LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); 604 retval = MulAdd(sum, sum, retval); 605 } 606 { 607 // y grows -4 to 4, x shrinks 1 -> -1 608 auto sum = 609 Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center, 610 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1)); 611 retval = MulAdd(sum, sum, retval); 612 } 613 { 614 // y grows -4 to 4, x grows -1 -> 1 615 auto sum = 616 Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center, 617 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1)); 618 retval = MulAdd(sum, sum, retval); 619 } 620 { 621 // x grows -4 to 4, y grows -1 to 1 622 auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center, 623 LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs)); 624 retval = MulAdd(sum, sum, retval); 625 } 626 { 627 // x grows -4 to 4, y shrinks 1 to -1 628 auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center, 629 LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs)); 630 retval = MulAdd(sum, sum, retval); 631 } 632 { 633 /* 0_________ 634 1__*______ 635 2___*_____ 636 3_________ 637 4____0____ 638 5_________ 639 6_____*___ 640 7______*__ 641 8_________ */ 642 auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center, 643 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); 644 retval = MulAdd(sum, sum, retval); 645 } 646 { 647 /* 0_________ 648 1______*__ 649 2_____*___ 650 3_________ 651 4____0____ 652 5_________ 653 6___*_____ 654 7__*______ 655 8_________ */ 656 auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center, 657 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); 658 retval = MulAdd(sum, sum, retval); 659 } 660 { 661 /* 0_________ 662 1_________ 663 2_*_______ 664 3__*______ 665 4____0____ 666 5______*__ 667 6_______*_ 668 7_________ 669 8_________ */ 670 auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center, 671 LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); 672 retval = MulAdd(sum, sum, retval); 673 } 674 { 675 /* 0_________ 676 1_________ 677 2_______*_ 678 3______*__ 679 4____0____ 680 5__*______ 681 6_*_______ 682 7_________ 683 8_________ */ 684 auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center, 685 LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); 686 retval = MulAdd(sum, sum, retval); 687 } 688 { 689 /* 0_________ 690 1_________ 691 2________* 692 3______*__ 693 4____0____ 694 5__*______ 695 6*________ 696 7_________ 697 8_________ */ 698 699 auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center, 700 LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4)); 701 retval = MulAdd(sum, sum, retval); 702 } 703 { 704 /* 0_________ 705 1_________ 706 2*________ 707 3__*______ 708 4____0____ 709 5______*__ 710 6________* 711 7_________ 712 8_________ */ 713 auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center, 714 LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4)); 715 retval = MulAdd(sum, sum, retval); 716 } 717 { 718 /* 0__*______ 719 1_________ 720 2___*_____ 721 3_________ 722 4____0____ 723 5_________ 724 6_____*___ 725 7_________ 726 8______*__ */ 727 auto sum = 728 Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center, 729 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2)); 730 retval = MulAdd(sum, sum, retval); 731 } 732 { 733 /* 0______*__ 734 1_________ 735 2_____*___ 736 3_________ 737 4____0____ 738 5_________ 739 6___*_____ 740 7_________ 741 8__*______ */ 742 auto sum = 743 Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center, 744 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2)); 745 retval = MulAdd(sum, sum, retval); 746 } 747 return retval; 748 } 749 750 template <class D> 751 Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df, 752 const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { 753 const intptr_t xs3 = 3 * xs; 754 755 const auto center = LoadU(df, d); 756 757 // x grows, y constant 758 const auto sum_yconst = 759 Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2), 760 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2), 761 LoadU(df, d + 3), LoadU(df, d + 4)); 762 // Will return this, sum of all line kernels 763 auto retval = Mul(sum_yconst, sum_yconst); 764 765 { 766 // y grows, x constant 767 auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3), 768 LoadU(df, d - xs - xs), LoadU(df, d - xs), center, 769 LoadU(df, d + xs), LoadU(df, d + xs + xs), 770 LoadU(df, d + xs3), LoadU(df, d + xs3 + xs)); 771 retval = MulAdd(sum, sum, retval); 772 } 773 { 774 // both grow 775 auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), 776 LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), 777 LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); 778 retval = MulAdd(sum, sum, retval); 779 } 780 { 781 // y grows, x shrinks 782 auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), 783 LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), 784 LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); 785 retval = MulAdd(sum, sum, retval); 786 } 787 { 788 // y grows -4 to 4, x shrinks 1 -> -1 789 auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), 790 LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, 791 LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), 792 LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); 793 retval = MulAdd(sum, sum, retval); 794 } 795 { 796 // y grows -4 to 4, x grows -1 -> 1 797 auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), 798 LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, 799 LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), 800 LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); 801 retval = MulAdd(sum, sum, retval); 802 } 803 { 804 // x grows -4 to 4, y grows -1 to 1 805 auto sum = 806 Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs), 807 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs), 808 LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs)); 809 retval = MulAdd(sum, sum, retval); 810 } 811 { 812 // x grows -4 to 4, y shrinks 1 to -1 813 auto sum = 814 Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs), 815 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs), 816 LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs)); 817 retval = MulAdd(sum, sum, retval); 818 } 819 { 820 /* 0_________ 821 1__*______ 822 2___*_____ 823 3___*_____ 824 4____0____ 825 5_____*___ 826 6_____*___ 827 7______*__ 828 8_________ */ 829 auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), 830 LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), 831 LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); 832 retval = MulAdd(sum, sum, retval); 833 } 834 { 835 /* 0_________ 836 1______*__ 837 2_____*___ 838 3_____*___ 839 4____0____ 840 5___*_____ 841 6___*_____ 842 7__*______ 843 8_________ */ 844 auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), 845 LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), 846 LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); 847 retval = MulAdd(sum, sum, retval); 848 } 849 { 850 /* 0_________ 851 1_________ 852 2_*_______ 853 3__**_____ 854 4____0____ 855 5_____**__ 856 6_______*_ 857 7_________ 858 8_________ */ 859 auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), 860 LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), 861 LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); 862 retval = MulAdd(sum, sum, retval); 863 } 864 { 865 /* 0_________ 866 1_________ 867 2_______*_ 868 3_____**__ 869 4____0____ 870 5__**_____ 871 6_*_______ 872 7_________ 873 8_________ */ 874 auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), 875 LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), 876 LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); 877 retval = MulAdd(sum, sum, retval); 878 } 879 { 880 /* 0_________ 881 1_________ 882 2_________ 883 3______*** 884 4___*0*___ 885 5***______ 886 6_________ 887 7_________ 888 8_________ */ 889 890 auto sum = 891 Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2), 892 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2), 893 LoadU(df, d - xs + 3), LoadU(df, d - xs + 4)); 894 retval = MulAdd(sum, sum, retval); 895 } 896 { 897 /* 0_________ 898 1_________ 899 2_________ 900 3***______ 901 4___*0*___ 902 5______*** 903 6_________ 904 7_________ 905 8_________ */ 906 auto sum = 907 Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2), 908 LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2), 909 LoadU(df, d + xs + 3), LoadU(df, d + xs + 4)); 910 retval = MulAdd(sum, sum, retval); 911 } 912 { 913 /* 0___*_____ 914 1___*_____ 915 2___*_____ 916 3____*____ 917 4____0____ 918 5____*____ 919 6_____*___ 920 7_____*___ 921 8_____*___ */ 922 auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), 923 LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, 924 LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), 925 LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); 926 retval = MulAdd(sum, sum, retval); 927 } 928 { 929 /* 0_____*___ 930 1_____*___ 931 2____ *___ 932 3____*____ 933 4____0____ 934 5____*____ 935 6___*_____ 936 7___*_____ 937 8___*_____ */ 938 auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), 939 LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, 940 LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), 941 LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); 942 retval = MulAdd(sum, sum, retval); 943 } 944 return retval; 945 } 946 947 // Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known 948 // to be far enough from the image borders. "diffs" is a packed image. 949 template <class Tag> 950 static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs, 951 const size_t x0, 952 const size_t y0) { 953 const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0; 954 const HWY_CAPPED(float, 1) df; 955 if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) && 956 y0 < (diffs.ysize() - 4))) { 957 return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow())); 958 } 959 960 float borderimage[12 * 9]; // round up to 4 961 for (int dy = 0; dy < 9; ++dy) { 962 int y = y0 + dy - 4; 963 if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) { 964 for (int dx = 0; dx < 12; ++dx) { 965 borderimage[dy * 12 + dx] = 0.0f; 966 } 967 continue; 968 } 969 970 const float* row_diffs = diffs.ConstRow(y); 971 for (int dx = 0; dx < 9; ++dx) { 972 int x = x0 + dx - 4; 973 if (x < 0 || static_cast<size_t>(x) >= diffs.xsize()) { 974 borderimage[dy * 12 + dx] = 0.0f; 975 } else { 976 borderimage[dy * 12 + dx] = row_diffs[x]; 977 } 978 } 979 std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f); 980 } 981 return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12)); 982 } 983 984 template <class Tag> 985 static Status MaltaDiffMapT(const Tag tag, const ImageF& lum0, 986 const ImageF& lum1, const double w_0gt1, 987 const double w_0lt1, const double norm1, 988 const double len, const double mulli, 989 ImageF* HWY_RESTRICT diffs, 990 ImageF* HWY_RESTRICT block_diff_ac) { 991 JXL_ENSURE(SameSize(lum0, lum1) && SameSize(lum0, *diffs)); 992 const size_t xsize_ = lum0.xsize(); 993 const size_t ysize_ = lum0.ysize(); 994 995 const float kWeight0 = 0.5; 996 const float kWeight1 = 0.33; 997 998 const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1); 999 const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1); 1000 const float norm2_0gt1 = w_pre0gt1 * norm1; 1001 const float norm2_0lt1 = w_pre0lt1 * norm1; 1002 1003 for (size_t y = 0; y < ysize_; ++y) { 1004 const float* HWY_RESTRICT row0 = lum0.ConstRow(y); 1005 const float* HWY_RESTRICT row1 = lum1.ConstRow(y); 1006 float* HWY_RESTRICT row_diffs = diffs->Row(y); 1007 for (size_t x = 0; x < xsize_; ++x) { 1008 const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x])); 1009 const float diff = row0[x] - row1[x]; 1010 const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval); 1011 1012 // Primary symmetric quadratic objective. 1013 row_diffs[x] = scaler * diff; 1014 1015 const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval); 1016 const double fabs0 = std::fabs(row0[x]); 1017 1018 // Secondary half-open quadratic objectives. 1019 const double too_small = 0.55 * fabs0; 1020 const double too_big = 1.05 * fabs0; 1021 1022 if (row0[x] < 0) { 1023 if (row1[x] > -too_small) { 1024 double impact = scaler2 * (row1[x] + too_small); 1025 row_diffs[x] -= impact; 1026 } else if (row1[x] < -too_big) { 1027 double impact = scaler2 * (-row1[x] - too_big); 1028 row_diffs[x] += impact; 1029 } 1030 } else { 1031 if (row1[x] < too_small) { 1032 double impact = scaler2 * (too_small - row1[x]); 1033 row_diffs[x] += impact; 1034 } else if (row1[x] > too_big) { 1035 double impact = scaler2 * (row1[x] - too_big); 1036 row_diffs[x] -= impact; 1037 } 1038 } 1039 } 1040 } 1041 1042 size_t y0 = 0; 1043 // Top 1044 for (; y0 < 4; ++y0) { 1045 float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); 1046 for (size_t x0 = 0; x0 < xsize_; ++x0) { 1047 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1048 } 1049 } 1050 1051 const HWY_FULL(float) df; 1052 const size_t aligned_x = std::max(static_cast<size_t>(4), Lanes(df)); 1053 const intptr_t stride = diffs->PixelsPerRow(); 1054 1055 // Middle 1056 for (; y0 < ysize_ - 4; ++y0) { 1057 const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0); 1058 float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); 1059 size_t x0 = 0; 1060 for (; x0 < aligned_x; ++x0) { 1061 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1062 } 1063 for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) { 1064 auto diff = Load(df, row_diff + x0); 1065 diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride)); 1066 Store(diff, df, row_diff + x0); 1067 } 1068 1069 for (; x0 < xsize_; ++x0) { 1070 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1071 } 1072 } 1073 1074 // Bottom 1075 for (; y0 < ysize_; ++y0) { 1076 float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->Row(y0); 1077 for (size_t x0 = 0; x0 < xsize_; ++x0) { 1078 row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0); 1079 } 1080 } 1081 return true; 1082 } 1083 1084 // Need non-template wrapper functions for HWY_EXPORT. 1085 Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, 1086 const double w_0lt1, const double norm1, 1087 ImageF* HWY_RESTRICT diffs, 1088 ImageF* HWY_RESTRICT block_diff_ac) { 1089 const double len = 3.75; 1090 static const double mulli = 0.39905817637; 1091 JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, 1092 norm1, len, mulli, diffs, block_diff_ac)); 1093 return true; 1094 } 1095 1096 Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, 1097 const double w_0gt1, const double w_0lt1, 1098 const double norm1, ImageF* HWY_RESTRICT diffs, 1099 ImageF* HWY_RESTRICT block_diff_ac) { 1100 const double len = 3.75; 1101 static const double mulli = 0.611612573796; 1102 JXL_RETURN_IF_ERROR(MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, 1103 norm1, len, mulli, diffs, block_diff_ac)); 1104 return true; 1105 } 1106 1107 void CombineChannelsForMasking(const ImageF* hf, const ImageF* uhf, 1108 ImageF* out) { 1109 // Only X and Y components are involved in masking. B's influence 1110 // is considered less important in the high frequency area, and we 1111 // don't model masking from lower frequency signals. 1112 static const float muls[3] = { 1113 2.5f, 1114 0.4f, 1115 0.4f, 1116 }; 1117 // Silly and unoptimized approach here. TODO(jyrki): rework this. 1118 for (size_t y = 0; y < hf[0].ysize(); ++y) { 1119 const float* BUTTERAUGLI_RESTRICT row_y_hf = hf[1].Row(y); 1120 const float* BUTTERAUGLI_RESTRICT row_y_uhf = uhf[1].Row(y); 1121 const float* BUTTERAUGLI_RESTRICT row_x_hf = hf[0].Row(y); 1122 const float* BUTTERAUGLI_RESTRICT row_x_uhf = uhf[0].Row(y); 1123 float* BUTTERAUGLI_RESTRICT row = out->Row(y); 1124 for (size_t x = 0; x < hf[0].xsize(); ++x) { 1125 float xdiff = (row_x_uhf[x] + row_x_hf[x]) * muls[0]; 1126 float ydiff = row_y_uhf[x] * muls[1] + row_y_hf[x] * muls[2]; 1127 row[x] = xdiff * xdiff + ydiff * ydiff; 1128 row[x] = std::sqrt(row[x]); 1129 } 1130 } 1131 } 1132 1133 void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) { 1134 const size_t xsize = xyb.xsize(); 1135 const size_t ysize = xyb.ysize(); 1136 const float bias = mul * bias_arg; 1137 const float sqrt_bias = std::sqrt(bias); 1138 for (size_t y = 0; y < ysize; ++y) { 1139 const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y); 1140 float* BUTTERAUGLI_RESTRICT row_out = out->Row(y); 1141 for (size_t x = 0; x < xsize; ++x) { 1142 // kBias makes sqrt behave more linearly. 1143 row_out[x] = std::sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias; 1144 } 1145 } 1146 } 1147 1148 // std::log(80.0) / std::log(255.0); 1149 constexpr float kIntensityTargetNormalizationHack = 0.79079917404f; 1150 static const float kInternalGoodQualityThreshold = 1151 17.83f * kIntensityTargetNormalizationHack; 1152 static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold; 1153 1154 void StoreMin3(const float v, float& min0, float& min1, float& min2) { 1155 if (v < min2) { 1156 if (v < min0) { 1157 min2 = min1; 1158 min1 = min0; 1159 min0 = v; 1160 } else if (v < min1) { 1161 min2 = min1; 1162 min1 = v; 1163 } else { 1164 min2 = v; 1165 } 1166 } 1167 } 1168 1169 // Look for smooth areas near the area of degradation. 1170 // If the areas area generally smooth, don't do masking. 1171 void FuzzyErosion(const ImageF& from, ImageF* to) { 1172 const size_t xsize = from.xsize(); 1173 const size_t ysize = from.ysize(); 1174 static const int kStep = 3; 1175 for (size_t y = 0; y < ysize; ++y) { 1176 for (size_t x = 0; x < xsize; ++x) { 1177 float min0 = from.Row(y)[x]; 1178 float min1 = 2 * min0; 1179 float min2 = min1; 1180 if (x >= kStep) { 1181 float v = from.Row(y)[x - kStep]; 1182 StoreMin3(v, min0, min1, min2); 1183 if (y >= kStep) { 1184 float v = from.Row(y - kStep)[x - kStep]; 1185 StoreMin3(v, min0, min1, min2); 1186 } 1187 if (y < ysize - kStep) { 1188 float v = from.Row(y + kStep)[x - kStep]; 1189 StoreMin3(v, min0, min1, min2); 1190 } 1191 } 1192 if (x < xsize - kStep) { 1193 float v = from.Row(y)[x + kStep]; 1194 StoreMin3(v, min0, min1, min2); 1195 if (y >= kStep) { 1196 float v = from.Row(y - kStep)[x + kStep]; 1197 StoreMin3(v, min0, min1, min2); 1198 } 1199 if (y < ysize - kStep) { 1200 float v = from.Row(y + kStep)[x + kStep]; 1201 StoreMin3(v, min0, min1, min2); 1202 } 1203 } 1204 if (y >= kStep) { 1205 float v = from.Row(y - kStep)[x]; 1206 StoreMin3(v, min0, min1, min2); 1207 } 1208 if (y < ysize - kStep) { 1209 float v = from.Row(y + kStep)[x]; 1210 StoreMin3(v, min0, min1, min2); 1211 } 1212 to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2); 1213 } 1214 } 1215 } 1216 1217 // Compute values of local frequency and dc masking based on the activity 1218 // in the two images. img_diff_ac may be null. 1219 Status Mask(const ImageF& mask0, const ImageF& mask1, 1220 const ButteraugliParams& params, BlurTemp* blur_temp, 1221 ImageF* BUTTERAUGLI_RESTRICT mask, 1222 ImageF* BUTTERAUGLI_RESTRICT diff_ac) { 1223 const size_t xsize = mask0.xsize(); 1224 const size_t ysize = mask0.ysize(); 1225 JxlMemoryManager* memory_manager = mask0.memory_manager(); 1226 JXL_ASSIGN_OR_RETURN(*mask, ImageF::Create(memory_manager, xsize, ysize)); 1227 static const float kMul = 6.19424080439; 1228 static const float kBias = 12.61050594197; 1229 static const float kRadius = 2.7; 1230 JXL_ASSIGN_OR_RETURN(ImageF diff0, 1231 ImageF::Create(memory_manager, xsize, ysize)); 1232 JXL_ASSIGN_OR_RETURN(ImageF diff1, 1233 ImageF::Create(memory_manager, xsize, ysize)); 1234 JXL_ASSIGN_OR_RETURN(ImageF blurred0, 1235 ImageF::Create(memory_manager, xsize, ysize)); 1236 JXL_ASSIGN_OR_RETURN(ImageF blurred1, 1237 ImageF::Create(memory_manager, xsize, ysize)); 1238 DiffPrecompute(mask0, kMul, kBias, &diff0); 1239 DiffPrecompute(mask1, kMul, kBias, &diff1); 1240 JXL_RETURN_IF_ERROR(Blur(diff0, kRadius, params, blur_temp, &blurred0)); 1241 FuzzyErosion(blurred0, &diff0); 1242 JXL_RETURN_IF_ERROR(Blur(diff1, kRadius, params, blur_temp, &blurred1)); 1243 for (size_t y = 0; y < ysize; ++y) { 1244 for (size_t x = 0; x < xsize; ++x) { 1245 mask->Row(y)[x] = diff0.Row(y)[x]; 1246 if (diff_ac != nullptr) { 1247 static const float kMaskToErrorMul = 10.0; 1248 float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x]; 1249 diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff; 1250 } 1251 } 1252 } 1253 return true; 1254 } 1255 1256 // `diff_ac` may be null. 1257 Status MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1, 1258 const size_t xsize, const size_t ysize, 1259 const ButteraugliParams& params, BlurTemp* blur_temp, 1260 ImageF* BUTTERAUGLI_RESTRICT mask, 1261 ImageF* BUTTERAUGLI_RESTRICT diff_ac) { 1262 JxlMemoryManager* memory_manager = pi0.hf[0].memory_manager(); 1263 JXL_ASSIGN_OR_RETURN(ImageF mask0, 1264 ImageF::Create(memory_manager, xsize, ysize)); 1265 JXL_ASSIGN_OR_RETURN(ImageF mask1, 1266 ImageF::Create(memory_manager, xsize, ysize)); 1267 CombineChannelsForMasking(&pi0.hf[0], &pi0.uhf[0], &mask0); 1268 CombineChannelsForMasking(&pi1.hf[0], &pi1.uhf[0], &mask1); 1269 JXL_RETURN_IF_ERROR(Mask(mask0, mask1, params, blur_temp, mask, diff_ac)); 1270 return true; 1271 } 1272 1273 double MaskY(double delta) { 1274 static const double offset = 0.829591754942; 1275 static const double scaler = 0.451936922203; 1276 static const double mul = 2.5485944793; 1277 const double c = mul / ((scaler * delta) + offset); 1278 const double retval = kGlobalScale * (1.0 + c); 1279 return retval * retval; 1280 } 1281 1282 double MaskDcY(double delta) { 1283 static const double offset = 0.20025578522; 1284 static const double scaler = 3.87449418804; 1285 static const double mul = 0.505054525019; 1286 const double c = mul / ((scaler * delta) + offset); 1287 const double retval = kGlobalScale * (1.0 + c); 1288 return retval * retval; 1289 } 1290 1291 inline float MaskColor(const float color[3], const float mask) { 1292 return color[0] * mask + color[1] * mask + color[2] * mask; 1293 } 1294 1295 // Diffmap := sqrt of sum{diff images by multiplied by X and Y/B masks} 1296 Status CombineChannelsToDiffmap(const ImageF& mask, 1297 const Image3F& block_diff_dc, 1298 const Image3F& block_diff_ac, float xmul, 1299 ImageF* result) { 1300 JXL_ENSURE(SameSize(mask, *result)); 1301 size_t xsize = mask.xsize(); 1302 size_t ysize = mask.ysize(); 1303 for (size_t y = 0; y < ysize; ++y) { 1304 float* BUTTERAUGLI_RESTRICT row_out = result->Row(y); 1305 for (size_t x = 0; x < xsize; ++x) { 1306 float val = mask.Row(y)[x]; 1307 float maskval = MaskY(val); 1308 float dc_maskval = MaskDcY(val); 1309 float diff_dc[3]; 1310 float diff_ac[3]; 1311 for (int i = 0; i < 3; ++i) { 1312 diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x]; 1313 diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x]; 1314 } 1315 diff_ac[0] *= xmul; 1316 diff_dc[0] *= xmul; 1317 row_out[x] = std::sqrt(MaskColor(diff_dc, dc_maskval) + 1318 MaskColor(diff_ac, maskval)); 1319 } 1320 } 1321 return true; 1322 } 1323 1324 // Adds weighted L2 difference between i0 and i1 to diffmap. 1325 static void L2Diff(const ImageF& i0, const ImageF& i1, const float w, 1326 ImageF* BUTTERAUGLI_RESTRICT diffmap) { 1327 if (w == 0) return; 1328 1329 const HWY_FULL(float) d; 1330 const auto weight = Set(d, w); 1331 1332 for (size_t y = 0; y < i0.ysize(); ++y) { 1333 const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); 1334 const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); 1335 float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); 1336 1337 for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { 1338 const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); 1339 const auto diff2 = Mul(diff, diff); 1340 const auto prev = Load(d, row_diff + x); 1341 Store(MulAdd(diff2, weight, prev), d, row_diff + x); 1342 } 1343 } 1344 } 1345 1346 // Initializes diffmap to the weighted L2 difference between i0 and i1. 1347 static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w, 1348 ImageF* BUTTERAUGLI_RESTRICT diffmap) { 1349 if (w == 0) return; 1350 1351 const HWY_FULL(float) d; 1352 const auto weight = Set(d, w); 1353 1354 for (size_t y = 0; y < i0.ysize(); ++y) { 1355 const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y); 1356 const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y); 1357 float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); 1358 1359 for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { 1360 const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x)); 1361 const auto diff2 = Mul(diff, diff); 1362 Store(Mul(diff2, weight), d, row_diff + x); 1363 } 1364 } 1365 } 1366 1367 // i0 is the original image. 1368 // i1 is the deformed copy. 1369 static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1, 1370 float w_0lt1, 1371 ImageF* BUTTERAUGLI_RESTRICT diffmap) { 1372 if (w_0gt1 == 0 && w_0lt1 == 0) { 1373 return; 1374 } 1375 1376 const HWY_FULL(float) d; 1377 const auto vw_0gt1 = Set(d, w_0gt1 * 0.8); 1378 const auto vw_0lt1 = Set(d, w_0lt1 * 0.8); 1379 1380 for (size_t y = 0; y < i0.ysize(); ++y) { 1381 const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y); 1382 const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y); 1383 float* BUTTERAUGLI_RESTRICT row_diff = diffmap->Row(y); 1384 1385 for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) { 1386 const auto val0 = Load(d, row0 + x); 1387 const auto val1 = Load(d, row1 + x); 1388 1389 // Primary symmetric quadratic objective. 1390 const auto diff = Sub(val0, val1); 1391 auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x)); 1392 1393 // Secondary half-open quadratic objectives. 1394 const auto fabs0 = Abs(val0); 1395 const auto too_small = Mul(Set(d, 0.4), fabs0); 1396 const auto too_big = fabs0; 1397 1398 const auto if_neg = IfThenElse( 1399 Gt(val1, Neg(too_small)), Add(val1, too_small), 1400 IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big))); 1401 const auto if_pos = 1402 IfThenElse(Lt(val1, too_small), Sub(too_small, val1), 1403 IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big))); 1404 const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos); 1405 total = MulAdd(vw_0lt1, Mul(v, v), total); 1406 Store(total, d, row_diff + x); 1407 } 1408 } 1409 } 1410 1411 // A simple HDR compatible gamma function. 1412 template <class DF, class V> 1413 V Gamma(const DF df, V v) { 1414 // ln(2) constant folded in because we want std::log but have FastLog2f. 1415 const auto kRetMul = Set(df, 19.245013259874995f * 0.693147180559945f); 1416 const auto kRetAdd = Set(df, -23.16046239805755); 1417 // This should happen rarely, but may lead to a NaN in log, which is 1418 // undesirable. Since negative photons don't exist we solve the NaNs by 1419 // clamping here. 1420 v = ZeroIfNegative(v); 1421 1422 const auto biased = Add(v, Set(df, 9.9710635769299145)); 1423 const auto log = FastLog2f(df, biased); 1424 // We could fold this into a custom Log2 polynomial, but there would be 1425 // relatively little gain. 1426 return MulAdd(kRetMul, log, kRetAdd); 1427 } 1428 1429 template <bool Clamp, class DF, class V> 1430 BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1, 1431 const V& in2, V* JXL_RESTRICT out0, 1432 V* JXL_RESTRICT out1, 1433 V* JXL_RESTRICT out2) { 1434 // https://en.wikipedia.org/wiki/Photopsin absorbance modeling. 1435 static const double mixi0 = 0.29956550340058319; 1436 static const double mixi1 = 0.63373087833825936; 1437 static const double mixi2 = 0.077705617820981968; 1438 static const double mixi3 = 1.7557483643287353; 1439 static const double mixi4 = 0.22158691104574774; 1440 static const double mixi5 = 0.69391388044116142; 1441 static const double mixi6 = 0.0987313588422; 1442 static const double mixi7 = 1.7557483643287353; 1443 static const double mixi8 = 0.02; 1444 static const double mixi9 = 0.02; 1445 static const double mixi10 = 0.20480129041026129; 1446 static const double mixi11 = 12.226454707163354; 1447 1448 const V mix0 = Set(df, mixi0); 1449 const V mix1 = Set(df, mixi1); 1450 const V mix2 = Set(df, mixi2); 1451 const V mix3 = Set(df, mixi3); 1452 const V mix4 = Set(df, mixi4); 1453 const V mix5 = Set(df, mixi5); 1454 const V mix6 = Set(df, mixi6); 1455 const V mix7 = Set(df, mixi7); 1456 const V mix8 = Set(df, mixi8); 1457 const V mix9 = Set(df, mixi9); 1458 const V mix10 = Set(df, mixi10); 1459 const V mix11 = Set(df, mixi11); 1460 1461 *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3))); 1462 *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7))); 1463 *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11))); 1464 1465 if (Clamp) { 1466 *out0 = Max(*out0, mix3); 1467 *out1 = Max(*out1, mix7); 1468 *out2 = Max(*out2, mix11); 1469 } 1470 } 1471 1472 // `blurred` is a temporary image used inside this function and not returned. 1473 Status OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params, 1474 Image3F* blurred, BlurTemp* blur_temp, Image3F* xyb) { 1475 JXL_ENSURE(blurred != nullptr); 1476 const double kSigma = 1.2; 1477 JXL_RETURN_IF_ERROR( 1478 Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0))); 1479 JXL_RETURN_IF_ERROR( 1480 Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1))); 1481 JXL_RETURN_IF_ERROR( 1482 Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2))); 1483 const HWY_FULL(float) df; 1484 const auto intensity_target_multiplier = Set(df, params.intensity_target); 1485 for (size_t y = 0; y < rgb.ysize(); ++y) { 1486 const float* row_r = rgb.ConstPlaneRow(0, y); 1487 const float* row_g = rgb.ConstPlaneRow(1, y); 1488 const float* row_b = rgb.ConstPlaneRow(2, y); 1489 const float* row_blurred_r = blurred->ConstPlaneRow(0, y); 1490 const float* row_blurred_g = blurred->ConstPlaneRow(1, y); 1491 const float* row_blurred_b = blurred->ConstPlaneRow(2, y); 1492 float* row_out_x = xyb->PlaneRow(0, y); 1493 float* row_out_y = xyb->PlaneRow(1, y); 1494 float* row_out_b = xyb->PlaneRow(2, y); 1495 const auto min = Set(df, 1e-4f); 1496 for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) { 1497 auto sensitivity0 = Undefined(df); 1498 auto sensitivity1 = Undefined(df); 1499 auto sensitivity2 = Undefined(df); 1500 { 1501 // Calculate sensitivity based on the smoothed image gamma derivative. 1502 auto pre_mixed0 = Undefined(df); 1503 auto pre_mixed1 = Undefined(df); 1504 auto pre_mixed2 = Undefined(df); 1505 OpsinAbsorbance<true>( 1506 df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier), 1507 Mul(Load(df, row_blurred_g + x), intensity_target_multiplier), 1508 Mul(Load(df, row_blurred_b + x), intensity_target_multiplier), 1509 &pre_mixed0, &pre_mixed1, &pre_mixed2); 1510 pre_mixed0 = Max(pre_mixed0, min); 1511 pre_mixed1 = Max(pre_mixed1, min); 1512 pre_mixed2 = Max(pre_mixed2, min); 1513 sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0); 1514 sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1); 1515 sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2); 1516 sensitivity0 = Max(sensitivity0, min); 1517 sensitivity1 = Max(sensitivity1, min); 1518 sensitivity2 = Max(sensitivity2, min); 1519 } 1520 auto cur_mixed0 = Undefined(df); 1521 auto cur_mixed1 = Undefined(df); 1522 auto cur_mixed2 = Undefined(df); 1523 OpsinAbsorbance<false>( 1524 df, Mul(Load(df, row_r + x), intensity_target_multiplier), 1525 Mul(Load(df, row_g + x), intensity_target_multiplier), 1526 Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0, 1527 &cur_mixed1, &cur_mixed2); 1528 cur_mixed0 = Mul(cur_mixed0, sensitivity0); 1529 cur_mixed1 = Mul(cur_mixed1, sensitivity1); 1530 cur_mixed2 = Mul(cur_mixed2, sensitivity2); 1531 // This is a kludge. The negative values should be zeroed away before 1532 // blurring. Ideally there would be no negative values in the first place. 1533 const auto min01 = Set(df, 1.7557483643287353f); 1534 const auto min2 = Set(df, 12.226454707163354f); 1535 cur_mixed0 = Max(cur_mixed0, min01); 1536 cur_mixed1 = Max(cur_mixed1, min01); 1537 cur_mixed2 = Max(cur_mixed2, min2); 1538 1539 Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x); 1540 Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x); 1541 Store(cur_mixed2, df, row_out_b + x); 1542 } 1543 } 1544 return true; 1545 } 1546 1547 Status ButteraugliDiffmapInPlace(Image3F& image0, Image3F& image1, 1548 const ButteraugliParams& params, 1549 ImageF& diffmap) { 1550 // image0 and image1 are in linear sRGB color space 1551 const size_t xsize = image0.xsize(); 1552 const size_t ysize = image0.ysize(); 1553 JxlMemoryManager* memory_manager = image0.memory_manager(); 1554 BlurTemp blur_temp; 1555 { 1556 // Convert image0 and image1 to XYB in-place 1557 JXL_ASSIGN_OR_RETURN(Image3F temp, 1558 Image3F::Create(memory_manager, xsize, ysize)); 1559 JXL_RETURN_IF_ERROR( 1560 OpsinDynamicsImage(image0, params, &temp, &blur_temp, &image0)); 1561 JXL_RETURN_IF_ERROR( 1562 OpsinDynamicsImage(image1, params, &temp, &blur_temp, &image1)); 1563 } 1564 // image0 and image1 are in XYB color space 1565 JXL_ASSIGN_OR_RETURN(ImageF block_diff_dc, 1566 ImageF::Create(memory_manager, xsize, ysize)); 1567 ZeroFillImage(&block_diff_dc); 1568 { 1569 // separate out LF components from image0 and image1 and compute the dc 1570 // diff image from them 1571 JXL_ASSIGN_OR_RETURN(Image3F lf0, 1572 Image3F::Create(memory_manager, xsize, ysize)); 1573 JXL_ASSIGN_OR_RETURN(Image3F lf1, 1574 Image3F::Create(memory_manager, xsize, ysize)); 1575 JXL_RETURN_IF_ERROR( 1576 SeparateLFAndMF(params, image0, &lf0, &image0, &blur_temp)); 1577 JXL_RETURN_IF_ERROR( 1578 SeparateLFAndMF(params, image1, &lf1, &image1, &blur_temp)); 1579 for (size_t c = 0; c < 3; ++c) { 1580 L2Diff(lf0.Plane(c), lf1.Plane(c), wmul[6 + c], &block_diff_dc); 1581 } 1582 } 1583 // image0 and image1 are MF residuals (before blurring) in XYB color space 1584 ImageF hf0[2]; 1585 ImageF hf1[2]; 1586 JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image0, &hf0[0], &blur_temp)); 1587 JXL_RETURN_IF_ERROR(SeparateMFAndHF(params, &image1, &hf1[0], &blur_temp)); 1588 // image0 and image1 are MF-images in XYB color space 1589 1590 JXL_ASSIGN_OR_RETURN(ImageF block_diff_ac, 1591 ImageF::Create(memory_manager, xsize, ysize)); 1592 ZeroFillImage(&block_diff_ac); 1593 // start accumulating ac diff image from MF images 1594 { 1595 JXL_ASSIGN_OR_RETURN(ImageF diffs, 1596 ImageF::Create(memory_manager, xsize, ysize)); 1597 JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(1), image1.Plane(1), 1598 wMfMalta, wMfMalta, norm1Mf, &diffs, 1599 &block_diff_ac)); 1600 JXL_RETURN_IF_ERROR(MaltaDiffMapLF(image0.Plane(0), image1.Plane(0), 1601 wMfMaltaX, wMfMaltaX, norm1MfX, &diffs, 1602 &block_diff_ac)); 1603 } 1604 for (size_t c = 0; c < 3; ++c) { 1605 L2Diff(image0.Plane(c), image1.Plane(c), wmul[3 + c], &block_diff_ac); 1606 } 1607 // we will not need the MF-images and more, so we deallocate them to reduce 1608 // peak memory usage 1609 image0 = Image3F(); 1610 image1 = Image3F(); 1611 1612 ImageF uhf0[2]; 1613 ImageF uhf1[2]; 1614 JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf0[0], &uhf0[0], &blur_temp)); 1615 JXL_RETURN_IF_ERROR(SeparateHFAndUHF(params, &hf1[0], &uhf1[0], &blur_temp)); 1616 1617 // continue accumulating ac diff image from HF and UHF images 1618 const float hf_asymmetry = params.hf_asymmetry; 1619 { 1620 JXL_ASSIGN_OR_RETURN(ImageF diffs, 1621 ImageF::Create(memory_manager, xsize, ysize)); 1622 JXL_RETURN_IF_ERROR(MaltaDiffMap(uhf0[1], uhf1[1], wUhfMalta * hf_asymmetry, 1623 wUhfMalta / hf_asymmetry, norm1Uhf, &diffs, 1624 &block_diff_ac)); 1625 JXL_RETURN_IF_ERROR(MaltaDiffMap( 1626 uhf0[0], uhf1[0], wUhfMaltaX * hf_asymmetry, wUhfMaltaX / hf_asymmetry, 1627 norm1UhfX, &diffs, &block_diff_ac)); 1628 JXL_RETURN_IF_ERROR(MaltaDiffMapLF( 1629 hf0[1], hf1[1], wHfMalta * std::sqrt(hf_asymmetry), 1630 wHfMalta / std::sqrt(hf_asymmetry), norm1Hf, &diffs, &block_diff_ac)); 1631 JXL_RETURN_IF_ERROR(MaltaDiffMapLF( 1632 hf0[0], hf1[0], wHfMaltaX * std::sqrt(hf_asymmetry), 1633 wHfMaltaX / std::sqrt(hf_asymmetry), norm1HfX, &diffs, &block_diff_ac)); 1634 } 1635 for (size_t c = 0; c < 2; ++c) { 1636 L2DiffAsymmetric(hf0[c], hf1[c], wmul[c] * hf_asymmetry, 1637 wmul[c] / hf_asymmetry, &block_diff_ac); 1638 } 1639 1640 // compute mask image from HF and UHF X and Y images 1641 JXL_ASSIGN_OR_RETURN(ImageF mask, 1642 ImageF::Create(memory_manager, xsize, ysize)); 1643 { 1644 JXL_ASSIGN_OR_RETURN(ImageF mask0, 1645 ImageF::Create(memory_manager, xsize, ysize)); 1646 JXL_ASSIGN_OR_RETURN(ImageF mask1, 1647 ImageF::Create(memory_manager, xsize, ysize)); 1648 CombineChannelsForMasking(&hf0[0], &uhf0[0], &mask0); 1649 CombineChannelsForMasking(&hf1[0], &uhf1[0], &mask1); 1650 DeallocateHFAndUHF(&hf1[0], &uhf1[0]); 1651 DeallocateHFAndUHF(&hf0[0], &uhf0[0]); 1652 JXL_RETURN_IF_ERROR( 1653 Mask(mask0, mask1, params, &blur_temp, &mask, &block_diff_ac)); 1654 } 1655 1656 // compute final diffmap from mask image and ac and dc diff images 1657 JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize)); 1658 for (size_t y = 0; y < ysize; ++y) { 1659 const float* row_dc = block_diff_dc.Row(y); 1660 const float* row_ac = block_diff_ac.Row(y); 1661 float* row_out = diffmap.Row(y); 1662 for (size_t x = 0; x < xsize; ++x) { 1663 const float val = mask.Row(y)[x]; 1664 row_out[x] = sqrt(row_dc[x] * MaskDcY(val) + row_ac[x] * MaskY(val)); 1665 } 1666 } 1667 return true; 1668 } 1669 1670 // NOLINTNEXTLINE(google-readability-namespace-comments) 1671 } // namespace HWY_NAMESPACE 1672 } // namespace jxl 1673 HWY_AFTER_NAMESPACE(); 1674 1675 #if HWY_ONCE 1676 namespace jxl { 1677 1678 HWY_EXPORT(SeparateFrequencies); // Local function. 1679 HWY_EXPORT(MaskPsychoImage); // Local function. 1680 HWY_EXPORT(L2DiffAsymmetric); // Local function. 1681 HWY_EXPORT(L2Diff); // Local function. 1682 HWY_EXPORT(SetL2Diff); // Local function. 1683 HWY_EXPORT(CombineChannelsToDiffmap); // Local function. 1684 HWY_EXPORT(MaltaDiffMap); // Local function. 1685 HWY_EXPORT(MaltaDiffMapLF); // Local function. 1686 HWY_EXPORT(OpsinDynamicsImage); // Local function. 1687 HWY_EXPORT(ButteraugliDiffmapInPlace); // Local function. 1688 1689 #if BUTTERAUGLI_ENABLE_CHECKS 1690 1691 static inline bool IsNan(const float x) { 1692 uint32_t bits; 1693 memcpy(&bits, &x, sizeof(bits)); 1694 const uint32_t bitmask_exp = 0x7F800000; 1695 return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF); 1696 } 1697 1698 static inline bool IsNan(const double x) { 1699 uint64_t bits; 1700 memcpy(&bits, &x, sizeof(bits)); 1701 return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) || 1702 (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL); 1703 } 1704 1705 static inline void CheckImage(const ImageF& image, const char* name) { 1706 for (size_t y = 0; y < image.ysize(); ++y) { 1707 const float* BUTTERAUGLI_RESTRICT row = image.Row(y); 1708 for (size_t x = 0; x < image.xsize(); ++x) { 1709 if (IsNan(row[x])) { 1710 printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS 1711 ")\n", 1712 name, x, y, image.xsize(), image.ysize()); 1713 exit(1); 1714 } 1715 } 1716 } 1717 } 1718 1719 #define CHECK_NAN(x, str) \ 1720 do { \ 1721 if (IsNan(x)) { \ 1722 printf("%d: %s\n", __LINE__, str); \ 1723 abort(); \ 1724 } \ 1725 } while (0) 1726 1727 #define CHECK_IMAGE(image, name) CheckImage(image, name) 1728 1729 #else // BUTTERAUGLI_ENABLE_CHECKS 1730 1731 #define CHECK_NAN(x, str) 1732 #define CHECK_IMAGE(image, name) 1733 1734 #endif // BUTTERAUGLI_ENABLE_CHECKS 1735 1736 // Calculate a 2x2 subsampled image for purposes of recursive butteraugli at 1737 // multiresolution. 1738 static StatusOr<Image3F> SubSample2x(const Image3F& in) { 1739 size_t xs = (in.xsize() + 1) / 2; 1740 size_t ys = (in.ysize() + 1) / 2; 1741 JxlMemoryManager* memory_manager = in.memory_manager(); 1742 JXL_ASSIGN_OR_RETURN(Image3F retval, Image3F::Create(memory_manager, xs, ys)); 1743 for (size_t c = 0; c < 3; ++c) { 1744 for (size_t y = 0; y < ys; ++y) { 1745 for (size_t x = 0; x < xs; ++x) { 1746 retval.PlaneRow(c, y)[x] = 0; 1747 } 1748 } 1749 } 1750 for (size_t c = 0; c < 3; ++c) { 1751 for (size_t y = 0; y < in.ysize(); ++y) { 1752 for (size_t x = 0; x < in.xsize(); ++x) { 1753 retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x]; 1754 } 1755 } 1756 if ((in.xsize() & 1) != 0) { 1757 for (size_t y = 0; y < retval.ysize(); ++y) { 1758 size_t last_column = retval.xsize() - 1; 1759 retval.PlaneRow(c, y)[last_column] *= 2.0f; 1760 } 1761 } 1762 if ((in.ysize() & 1) != 0) { 1763 for (size_t x = 0; x < retval.xsize(); ++x) { 1764 size_t last_row = retval.ysize() - 1; 1765 retval.PlaneRow(c, last_row)[x] *= 2.0f; 1766 } 1767 } 1768 } 1769 return retval; 1770 } 1771 1772 // Supersample src by 2x and add it to dest. 1773 static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) { 1774 for (size_t y = 0; y < dest.ysize(); ++y) { 1775 for (size_t x = 0; x < dest.xsize(); ++x) { 1776 // There will be less errors from the more averaged images. 1777 // We take it into account to some extent using a scaler. 1778 static const double kHeuristicMixingValue = 0.3; 1779 dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w; 1780 dest.Row(y)[x] += w * src.Row(y / 2)[x / 2]; 1781 } 1782 } 1783 } 1784 1785 Image3F* ButteraugliComparator::Temp() const { 1786 bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel); 1787 if (was_in_use) return nullptr; 1788 return &temp_; 1789 } 1790 1791 void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); } 1792 1793 ButteraugliComparator::ButteraugliComparator(size_t xsize, size_t ysize, 1794 const ButteraugliParams& params) 1795 : xsize_(xsize), ysize_(ysize), params_(params) {} 1796 1797 StatusOr<std::unique_ptr<ButteraugliComparator>> ButteraugliComparator::Make( 1798 const Image3F& rgb0, const ButteraugliParams& params) { 1799 size_t xsize = rgb0.xsize(); 1800 size_t ysize = rgb0.ysize(); 1801 JxlMemoryManager* memory_manager = rgb0.memory_manager(); 1802 std::unique_ptr<ButteraugliComparator> result = 1803 std::unique_ptr<ButteraugliComparator>( 1804 new ButteraugliComparator(xsize, ysize, params)); 1805 JXL_ASSIGN_OR_RETURN(result->temp_, 1806 Image3F::Create(memory_manager, xsize, ysize)); 1807 1808 if (xsize < 8 || ysize < 8) { 1809 return result; 1810 } 1811 1812 JXL_ASSIGN_OR_RETURN(Image3F xyb0, 1813 Image3F::Create(memory_manager, xsize, ysize)); 1814 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( 1815 rgb0, params, result->Temp(), &result->blur_temp_, &xyb0)); 1816 result->ReleaseTemp(); 1817 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( 1818 xsize, ysize, params, &result->blur_temp_, xyb0, result->pi0_)); 1819 1820 // Awful recursive construction of samples of different resolution. 1821 // This is an after-thought and possibly somewhat parallel in 1822 // functionality with the PsychoImage multi-resolution approach. 1823 JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb0, SubSample2x(rgb0)); 1824 JXL_ASSIGN_OR_RETURN(result->sub_, 1825 ButteraugliComparator::Make(subsampledRgb0, params)); 1826 return result; 1827 } 1828 1829 Status ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const { 1830 return HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( 1831 pi0_, pi0_, xsize_, ysize_, params_, &blur_temp_, mask, nullptr); 1832 } 1833 1834 Status ButteraugliComparator::Diffmap(const Image3F& rgb1, 1835 ImageF& result) const { 1836 JxlMemoryManager* memory_manager = rgb1.memory_manager(); 1837 if (xsize_ < 8 || ysize_ < 8) { 1838 ZeroFillImage(&result); 1839 return true; 1840 } 1841 JXL_ASSIGN_OR_RETURN(Image3F xyb1, 1842 Image3F::Create(memory_manager, xsize_, ysize_)); 1843 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( 1844 rgb1, params_, Temp(), &blur_temp_, &xyb1)); 1845 ReleaseTemp(); 1846 JXL_RETURN_IF_ERROR(DiffmapOpsinDynamicsImage(xyb1, result)); 1847 if (sub_) { 1848 if (sub_->xsize_ < 8 || sub_->ysize_ < 8) { 1849 return true; 1850 } 1851 JXL_ASSIGN_OR_RETURN( 1852 Image3F sub_xyb, 1853 Image3F::Create(memory_manager, sub_->xsize_, sub_->ysize_)); 1854 JXL_ASSIGN_OR_RETURN(Image3F subsampledRgb1, SubSample2x(rgb1)); 1855 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)( 1856 subsampledRgb1, params_, sub_->Temp(), &sub_->blur_temp_, &sub_xyb)); 1857 sub_->ReleaseTemp(); 1858 ImageF subresult; 1859 JXL_RETURN_IF_ERROR(sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult)); 1860 AddSupersampled2x(subresult, 0.5, result); 1861 } 1862 return true; 1863 } 1864 1865 Status ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1, 1866 ImageF& result) const { 1867 JxlMemoryManager* memory_manager = xyb1.memory_manager(); 1868 if (xsize_ < 8 || ysize_ < 8) { 1869 ZeroFillImage(&result); 1870 return true; 1871 } 1872 PsychoImage pi1; 1873 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(SeparateFrequencies)( 1874 xsize_, ysize_, params_, &blur_temp_, xyb1, pi1)); 1875 JXL_ASSIGN_OR_RETURN(result, ImageF::Create(memory_manager, xsize_, ysize_)); 1876 return DiffmapPsychoImage(pi1, result); 1877 } 1878 1879 namespace { 1880 1881 Status MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1, 1882 const double w_0lt1, const double norm1, 1883 ImageF* HWY_RESTRICT diffs, 1884 Image3F* HWY_RESTRICT block_diff_ac, size_t c) { 1885 return HWY_DYNAMIC_DISPATCH(MaltaDiffMap)(lum0, lum1, w_0gt1, w_0lt1, norm1, 1886 diffs, &block_diff_ac->Plane(c)); 1887 } 1888 1889 Status MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, 1890 const double w_0gt1, const double w_0lt1, 1891 const double norm1, ImageF* HWY_RESTRICT diffs, 1892 Image3F* HWY_RESTRICT block_diff_ac, size_t c) { 1893 return HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)(lum0, lum1, w_0gt1, w_0lt1, norm1, 1894 diffs, &block_diff_ac->Plane(c)); 1895 } 1896 1897 } // namespace 1898 1899 Status ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1, 1900 ImageF& diffmap) const { 1901 JxlMemoryManager* memory_manager = diffmap.memory_manager(); 1902 if (xsize_ < 8 || ysize_ < 8) { 1903 ZeroFillImage(&diffmap); 1904 return true; 1905 } 1906 1907 const float hf_asymmetry_ = params_.hf_asymmetry; 1908 const float xmul_ = params_.xmul; 1909 1910 JXL_ASSIGN_OR_RETURN(ImageF diffs, 1911 ImageF::Create(memory_manager, xsize_, ysize_)); 1912 JXL_ASSIGN_OR_RETURN(Image3F block_diff_ac, 1913 Image3F::Create(memory_manager, xsize_, ysize_)); 1914 ZeroFillImage(&block_diff_ac); 1915 JXL_RETURN_IF_ERROR(MaltaDiffMap( 1916 pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_, 1917 wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1)); 1918 JXL_RETURN_IF_ERROR(MaltaDiffMap( 1919 pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_, 1920 wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac, 0)); 1921 JXL_RETURN_IF_ERROR(MaltaDiffMapLF( 1922 pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_), 1923 wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs, &block_diff_ac, 1)); 1924 JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], 1925 wHfMaltaX * std::sqrt(hf_asymmetry_), 1926 wHfMaltaX / std::sqrt(hf_asymmetry_), 1927 norm1HfX, &diffs, &block_diff_ac, 0)); 1928 JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), 1929 wMfMalta, wMfMalta, norm1Mf, &diffs, 1930 &block_diff_ac, 1)); 1931 JXL_RETURN_IF_ERROR(MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), 1932 wMfMaltaX, wMfMaltaX, norm1MfX, &diffs, 1933 &block_diff_ac, 0)); 1934 1935 JXL_ASSIGN_OR_RETURN(Image3F block_diff_dc, 1936 Image3F::Create(memory_manager, xsize_, ysize_)); 1937 for (size_t c = 0; c < 3; ++c) { 1938 if (c < 2) { // No blue channel error accumulated at HF. 1939 HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric) 1940 (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_, 1941 &block_diff_ac.Plane(c)); 1942 } 1943 HWY_DYNAMIC_DISPATCH(L2Diff) 1944 (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac.Plane(c)); 1945 HWY_DYNAMIC_DISPATCH(SetL2Diff) 1946 (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc.Plane(c)); 1947 } 1948 1949 ImageF mask; 1950 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(MaskPsychoImage)( 1951 pi0_, pi1, xsize_, ysize_, params_, &blur_temp_, &mask, 1952 &block_diff_ac.Plane(1))); 1953 1954 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)( 1955 mask, block_diff_dc, block_diff_ac, xmul_, &diffmap)); 1956 return true; 1957 } 1958 1959 double ButteraugliScoreFromDiffmap(const ImageF& diffmap, 1960 const ButteraugliParams* params) { 1961 float retval = 0.0f; 1962 for (size_t y = 0; y < diffmap.ysize(); ++y) { 1963 const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y); 1964 for (size_t x = 0; x < diffmap.xsize(); ++x) { 1965 retval = std::max(retval, row[x]); 1966 } 1967 } 1968 return retval; 1969 } 1970 1971 Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, 1972 double hf_asymmetry, double xmul, ImageF& diffmap) { 1973 ButteraugliParams params; 1974 params.hf_asymmetry = hf_asymmetry; 1975 params.xmul = xmul; 1976 return ButteraugliDiffmap(rgb0, rgb1, params, diffmap); 1977 } 1978 1979 template <size_t kMax> 1980 bool ButteraugliDiffmapSmall(const Image3F& rgb0, const Image3F& rgb1, 1981 const ButteraugliParams& params, ImageF& diffmap) { 1982 const size_t xsize = rgb0.xsize(); 1983 const size_t ysize = rgb0.ysize(); 1984 JxlMemoryManager* memory_manager = rgb0.memory_manager(); 1985 // Butteraugli values for small (where xsize or ysize is smaller 1986 // than 8 pixels) images are non-sensical, but most likely it is 1987 // less disruptive to try to compute something than just give up. 1988 // Temporarily extend the borders of the image to fit 8 x 8 size. 1989 size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0; 1990 size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0; 1991 size_t xscaled = std::max<size_t>(kMax, xsize); 1992 size_t yscaled = std::max<size_t>(kMax, ysize); 1993 JXL_ASSIGN_OR_RETURN(Image3F scaled0, 1994 Image3F::Create(memory_manager, xscaled, yscaled)); 1995 JXL_ASSIGN_OR_RETURN(Image3F scaled1, 1996 Image3F::Create(memory_manager, xscaled, yscaled)); 1997 for (int i = 0; i < 3; ++i) { 1998 for (size_t y = 0; y < yscaled; ++y) { 1999 for (size_t x = 0; x < xscaled; ++x) { 2000 size_t x2 = std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0); 2001 size_t y2 = std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0); 2002 scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2]; 2003 scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2]; 2004 } 2005 } 2006 } 2007 ImageF diffmap_scaled; 2008 const bool ok = ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled); 2009 JXL_ASSIGN_OR_RETURN(diffmap, ImageF::Create(memory_manager, xsize, ysize)); 2010 for (size_t y = 0; y < ysize; ++y) { 2011 for (size_t x = 0; x < xsize; ++x) { 2012 diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder]; 2013 } 2014 } 2015 return ok; 2016 } 2017 2018 Status ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1, 2019 const ButteraugliParams& params, ImageF& diffmap) { 2020 const size_t xsize = rgb0.xsize(); 2021 const size_t ysize = rgb0.ysize(); 2022 if (xsize < 1 || ysize < 1) { 2023 return JXL_FAILURE("Zero-sized image"); 2024 } 2025 if (!SameSize(rgb0, rgb1)) { 2026 return JXL_FAILURE("Size mismatch"); 2027 } 2028 static const int kMax = 8; 2029 if (xsize < kMax || ysize < kMax) { 2030 return ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); 2031 } 2032 JXL_ASSIGN_OR_RETURN(std::unique_ptr<ButteraugliComparator> butteraugli, 2033 ButteraugliComparator::Make(rgb0, params)); 2034 JXL_RETURN_IF_ERROR(butteraugli->Diffmap(rgb1, diffmap)); 2035 return true; 2036 } 2037 2038 bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, 2039 float hf_asymmetry, float xmul, ImageF& diffmap, 2040 double& diffvalue) { 2041 ButteraugliParams params; 2042 params.hf_asymmetry = hf_asymmetry; 2043 params.xmul = xmul; 2044 return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue); 2045 } 2046 2047 bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1, 2048 const ButteraugliParams& params, ImageF& diffmap, 2049 double& diffvalue) { 2050 if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) { 2051 return false; 2052 } 2053 diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); 2054 return true; 2055 } 2056 2057 Status ButteraugliInterfaceInPlace(Image3F&& rgb0, Image3F&& rgb1, 2058 const ButteraugliParams& params, 2059 ImageF& diffmap, double& diffvalue) { 2060 const size_t xsize = rgb0.xsize(); 2061 const size_t ysize = rgb0.ysize(); 2062 if (xsize < 1 || ysize < 1) { 2063 return JXL_FAILURE("Zero-sized image"); 2064 } 2065 if (!SameSize(rgb0, rgb1)) { 2066 return JXL_FAILURE("Size mismatch"); 2067 } 2068 static const int kMax = 8; 2069 if (xsize < kMax || ysize < kMax) { 2070 bool ok = ButteraugliDiffmapSmall<kMax>(rgb0, rgb1, params, diffmap); 2071 diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); 2072 return ok; 2073 } 2074 ImageF subdiffmap; 2075 if (xsize >= 15 && ysize >= 15) { 2076 JXL_ASSIGN_OR_RETURN(Image3F rgb0_sub, SubSample2x(rgb0)); 2077 JXL_ASSIGN_OR_RETURN(Image3F rgb1_sub, SubSample2x(rgb1)); 2078 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( 2079 rgb0_sub, rgb1_sub, params, subdiffmap)); 2080 } 2081 JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ButteraugliDiffmapInPlace)( 2082 rgb0, rgb1, params, diffmap)); 2083 if (xsize >= 15 && ysize >= 15) { 2084 AddSupersampled2x(subdiffmap, 0.5, diffmap); 2085 } 2086 diffvalue = ButteraugliScoreFromDiffmap(diffmap, ¶ms); 2087 return true; 2088 } 2089 2090 double ButteraugliFuzzyClass(double score) { 2091 static const double fuzzy_width_up = 4.8; 2092 static const double fuzzy_width_down = 4.8; 2093 static const double m0 = 2.0; 2094 static const double scaler = 0.7777; 2095 double val; 2096 if (score < 1.0) { 2097 // val in [scaler .. 2.0] 2098 val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down)); 2099 val -= 1.0; // from [1 .. 2] to [0 .. 1] 2100 val *= 2.0 - scaler; // from [0 .. 1] to [0 .. 2.0 - scaler] 2101 val += scaler; // from [0 .. 2.0 - scaler] to [scaler .. 2.0] 2102 } else { 2103 // val in [0 .. scaler] 2104 val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up)); 2105 val *= scaler; 2106 } 2107 return val; 2108 } 2109 2110 // #define PRINT_OUT_NORMALIZATION 2111 2112 double ButteraugliFuzzyInverse(double seek) { 2113 double pos = 0; 2114 // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter) 2115 for (double range = 1.0; range >= 1e-10; range *= 0.5) { 2116 double cur = ButteraugliFuzzyClass(pos); 2117 if (cur < seek) { 2118 pos -= range; 2119 } else { 2120 pos += range; 2121 } 2122 } 2123 #ifdef PRINT_OUT_NORMALIZATION 2124 if (seek == 1.0) { 2125 fprintf(stderr, "Fuzzy inverse %g\n", pos); 2126 } 2127 #endif 2128 return pos; 2129 } 2130 2131 #ifdef PRINT_OUT_NORMALIZATION 2132 static double print_out_normalization = ButteraugliFuzzyInverse(1.0); 2133 #endif 2134 2135 namespace { 2136 2137 void ScoreToRgb(double score, double good_threshold, double bad_threshold, 2138 float rgb[3]) { 2139 double heatmap[12][3] = { 2140 {0, 0, 0}, {0, 0, 1}, 2141 {0, 1, 1}, {0, 1, 0}, // Good level 2142 {1, 1, 0}, {1, 0, 0}, // Bad level 2143 {1, 0, 1}, {0.5, 0.5, 1.0}, 2144 {1.0, 0.5, 0.5}, // Pastel colors for the very bad quality range. 2145 {1.0, 1.0, 0.5}, {1, 1, 1}, 2146 {1, 1, 1}, // Last color repeated to have a solid range of white. 2147 }; 2148 if (score < good_threshold) { 2149 score = (score / good_threshold) * 0.3; 2150 } else if (score < bad_threshold) { 2151 score = 0.3 + 2152 (score - good_threshold) / (bad_threshold - good_threshold) * 0.15; 2153 } else { 2154 score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5; 2155 } 2156 static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]); 2157 score = std::min<double>(std::max<double>(score * (kTableSize - 1), 0.0), 2158 kTableSize - 2); 2159 int ix = static_cast<int>(score); 2160 ix = std::min(std::max(0, ix), kTableSize - 2); // Handle NaN 2161 double mix = score - ix; 2162 for (int i = 0; i < 3; ++i) { 2163 double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i]; 2164 rgb[i] = pow(v, 0.5); 2165 } 2166 } 2167 2168 } // namespace 2169 2170 StatusOr<Image3F> CreateHeatMapImage(const ImageF& distmap, 2171 double good_threshold, 2172 double bad_threshold) { 2173 JxlMemoryManager* memory_manager = distmap.memory_manager(); 2174 JXL_ASSIGN_OR_RETURN( 2175 Image3F heatmap, 2176 Image3F::Create(memory_manager, distmap.xsize(), distmap.ysize())); 2177 for (size_t y = 0; y < distmap.ysize(); ++y) { 2178 const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y); 2179 float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y); 2180 float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y); 2181 float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y); 2182 for (size_t x = 0; x < distmap.xsize(); ++x) { 2183 const float d = row_distmap[x]; 2184 float rgb[3]; 2185 ScoreToRgb(d, good_threshold, bad_threshold, rgb); 2186 row_h0[x] = rgb[0]; 2187 row_h1[x] = rgb[1]; 2188 row_h2[x] = rgb[2]; 2189 } 2190 } 2191 return heatmap; 2192 } 2193 2194 } // namespace jxl 2195 #endif // HWY_ONCE