dec_transforms-inl.h (26586B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE) 7 #ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_ 8 #undef LIB_JXL_DEC_TRANSFORMS_INL_H_ 9 #else 10 #define LIB_JXL_DEC_TRANSFORMS_INL_H_ 11 #endif 12 13 #include <cstddef> 14 #include <hwy/highway.h> 15 16 #include "lib/jxl/ac_strategy.h" 17 #include "lib/jxl/dct-inl.h" 18 #include "lib/jxl/dct_scales.h" 19 HWY_BEFORE_NAMESPACE(); 20 namespace jxl { 21 namespace HWY_NAMESPACE { 22 namespace { 23 24 // These templates are not found via ADL. 25 using hwy::HWY_NAMESPACE::MulAdd; 26 27 // Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which 28 // is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the 29 // input block. 30 template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS, 31 size_t ROWS, size_t COLS> 32 JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride, 33 float* output, const size_t output_stride, 34 float* JXL_RESTRICT block, 35 float* JXL_RESTRICT scratch_space) { 36 static_assert(LF_ROWS == ROWS, 37 "ReinterpretingDCT should only be called with LF == N"); 38 static_assert(LF_COLS == COLS, 39 "ReinterpretingDCT should only be called with LF == N"); 40 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block, 41 scratch_space); 42 if (ROWS < COLS) { 43 for (size_t y = 0; y < LF_ROWS; y++) { 44 for (size_t x = 0; x < LF_COLS; x++) { 45 output[y * output_stride + x] = 46 block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) * 47 DCTTotalResampleScale<COLS, DCT_COLS>(x); 48 } 49 } 50 } else { 51 for (size_t y = 0; y < LF_COLS; y++) { 52 for (size_t x = 0; x < LF_ROWS; x++) { 53 output[y * output_stride + x] = 54 block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) * 55 DCTTotalResampleScale<ROWS, DCT_ROWS>(x); 56 } 57 } 58 } 59 } 60 61 template <size_t S> 62 void IDCT2TopBlock(const float* block, size_t stride_out, float* out) { 63 static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); 64 static_assert(S % 2 == 0, "S should be even"); 65 float temp[kDCTBlockSize]; 66 constexpr size_t num_2x2 = S / 2; 67 for (size_t y = 0; y < num_2x2; y++) { 68 for (size_t x = 0; x < num_2x2; x++) { 69 float c00 = block[y * kBlockDim + x]; 70 float c01 = block[y * kBlockDim + num_2x2 + x]; 71 float c10 = block[(y + num_2x2) * kBlockDim + x]; 72 float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x]; 73 float r00 = c00 + c01 + c10 + c11; 74 float r01 = c00 + c01 - c10 - c11; 75 float r10 = c00 - c01 + c10 - c11; 76 float r11 = c00 - c01 - c10 + c11; 77 temp[y * 2 * kBlockDim + x * 2] = r00; 78 temp[y * 2 * kBlockDim + x * 2 + 1] = r01; 79 temp[(y * 2 + 1) * kBlockDim + x * 2] = r10; 80 temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11; 81 } 82 } 83 for (size_t y = 0; y < S; y++) { 84 for (size_t x = 0; x < S; x++) { 85 out[y * stride_out + x] = temp[y * kBlockDim + x]; 86 } 87 } 88 } 89 90 void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) { 91 HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = { 92 { 93 0.25, 94 0.25, 95 0.25, 96 0.25, 97 0.25, 98 0.25, 99 0.25, 100 0.25, 101 0.25, 102 0.25, 103 0.25, 104 0.25, 105 0.25, 106 0.25, 107 0.25, 108 0.25, 109 }, 110 { 111 0.876902929799142f, 112 0.2206518106944235f, 113 -0.10140050393753763f, 114 -0.1014005039375375f, 115 0.2206518106944236f, 116 -0.10140050393753777f, 117 -0.10140050393753772f, 118 -0.10140050393753763f, 119 -0.10140050393753758f, 120 -0.10140050393753769f, 121 -0.1014005039375375f, 122 -0.10140050393753768f, 123 -0.10140050393753768f, 124 -0.10140050393753759f, 125 -0.10140050393753763f, 126 -0.10140050393753741f, 127 }, 128 { 129 0.0, 130 0.0, 131 0.40670075830260755f, 132 0.44444816619734445f, 133 0.0, 134 0.0, 135 0.19574399372042936f, 136 0.2929100136981264f, 137 -0.40670075830260716f, 138 -0.19574399372042872f, 139 0.0, 140 0.11379074460448091f, 141 -0.44444816619734384f, 142 -0.29291001369812636f, 143 -0.1137907446044814f, 144 0.0, 145 }, 146 { 147 0.0, 148 0.0, 149 -0.21255748058288748f, 150 0.3085497062849767f, 151 0.0, 152 0.4706702258572536f, 153 -0.1621205195722993f, 154 0.0, 155 -0.21255748058287047f, 156 -0.16212051957228327f, 157 -0.47067022585725277f, 158 -0.1464291867126764f, 159 0.3085497062849487f, 160 0.0, 161 -0.14642918671266536f, 162 0.4251149611657548f, 163 }, 164 { 165 0.0, 166 -0.7071067811865474f, 167 0.0, 168 0.0, 169 0.7071067811865476f, 170 0.0, 171 0.0, 172 0.0, 173 0.0, 174 0.0, 175 0.0, 176 0.0, 177 0.0, 178 0.0, 179 0.0, 180 0.0, 181 }, 182 { 183 -0.4105377591765233f, 184 0.6235485373547691f, 185 -0.06435071657946274f, 186 -0.06435071657946266f, 187 0.6235485373547694f, 188 -0.06435071657946284f, 189 -0.0643507165794628f, 190 -0.06435071657946274f, 191 -0.06435071657946272f, 192 -0.06435071657946279f, 193 -0.06435071657946266f, 194 -0.06435071657946277f, 195 -0.06435071657946277f, 196 -0.06435071657946273f, 197 -0.06435071657946274f, 198 -0.0643507165794626f, 199 }, 200 { 201 0.0, 202 0.0, 203 -0.4517556589999482f, 204 0.15854503551840063f, 205 0.0, 206 -0.04038515160822202f, 207 0.0074182263792423875f, 208 0.39351034269210167f, 209 -0.45175565899994635f, 210 0.007418226379244351f, 211 0.1107416575309343f, 212 0.08298163094882051f, 213 0.15854503551839705f, 214 0.3935103426921022f, 215 0.0829816309488214f, 216 -0.45175565899994796f, 217 }, 218 { 219 0.0, 220 0.0, 221 -0.304684750724869f, 222 0.5112616136591823f, 223 0.0, 224 0.0, 225 -0.290480129728998f, 226 -0.06578701549142804f, 227 0.304684750724884f, 228 0.2904801297290076f, 229 0.0, 230 -0.23889773523344604f, 231 -0.5112616136592012f, 232 0.06578701549142545f, 233 0.23889773523345467f, 234 0.0, 235 }, 236 { 237 0.0, 238 0.0, 239 0.3017929516615495f, 240 0.25792362796341184f, 241 0.0, 242 0.16272340142866204f, 243 0.09520022653475037f, 244 0.0, 245 0.3017929516615503f, 246 0.09520022653475055f, 247 -0.16272340142866173f, 248 -0.35312385449816297f, 249 0.25792362796341295f, 250 0.0, 251 -0.3531238544981624f, 252 -0.6035859033230976f, 253 }, 254 { 255 0.0, 256 0.0, 257 0.40824829046386274f, 258 0.0, 259 0.0, 260 0.0, 261 0.0, 262 -0.4082482904638628f, 263 -0.4082482904638635f, 264 0.0, 265 0.0, 266 -0.40824829046386296f, 267 0.0, 268 0.4082482904638634f, 269 0.408248290463863f, 270 0.0, 271 }, 272 { 273 0.0, 274 0.0, 275 0.1747866975480809f, 276 0.0812611176717539f, 277 0.0, 278 0.0, 279 -0.3675398009862027f, 280 -0.307882213957909f, 281 -0.17478669754808135f, 282 0.3675398009862011f, 283 0.0, 284 0.4826689115059883f, 285 -0.08126111767175039f, 286 0.30788221395790305f, 287 -0.48266891150598584f, 288 0.0, 289 }, 290 { 291 0.0, 292 0.0, 293 -0.21105601049335784f, 294 0.18567180916109802f, 295 0.0, 296 0.0, 297 0.49215859013738733f, 298 -0.38525013709251915f, 299 0.21105601049335806f, 300 -0.49215859013738905f, 301 0.0, 302 0.17419412659916217f, 303 -0.18567180916109904f, 304 0.3852501370925211f, 305 -0.1741941265991621f, 306 0.0, 307 }, 308 { 309 0.0, 310 0.0, 311 -0.14266084808807264f, 312 -0.3416446842253372f, 313 0.0, 314 0.7367497537172237f, 315 0.24627107722075148f, 316 -0.08574019035519306f, 317 -0.14266084808807344f, 318 0.24627107722075137f, 319 0.14883399227113567f, 320 -0.04768680350229251f, 321 -0.3416446842253373f, 322 -0.08574019035519267f, 323 -0.047686803502292804f, 324 -0.14266084808807242f, 325 }, 326 { 327 0.0, 328 0.0, 329 -0.13813540350758585f, 330 0.3302282550303788f, 331 0.0, 332 0.08755115000587084f, 333 -0.07946706605909573f, 334 -0.4613374887461511f, 335 -0.13813540350758294f, 336 -0.07946706605910261f, 337 0.49724647109535086f, 338 0.12538059448563663f, 339 0.3302282550303805f, 340 -0.4613374887461554f, 341 0.12538059448564315f, 342 -0.13813540350758452f, 343 }, 344 { 345 0.0, 346 0.0, 347 -0.17437602599651067f, 348 0.0702790691196284f, 349 0.0, 350 -0.2921026642334881f, 351 0.3623817333531167f, 352 0.0, 353 -0.1743760259965108f, 354 0.36238173335311646f, 355 0.29210266423348785f, 356 -0.4326608024727445f, 357 0.07027906911962818f, 358 0.0, 359 -0.4326608024727457f, 360 0.34875205199302267f, 361 }, 362 { 363 0.0, 364 0.0, 365 0.11354987314994337f, 366 -0.07417504595810355f, 367 0.0, 368 0.19402893032594343f, 369 -0.435190496523228f, 370 0.21918684838857466f, 371 0.11354987314994257f, 372 -0.4351904965232251f, 373 0.5550443808910661f, 374 -0.25468277124066463f, 375 -0.07417504595810233f, 376 0.2191868483885728f, 377 -0.25468277124066413f, 378 0.1135498731499429f, 379 }, 380 }; 381 382 const HWY_CAPPED(float, 16) d; 383 for (size_t i = 0; i < 16; i += Lanes(d)) { 384 auto pixel = Zero(d); 385 for (size_t j = 0; j < 16; j++) { 386 auto cf = Set(d, coeffs[j]); 387 auto basis = Load(d, k4x4AFVBasis[j] + i); 388 pixel = MulAdd(cf, basis, pixel); 389 } 390 Store(pixel, d, pixels + i); 391 } 392 } 393 394 template <size_t afv_kind> 395 void AFVTransformToPixels(const float* JXL_RESTRICT coefficients, 396 float* JXL_RESTRICT pixels, size_t pixels_stride) { 397 HWY_ALIGN float scratch_space[4 * 8 * 4]; 398 size_t afv_x = afv_kind & 1; 399 size_t afv_y = afv_kind / 2; 400 float dcs[3] = {}; 401 float block00 = coefficients[0]; 402 float block01 = coefficients[1]; 403 float block10 = coefficients[8]; 404 dcs[0] = (block00 + block10 + block01) * 4.0f; 405 dcs[1] = (block00 + block10 - block01); 406 dcs[2] = block00 - block10; 407 // IAFV: (even, even) positions. 408 HWY_ALIGN float coeff[4 * 4]; 409 coeff[0] = dcs[0]; 410 for (size_t iy = 0; iy < 4; iy++) { 411 for (size_t ix = 0; ix < 4; ix++) { 412 if (ix == 0 && iy == 0) continue; 413 coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; 414 } 415 } 416 HWY_ALIGN float block[4 * 8]; 417 AFVIDCT4x4(coeff, block); 418 for (size_t iy = 0; iy < 4; iy++) { 419 for (size_t ix = 0; ix < 4; ix++) { 420 pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = 421 block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; 422 } 423 } 424 // IDCT4x4 in (odd, even) positions. 425 block[0] = dcs[1]; 426 for (size_t iy = 0; iy < 4; iy++) { 427 for (size_t ix = 0; ix < 4; ix++) { 428 if (ix == 0 && iy == 0) continue; 429 block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; 430 } 431 } 432 ComputeScaledIDCT<4, 4>()( 433 block, 434 DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), 435 pixels_stride), 436 scratch_space); 437 // IDCT4x8. 438 block[0] = dcs[2]; 439 for (size_t iy = 0; iy < 4; iy++) { 440 for (size_t ix = 0; ix < 8; ix++) { 441 if (ix == 0 && iy == 0) continue; 442 block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; 443 } 444 } 445 ComputeScaledIDCT<4, 8>()( 446 block, 447 DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride), 448 scratch_space); 449 } 450 451 HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy, 452 float* JXL_RESTRICT coefficients, 453 float* JXL_RESTRICT pixels, 454 size_t pixels_stride, 455 float* scratch_space) { 456 using Type = AcStrategyType; 457 switch (strategy) { 458 case Type::IDENTITY: { 459 float dcs[4] = {}; 460 float block00 = coefficients[0]; 461 float block01 = coefficients[1]; 462 float block10 = coefficients[8]; 463 float block11 = coefficients[9]; 464 dcs[0] = block00 + block01 + block10 + block11; 465 dcs[1] = block00 + block01 - block10 - block11; 466 dcs[2] = block00 - block01 + block10 - block11; 467 dcs[3] = block00 - block01 - block10 + block11; 468 for (size_t y = 0; y < 2; y++) { 469 for (size_t x = 0; x < 2; x++) { 470 float block_dc = dcs[y * 2 + x]; 471 float residual_sum = 0; 472 for (size_t iy = 0; iy < 4; iy++) { 473 for (size_t ix = 0; ix < 4; ix++) { 474 if (ix == 0 && iy == 0) continue; 475 residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2]; 476 } 477 } 478 pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] = 479 block_dc - residual_sum * (1.0f / 16); 480 for (size_t iy = 0; iy < 4; iy++) { 481 for (size_t ix = 0; ix < 4; ix++) { 482 if (ix == 1 && iy == 1) continue; 483 pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] = 484 coefficients[(y + iy * 2) * 8 + x + ix * 2] + 485 pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; 486 } 487 } 488 pixels[y * 4 * pixels_stride + x * 4] = 489 coefficients[(y + 2) * 8 + x + 2] + 490 pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; 491 } 492 } 493 break; 494 } 495 case Type::DCT8X4: { 496 float dcs[2] = {}; 497 float block0 = coefficients[0]; 498 float block1 = coefficients[8]; 499 dcs[0] = block0 + block1; 500 dcs[1] = block0 - block1; 501 for (size_t x = 0; x < 2; x++) { 502 HWY_ALIGN float block[4 * 8]; 503 block[0] = dcs[x]; 504 for (size_t iy = 0; iy < 4; iy++) { 505 for (size_t ix = 0; ix < 8; ix++) { 506 if (ix == 0 && iy == 0) continue; 507 block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix]; 508 } 509 } 510 ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride), 511 scratch_space); 512 } 513 break; 514 } 515 case Type::DCT4X8: { 516 float dcs[2] = {}; 517 float block0 = coefficients[0]; 518 float block1 = coefficients[8]; 519 dcs[0] = block0 + block1; 520 dcs[1] = block0 - block1; 521 for (size_t y = 0; y < 2; y++) { 522 HWY_ALIGN float block[4 * 8]; 523 block[0] = dcs[y]; 524 for (size_t iy = 0; iy < 4; iy++) { 525 for (size_t ix = 0; ix < 8; ix++) { 526 if (ix == 0 && iy == 0) continue; 527 block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix]; 528 } 529 } 530 ComputeScaledIDCT<4, 8>()( 531 block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride), 532 scratch_space); 533 } 534 break; 535 } 536 case Type::DCT4X4: { 537 float dcs[4] = {}; 538 float block00 = coefficients[0]; 539 float block01 = coefficients[1]; 540 float block10 = coefficients[8]; 541 float block11 = coefficients[9]; 542 dcs[0] = block00 + block01 + block10 + block11; 543 dcs[1] = block00 + block01 - block10 - block11; 544 dcs[2] = block00 - block01 + block10 - block11; 545 dcs[3] = block00 - block01 - block10 + block11; 546 for (size_t y = 0; y < 2; y++) { 547 for (size_t x = 0; x < 2; x++) { 548 HWY_ALIGN float block[4 * 4]; 549 block[0] = dcs[y * 2 + x]; 550 for (size_t iy = 0; iy < 4; iy++) { 551 for (size_t ix = 0; ix < 4; ix++) { 552 if (ix == 0 && iy == 0) continue; 553 block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2]; 554 } 555 } 556 ComputeScaledIDCT<4, 4>()( 557 block, 558 DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), 559 scratch_space); 560 } 561 } 562 break; 563 } 564 case Type::DCT2X2: { 565 HWY_ALIGN float coeffs[kDCTBlockSize]; 566 memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize); 567 IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs); 568 IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs); 569 IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs); 570 for (size_t y = 0; y < kBlockDim; y++) { 571 for (size_t x = 0; x < kBlockDim; x++) { 572 pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x]; 573 } 574 } 575 break; 576 } 577 case Type::DCT16X16: { 578 ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride), 579 scratch_space); 580 break; 581 } 582 case Type::DCT16X8: { 583 ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride), 584 scratch_space); 585 break; 586 } 587 case Type::DCT8X16: { 588 ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride), 589 scratch_space); 590 break; 591 } 592 case Type::DCT32X8: { 593 ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride), 594 scratch_space); 595 break; 596 } 597 case Type::DCT8X32: { 598 ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride), 599 scratch_space); 600 break; 601 } 602 case Type::DCT32X16: { 603 ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride), 604 scratch_space); 605 break; 606 } 607 case Type::DCT16X32: { 608 ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride), 609 scratch_space); 610 break; 611 } 612 case Type::DCT32X32: { 613 ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride), 614 scratch_space); 615 break; 616 } 617 case Type::DCT: { 618 ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride), 619 scratch_space); 620 break; 621 } 622 case Type::AFV0: { 623 AFVTransformToPixels<0>(coefficients, pixels, pixels_stride); 624 break; 625 } 626 case Type::AFV1: { 627 AFVTransformToPixels<1>(coefficients, pixels, pixels_stride); 628 break; 629 } 630 case Type::AFV2: { 631 AFVTransformToPixels<2>(coefficients, pixels, pixels_stride); 632 break; 633 } 634 case Type::AFV3: { 635 AFVTransformToPixels<3>(coefficients, pixels, pixels_stride); 636 break; 637 } 638 case Type::DCT64X32: { 639 ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride), 640 scratch_space); 641 break; 642 } 643 case Type::DCT32X64: { 644 ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride), 645 scratch_space); 646 break; 647 } 648 case Type::DCT64X64: { 649 ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride), 650 scratch_space); 651 break; 652 } 653 case Type::DCT128X64: { 654 ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride), 655 scratch_space); 656 break; 657 } 658 case Type::DCT64X128: { 659 ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride), 660 scratch_space); 661 break; 662 } 663 case Type::DCT128X128: { 664 ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride), 665 scratch_space); 666 break; 667 } 668 case Type::DCT256X128: { 669 ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride), 670 scratch_space); 671 break; 672 } 673 case Type::DCT128X256: { 674 ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride), 675 scratch_space); 676 break; 677 } 678 case Type::DCT256X256: { 679 ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride), 680 scratch_space); 681 break; 682 } 683 } 684 } 685 686 HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy, 687 const float* dc, size_t dc_stride, 688 float* llf, 689 float* JXL_RESTRICT scratch) { 690 using Type = AcStrategyType; 691 HWY_ALIGN float warm_block[4 * 4]; 692 HWY_ALIGN float warm_scratch_space[4 * 4 * 4]; 693 switch (strategy) { 694 case Type::DCT16X8: { 695 ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim, 696 /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>( 697 dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); 698 break; 699 } 700 case Type::DCT8X16: { 701 ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 702 /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>( 703 dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); 704 break; 705 } 706 case Type::DCT16X16: { 707 ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 708 /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>( 709 dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); 710 break; 711 } 712 case Type::DCT32X8: { 713 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim, 714 /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>( 715 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 716 break; 717 } 718 case Type::DCT8X32: { 719 ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 720 /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>( 721 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 722 break; 723 } 724 case Type::DCT32X16: { 725 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim, 726 /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>( 727 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 728 break; 729 } 730 case Type::DCT16X32: { 731 ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 732 /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>( 733 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 734 break; 735 } 736 case Type::DCT32X32: { 737 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 738 /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>( 739 dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); 740 break; 741 } 742 case Type::DCT64X32: { 743 ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim, 744 /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>( 745 dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4); 746 break; 747 } 748 case Type::DCT32X64: { 749 ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 750 /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>( 751 dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8); 752 break; 753 } 754 case Type::DCT64X64: { 755 ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 756 /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>( 757 dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8); 758 break; 759 } 760 case Type::DCT128X64: { 761 ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, 762 /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( 763 dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8); 764 break; 765 } 766 case Type::DCT64X128: { 767 ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 768 /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( 769 dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16); 770 break; 771 } 772 case Type::DCT128X128: { 773 ReinterpretingDCT< 774 /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 775 /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( 776 dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16); 777 break; 778 } 779 case Type::DCT256X128: { 780 ReinterpretingDCT< 781 /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, 782 /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( 783 dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16); 784 break; 785 } 786 case Type::DCT128X256: { 787 ReinterpretingDCT< 788 /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, 789 /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( 790 dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32); 791 break; 792 } 793 case Type::DCT256X256: { 794 ReinterpretingDCT< 795 /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, 796 /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( 797 dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32); 798 break; 799 } 800 case Type::DCT: 801 case Type::DCT2X2: 802 case Type::DCT4X4: 803 case Type::DCT4X8: 804 case Type::DCT8X4: 805 case Type::AFV0: 806 case Type::AFV1: 807 case Type::AFV2: 808 case Type::AFV3: 809 case Type::IDENTITY: 810 llf[0] = dc[0]; 811 break; 812 }; 813 } 814 815 } // namespace 816 // NOLINTNEXTLINE(google-readability-namespace-comments) 817 } // namespace HWY_NAMESPACE 818 } // namespace jxl 819 HWY_AFTER_NAMESPACE(); 820 821 #endif // LIB_JXL_DEC_TRANSFORMS_INL_H_