dct_test.cc (11610B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include <cmath> 7 #include <cstring> 8 #include <numeric> 9 10 #undef HWY_TARGET_INCLUDE 11 #define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc" 12 #include <hwy/foreach_target.h> 13 #include <hwy/highway.h> 14 #include <hwy/tests/hwy_gtest.h> 15 16 #include "lib/jxl/dct-inl.h" 17 #include "lib/jxl/dct_for_test.h" 18 #include "lib/jxl/test_utils.h" 19 #include "lib/jxl/testing.h" 20 21 HWY_BEFORE_NAMESPACE(); 22 namespace jxl { 23 namespace HWY_NAMESPACE { 24 25 // Computes the in-place NxN DCT of block. 26 // Requires that block is HWY_ALIGN'ed. 27 // 28 // Performs ComputeTransposedScaledDCT and then transposes and scales it to 29 // obtain "vanilla" DCT. 30 template <size_t N> 31 void ComputeDCT(float block[N * N]) { 32 HWY_ALIGN float tmp_block[N * N]; 33 HWY_ALIGN float scratch_space[4 * N * N]; 34 ComputeScaledDCT<N, N>()(DCTFrom(block, N), tmp_block, scratch_space); 35 36 // Untranspose. 37 Transpose<N, N>::Run(DCTFrom(tmp_block, N), DCTTo(block, N)); 38 } 39 40 // Computes the in-place 8x8 iDCT of block. 41 // Requires that block is HWY_ALIGN'ed. 42 template <int N> 43 void ComputeIDCT(float block[N * N]) { 44 HWY_ALIGN float tmp_block[N * N]; 45 HWY_ALIGN float scratch_space[4 * N * N]; 46 // Untranspose. 47 Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N)); 48 49 ComputeScaledIDCT<N, N>()(tmp_block, DCTTo(block, N), scratch_space); 50 } 51 52 template <size_t N> 53 void TransposeTestT(float accuracy) { 54 constexpr size_t kBlockSize = N * N; 55 HWY_ALIGN float src[kBlockSize]; 56 DCTTo to_src(src, N); 57 for (size_t y = 0; y < N; ++y) { 58 for (size_t x = 0; x < N; ++x) { 59 to_src.Write(y * N + x, y, x); 60 } 61 } 62 HWY_ALIGN float dst[kBlockSize]; 63 Transpose<N, N>::Run(DCTFrom(src, N), DCTTo(dst, N)); 64 DCTFrom from_dst(dst, N); 65 for (size_t y = 0; y < N; ++y) { 66 for (size_t x = 0; x < N; ++x) { 67 float expected = x * N + y; 68 float actual = from_dst.Read(y, x); 69 EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y; 70 } 71 } 72 } 73 74 void TransposeTest() { 75 TransposeTestT<8>(1e-7f); 76 TransposeTestT<16>(1e-7f); 77 TransposeTestT<32>(1e-7f); 78 } 79 80 template <size_t N> 81 void ColumnDctRoundtripT(float accuracy) { 82 constexpr size_t kBlockSize = N * N; 83 // Though we are only interested in single column result, dct.h has built-in 84 // limit on minimal number of columns processed. So, to be safe, we do 85 // regular 8x8 block transformation. On the bright side - we could check all 86 // 8 basis vectors at once. 87 HWY_ALIGN float block[kBlockSize]; 88 HWY_ALIGN float scratch[3 * kBlockSize]; 89 DCTTo to(block, N); 90 DCTFrom from(block, N); 91 for (size_t i = 0; i < N; ++i) { 92 for (size_t j = 0; j < N; ++j) { 93 to.Write((i == j) ? 1.0f : 0.0f, i, j); 94 } 95 } 96 97 // Running (I)DCT on the same memory block seems to trigger a compiler bug on 98 // ARMv7 with clang6. 99 HWY_ALIGN float tmp[kBlockSize]; 100 DCTTo to_tmp(tmp, N); 101 DCTFrom from_tmp(tmp, N); 102 103 DCT1D<N, N>()(from, to_tmp, scratch); 104 IDCT1D<N, N>()(from_tmp, to, scratch); 105 106 for (size_t i = 0; i < N; ++i) { 107 for (size_t j = 0; j < N; ++j) { 108 float expected = (i == j) ? 1.0f : 0.0f; 109 float actual = from.Read(i, j); 110 EXPECT_NEAR(expected, actual, accuracy) << " i=" << i << ", j=" << j; 111 } 112 } 113 } 114 115 void ColumnDctRoundtrip() { 116 ColumnDctRoundtripT<8>(1e-6f); 117 ColumnDctRoundtripT<16>(1e-6f); 118 ColumnDctRoundtripT<32>(1e-6f); 119 } 120 121 template <size_t N> 122 void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) { 123 constexpr size_t kBlockSize = N * N; 124 for (size_t i = start; i < end; i++) { 125 HWY_ALIGN float fast[kBlockSize] = {0.0f}; 126 double slow[kBlockSize] = {0.0}; 127 fast[i] = 1.0; 128 slow[i] = 1.0; 129 DCTSlow<N>(slow); 130 ComputeDCT<N>(fast); 131 for (size_t k = 0; k < kBlockSize; ++k) { 132 EXPECT_NEAR(fast[k], slow[k], accuracy / N) 133 << "i = " << i << ", k = " << k << ", N = " << N; 134 } 135 } 136 } 137 138 template <size_t N> 139 void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) { 140 constexpr size_t kBlockSize = N * N; 141 for (size_t i = start; i < end; i++) { 142 HWY_ALIGN float fast[kBlockSize] = {0.0f}; 143 double slow[kBlockSize] = {0.0}; 144 fast[i] = 1.0; 145 slow[i] = 1.0; 146 IDCTSlow<N>(slow); 147 ComputeIDCT<N>(fast); 148 for (size_t k = 0; k < kBlockSize; ++k) { 149 EXPECT_NEAR(fast[k], slow[k], accuracy * N) 150 << "i = " << i << ", k = " << k << ", N = " << N; 151 } 152 } 153 } 154 155 template <size_t N> 156 void TestInverseT(float accuracy) { 157 test::ThreadPoolForTests pool(N < 32 ? 0 : 8); 158 enum { kBlockSize = N * N }; 159 const auto process_block = [accuracy](const uint32_t task, 160 size_t /*thread*/) -> Status { 161 const size_t i = static_cast<size_t>(task); 162 HWY_ALIGN float x[kBlockSize] = {0.0f}; 163 x[i] = 1.0; 164 165 ComputeIDCT<N>(x); 166 ComputeDCT<N>(x); 167 168 for (size_t k = 0; k < kBlockSize; ++k) { 169 EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy) 170 << "i = " << i << ", k = " << k; 171 } 172 return true; 173 }; 174 EXPECT_TRUE(RunOnPool(pool.get(), 0, kBlockSize, ThreadPool::NoInit, 175 process_block, "TestInverse")); 176 } 177 178 void InverseTest() { 179 TestInverseT<8>(1e-6f); 180 TestInverseT<16>(1e-6f); 181 TestInverseT<32>(3e-6f); 182 } 183 184 template <size_t N> 185 void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) { 186 constexpr size_t kBlockSize = N * N; 187 for (size_t i = start; i < end; i++) { 188 for (size_t j = 0; j < kBlockSize; ++j) { 189 // We check that <e_i, Me_j> = <M^\dagger{}e_i, e_j>. 190 // That means (Me_j)_i = (M^\dagger{}e_i)_j 191 192 // x := Me_j 193 HWY_ALIGN float x[kBlockSize] = {0.0f}; 194 x[j] = 1.0; 195 ComputeIDCT<N>(x); 196 // y := M^\dagger{}e_i 197 HWY_ALIGN float y[kBlockSize] = {0.0f}; 198 y[i] = 1.0; 199 ComputeDCT<N>(y); 200 201 EXPECT_NEAR(x[i] / N, y[j] * N, accuracy) << "i = " << i << ", j = " << j; 202 } 203 } 204 } 205 206 template <size_t N> 207 void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) { 208 constexpr size_t kBlockSize = N * N; 209 for (size_t i = start; i < end; i++) { 210 double x[kBlockSize] = {0.0f}; 211 x[i] = 1.0; 212 213 DCTSlow<N>(x); 214 IDCTSlow<N>(x); 215 216 for (size_t k = 0; k < kBlockSize; ++k) { 217 EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy) 218 << "i = " << i << ", k = " << k; 219 } 220 } 221 } 222 223 template <size_t ROWS, size_t COLS> 224 void TestRectInverseT(float accuracy) { 225 constexpr size_t kBlockSize = ROWS * COLS; 226 for (size_t i = 0; i < kBlockSize; ++i) { 227 HWY_ALIGN float x[kBlockSize] = {0.0f}; 228 HWY_ALIGN float out[kBlockSize] = {0.0f}; 229 x[i] = 1.0; 230 HWY_ALIGN float coeffs[kBlockSize] = {0.0f}; 231 HWY_ALIGN float scratch_space[kBlockSize * 5]; 232 233 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x, COLS), coeffs, scratch_space); 234 ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(out, COLS), scratch_space); 235 236 for (size_t k = 0; k < kBlockSize; ++k) { 237 EXPECT_NEAR(out[k], (k == i) ? 1.0f : 0.0f, accuracy) 238 << "i = " << i << ", k = " << k << " ROWS = " << ROWS 239 << " COLS = " << COLS; 240 } 241 } 242 } 243 244 void TestRectInverse() { 245 TestRectInverseT<16, 32>(1e-6f); 246 TestRectInverseT<8, 32>(1e-6f); 247 TestRectInverseT<8, 16>(1e-6f); 248 TestRectInverseT<4, 8>(1e-6f); 249 TestRectInverseT<2, 4>(1e-6f); 250 TestRectInverseT<1, 4>(1e-6f); 251 TestRectInverseT<1, 2>(1e-6f); 252 253 TestRectInverseT<32, 16>(1e-6f); 254 TestRectInverseT<32, 8>(1e-6f); 255 TestRectInverseT<16, 8>(1e-6f); 256 TestRectInverseT<8, 4>(1e-6f); 257 TestRectInverseT<4, 2>(1e-6f); 258 TestRectInverseT<4, 1>(1e-6f); 259 TestRectInverseT<2, 1>(1e-6f); 260 } 261 262 template <size_t ROWS, size_t COLS> 263 void TestRectTransposeT(float accuracy) { 264 constexpr size_t kBlockSize = ROWS * COLS; 265 HWY_ALIGN float scratch_space[kBlockSize * 5]; 266 for (size_t px = 0; px < COLS; ++px) { 267 for (size_t py = 0; py < ROWS; ++py) { 268 HWY_ALIGN float x1[kBlockSize] = {0.0f}; 269 HWY_ALIGN float x2[kBlockSize] = {0.0f}; 270 HWY_ALIGN float coeffs1[kBlockSize] = {0.0f}; 271 HWY_ALIGN float coeffs2[kBlockSize] = {0.0f}; 272 x1[py * COLS + px] = 1; 273 x2[px * ROWS + py] = 1; 274 275 constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS; 276 constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS; 277 278 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x1, COLS), coeffs1, scratch_space); 279 ComputeScaledDCT<COLS, ROWS>()(DCTFrom(x2, ROWS), coeffs2, scratch_space); 280 281 for (size_t x = 0; x < OUT_COLS; ++x) { 282 for (size_t y = 0; y < OUT_ROWS; ++y) { 283 EXPECT_NEAR(coeffs1[y * OUT_COLS + x], coeffs2[y * OUT_COLS + x], 284 accuracy) 285 << " px = " << px << ", py = " << py << ", x = " << x 286 << ", y = " << y; 287 } 288 } 289 } 290 } 291 } 292 293 void TestRectTranspose() { 294 TestRectTransposeT<16, 32>(1e-6f); 295 TestRectTransposeT<8, 32>(1e-6f); 296 TestRectTransposeT<8, 16>(1e-6f); 297 TestRectTransposeT<4, 8>(1e-6f); 298 TestRectTransposeT<2, 4>(1e-6f); 299 TestRectTransposeT<1, 4>(1e-6f); 300 TestRectTransposeT<1, 2>(1e-6f); 301 302 // Identical to 8, 16 303 // TestRectTranspose<16, 8>(1e-6f); 304 } 305 306 void TestDctAccuracyShard(size_t shard) { 307 if (shard == 0) { 308 TestDctAccuracy<1>(1.1E-7f); 309 TestDctAccuracy<2>(1.1E-7f); 310 TestDctAccuracy<4>(1.1E-7f); 311 TestDctAccuracy<8>(1.1E-7f); 312 TestDctAccuracy<16>(1.3E-7f); 313 } 314 TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1)); 315 } 316 317 void TestIdctAccuracyShard(size_t shard) { 318 if (shard == 0) { 319 TestIdctAccuracy<1>(1E-7f); 320 TestIdctAccuracy<2>(1E-7f); 321 TestIdctAccuracy<4>(1E-7f); 322 TestIdctAccuracy<8>(1E-7f); 323 TestIdctAccuracy<16>(1E-7f); 324 } 325 TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1)); 326 } 327 328 void TestDctTransposeShard(size_t shard) { 329 if (shard == 0) { 330 TestDctTranspose<8>(1E-6f); 331 TestDctTranspose<16>(1E-6f); 332 } 333 TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1)); 334 } 335 336 void TestSlowInverseShard(size_t shard) { 337 if (shard == 0) { 338 TestSlowInverse<1>(1E-5f); 339 TestSlowInverse<2>(1E-5f); 340 TestSlowInverse<4>(1E-5f); 341 TestSlowInverse<8>(1E-5f); 342 TestSlowInverse<16>(1E-5f); 343 } 344 TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1)); 345 } 346 347 // NOLINTNEXTLINE(google-readability-namespace-comments) 348 } // namespace HWY_NAMESPACE 349 } // namespace jxl 350 HWY_AFTER_NAMESPACE(); 351 352 #if HWY_ONCE 353 namespace jxl { 354 355 class TransposeTest : public hwy::TestWithParamTarget {}; 356 357 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest); 358 359 HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest); 360 HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest); 361 HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip); 362 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse); 363 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose); 364 365 // Tests in the DctShardedTest class are sharded for N=32. 366 class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {}; 367 368 template <size_t n> 369 std::vector<uint32_t> ShardRange() { 370 #ifdef JXL_DISABLE_SLOW_TESTS 371 static_assert(n > 6); 372 std::vector<uint32_t> ret = {0, 1, 3, 5, n - 1}; 373 #else 374 std::vector<uint32_t> ret(n); 375 std::iota(ret.begin(), ret.end(), 0); 376 #endif // JXL_DISABLE_SLOW_TESTS 377 return ret; 378 } 379 380 HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest, 381 ::testing::ValuesIn(ShardRange<32>())); 382 383 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard); 384 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard); 385 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard); 386 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard); 387 388 } // namespace jxl 389 #endif // HWY_ONCE