tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

dct_test.cc (11610B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include <cmath>
      7 #include <cstring>
      8 #include <numeric>
      9 
     10 #undef HWY_TARGET_INCLUDE
     11 #define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc"
     12 #include <hwy/foreach_target.h>
     13 #include <hwy/highway.h>
     14 #include <hwy/tests/hwy_gtest.h>
     15 
     16 #include "lib/jxl/dct-inl.h"
     17 #include "lib/jxl/dct_for_test.h"
     18 #include "lib/jxl/test_utils.h"
     19 #include "lib/jxl/testing.h"
     20 
     21 HWY_BEFORE_NAMESPACE();
     22 namespace jxl {
     23 namespace HWY_NAMESPACE {
     24 
     25 // Computes the in-place NxN DCT of block.
     26 // Requires that block is HWY_ALIGN'ed.
     27 //
     28 // Performs ComputeTransposedScaledDCT and then transposes and scales it to
     29 // obtain "vanilla" DCT.
     30 template <size_t N>
     31 void ComputeDCT(float block[N * N]) {
     32  HWY_ALIGN float tmp_block[N * N];
     33  HWY_ALIGN float scratch_space[4 * N * N];
     34  ComputeScaledDCT<N, N>()(DCTFrom(block, N), tmp_block, scratch_space);
     35 
     36  // Untranspose.
     37  Transpose<N, N>::Run(DCTFrom(tmp_block, N), DCTTo(block, N));
     38 }
     39 
     40 // Computes the in-place 8x8 iDCT of block.
     41 // Requires that block is HWY_ALIGN'ed.
     42 template <int N>
     43 void ComputeIDCT(float block[N * N]) {
     44  HWY_ALIGN float tmp_block[N * N];
     45  HWY_ALIGN float scratch_space[4 * N * N];
     46  // Untranspose.
     47  Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N));
     48 
     49  ComputeScaledIDCT<N, N>()(tmp_block, DCTTo(block, N), scratch_space);
     50 }
     51 
     52 template <size_t N>
     53 void TransposeTestT(float accuracy) {
     54  constexpr size_t kBlockSize = N * N;
     55  HWY_ALIGN float src[kBlockSize];
     56  DCTTo to_src(src, N);
     57  for (size_t y = 0; y < N; ++y) {
     58    for (size_t x = 0; x < N; ++x) {
     59      to_src.Write(y * N + x, y, x);
     60    }
     61  }
     62  HWY_ALIGN float dst[kBlockSize];
     63  Transpose<N, N>::Run(DCTFrom(src, N), DCTTo(dst, N));
     64  DCTFrom from_dst(dst, N);
     65  for (size_t y = 0; y < N; ++y) {
     66    for (size_t x = 0; x < N; ++x) {
     67      float expected = x * N + y;
     68      float actual = from_dst.Read(y, x);
     69      EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y;
     70    }
     71  }
     72 }
     73 
     74 void TransposeTest() {
     75  TransposeTestT<8>(1e-7f);
     76  TransposeTestT<16>(1e-7f);
     77  TransposeTestT<32>(1e-7f);
     78 }
     79 
     80 template <size_t N>
     81 void ColumnDctRoundtripT(float accuracy) {
     82  constexpr size_t kBlockSize = N * N;
     83  // Though we are only interested in single column result, dct.h has built-in
     84  // limit on minimal number of columns processed. So, to be safe, we do
     85  // regular 8x8 block transformation. On the bright side - we could check all
     86  // 8 basis vectors at once.
     87  HWY_ALIGN float block[kBlockSize];
     88  HWY_ALIGN float scratch[3 * kBlockSize];
     89  DCTTo to(block, N);
     90  DCTFrom from(block, N);
     91  for (size_t i = 0; i < N; ++i) {
     92    for (size_t j = 0; j < N; ++j) {
     93      to.Write((i == j) ? 1.0f : 0.0f, i, j);
     94    }
     95  }
     96 
     97  // Running (I)DCT on the same memory block seems to trigger a compiler bug on
     98  // ARMv7 with clang6.
     99  HWY_ALIGN float tmp[kBlockSize];
    100  DCTTo to_tmp(tmp, N);
    101  DCTFrom from_tmp(tmp, N);
    102 
    103  DCT1D<N, N>()(from, to_tmp, scratch);
    104  IDCT1D<N, N>()(from_tmp, to, scratch);
    105 
    106  for (size_t i = 0; i < N; ++i) {
    107    for (size_t j = 0; j < N; ++j) {
    108      float expected = (i == j) ? 1.0f : 0.0f;
    109      float actual = from.Read(i, j);
    110      EXPECT_NEAR(expected, actual, accuracy) << " i=" << i << ", j=" << j;
    111    }
    112  }
    113 }
    114 
    115 void ColumnDctRoundtrip() {
    116  ColumnDctRoundtripT<8>(1e-6f);
    117  ColumnDctRoundtripT<16>(1e-6f);
    118  ColumnDctRoundtripT<32>(1e-6f);
    119 }
    120 
    121 template <size_t N>
    122 void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
    123  constexpr size_t kBlockSize = N * N;
    124  for (size_t i = start; i < end; i++) {
    125    HWY_ALIGN float fast[kBlockSize] = {0.0f};
    126    double slow[kBlockSize] = {0.0};
    127    fast[i] = 1.0;
    128    slow[i] = 1.0;
    129    DCTSlow<N>(slow);
    130    ComputeDCT<N>(fast);
    131    for (size_t k = 0; k < kBlockSize; ++k) {
    132      EXPECT_NEAR(fast[k], slow[k], accuracy / N)
    133          << "i = " << i << ", k = " << k << ", N = " << N;
    134    }
    135  }
    136 }
    137 
    138 template <size_t N>
    139 void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
    140  constexpr size_t kBlockSize = N * N;
    141  for (size_t i = start; i < end; i++) {
    142    HWY_ALIGN float fast[kBlockSize] = {0.0f};
    143    double slow[kBlockSize] = {0.0};
    144    fast[i] = 1.0;
    145    slow[i] = 1.0;
    146    IDCTSlow<N>(slow);
    147    ComputeIDCT<N>(fast);
    148    for (size_t k = 0; k < kBlockSize; ++k) {
    149      EXPECT_NEAR(fast[k], slow[k], accuracy * N)
    150          << "i = " << i << ", k = " << k << ", N = " << N;
    151    }
    152  }
    153 }
    154 
    155 template <size_t N>
    156 void TestInverseT(float accuracy) {
    157  test::ThreadPoolForTests pool(N < 32 ? 0 : 8);
    158  enum { kBlockSize = N * N };
    159  const auto process_block = [accuracy](const uint32_t task,
    160                                        size_t /*thread*/) -> Status {
    161    const size_t i = static_cast<size_t>(task);
    162    HWY_ALIGN float x[kBlockSize] = {0.0f};
    163    x[i] = 1.0;
    164 
    165    ComputeIDCT<N>(x);
    166    ComputeDCT<N>(x);
    167 
    168    for (size_t k = 0; k < kBlockSize; ++k) {
    169      EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
    170          << "i = " << i << ", k = " << k;
    171    }
    172    return true;
    173  };
    174  EXPECT_TRUE(RunOnPool(pool.get(), 0, kBlockSize, ThreadPool::NoInit,
    175                        process_block, "TestInverse"));
    176 }
    177 
    178 void InverseTest() {
    179  TestInverseT<8>(1e-6f);
    180  TestInverseT<16>(1e-6f);
    181  TestInverseT<32>(3e-6f);
    182 }
    183 
    184 template <size_t N>
    185 void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) {
    186  constexpr size_t kBlockSize = N * N;
    187  for (size_t i = start; i < end; i++) {
    188    for (size_t j = 0; j < kBlockSize; ++j) {
    189      // We check that <e_i, Me_j> = <M^\dagger{}e_i, e_j>.
    190      // That means (Me_j)_i = (M^\dagger{}e_i)_j
    191 
    192      // x := Me_j
    193      HWY_ALIGN float x[kBlockSize] = {0.0f};
    194      x[j] = 1.0;
    195      ComputeIDCT<N>(x);
    196      // y := M^\dagger{}e_i
    197      HWY_ALIGN float y[kBlockSize] = {0.0f};
    198      y[i] = 1.0;
    199      ComputeDCT<N>(y);
    200 
    201      EXPECT_NEAR(x[i] / N, y[j] * N, accuracy) << "i = " << i << ", j = " << j;
    202    }
    203  }
    204 }
    205 
    206 template <size_t N>
    207 void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) {
    208  constexpr size_t kBlockSize = N * N;
    209  for (size_t i = start; i < end; i++) {
    210    double x[kBlockSize] = {0.0f};
    211    x[i] = 1.0;
    212 
    213    DCTSlow<N>(x);
    214    IDCTSlow<N>(x);
    215 
    216    for (size_t k = 0; k < kBlockSize; ++k) {
    217      EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
    218          << "i = " << i << ", k = " << k;
    219    }
    220  }
    221 }
    222 
    223 template <size_t ROWS, size_t COLS>
    224 void TestRectInverseT(float accuracy) {
    225  constexpr size_t kBlockSize = ROWS * COLS;
    226  for (size_t i = 0; i < kBlockSize; ++i) {
    227    HWY_ALIGN float x[kBlockSize] = {0.0f};
    228    HWY_ALIGN float out[kBlockSize] = {0.0f};
    229    x[i] = 1.0;
    230    HWY_ALIGN float coeffs[kBlockSize] = {0.0f};
    231    HWY_ALIGN float scratch_space[kBlockSize * 5];
    232 
    233    ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x, COLS), coeffs, scratch_space);
    234    ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(out, COLS), scratch_space);
    235 
    236    for (size_t k = 0; k < kBlockSize; ++k) {
    237      EXPECT_NEAR(out[k], (k == i) ? 1.0f : 0.0f, accuracy)
    238          << "i = " << i << ", k = " << k << " ROWS = " << ROWS
    239          << " COLS = " << COLS;
    240    }
    241  }
    242 }
    243 
    244 void TestRectInverse() {
    245  TestRectInverseT<16, 32>(1e-6f);
    246  TestRectInverseT<8, 32>(1e-6f);
    247  TestRectInverseT<8, 16>(1e-6f);
    248  TestRectInverseT<4, 8>(1e-6f);
    249  TestRectInverseT<2, 4>(1e-6f);
    250  TestRectInverseT<1, 4>(1e-6f);
    251  TestRectInverseT<1, 2>(1e-6f);
    252 
    253  TestRectInverseT<32, 16>(1e-6f);
    254  TestRectInverseT<32, 8>(1e-6f);
    255  TestRectInverseT<16, 8>(1e-6f);
    256  TestRectInverseT<8, 4>(1e-6f);
    257  TestRectInverseT<4, 2>(1e-6f);
    258  TestRectInverseT<4, 1>(1e-6f);
    259  TestRectInverseT<2, 1>(1e-6f);
    260 }
    261 
    262 template <size_t ROWS, size_t COLS>
    263 void TestRectTransposeT(float accuracy) {
    264  constexpr size_t kBlockSize = ROWS * COLS;
    265  HWY_ALIGN float scratch_space[kBlockSize * 5];
    266  for (size_t px = 0; px < COLS; ++px) {
    267    for (size_t py = 0; py < ROWS; ++py) {
    268      HWY_ALIGN float x1[kBlockSize] = {0.0f};
    269      HWY_ALIGN float x2[kBlockSize] = {0.0f};
    270      HWY_ALIGN float coeffs1[kBlockSize] = {0.0f};
    271      HWY_ALIGN float coeffs2[kBlockSize] = {0.0f};
    272      x1[py * COLS + px] = 1;
    273      x2[px * ROWS + py] = 1;
    274 
    275      constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS;
    276      constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS;
    277 
    278      ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x1, COLS), coeffs1, scratch_space);
    279      ComputeScaledDCT<COLS, ROWS>()(DCTFrom(x2, ROWS), coeffs2, scratch_space);
    280 
    281      for (size_t x = 0; x < OUT_COLS; ++x) {
    282        for (size_t y = 0; y < OUT_ROWS; ++y) {
    283          EXPECT_NEAR(coeffs1[y * OUT_COLS + x], coeffs2[y * OUT_COLS + x],
    284                      accuracy)
    285              << " px = " << px << ", py = " << py << ", x = " << x
    286              << ", y = " << y;
    287        }
    288      }
    289    }
    290  }
    291 }
    292 
    293 void TestRectTranspose() {
    294  TestRectTransposeT<16, 32>(1e-6f);
    295  TestRectTransposeT<8, 32>(1e-6f);
    296  TestRectTransposeT<8, 16>(1e-6f);
    297  TestRectTransposeT<4, 8>(1e-6f);
    298  TestRectTransposeT<2, 4>(1e-6f);
    299  TestRectTransposeT<1, 4>(1e-6f);
    300  TestRectTransposeT<1, 2>(1e-6f);
    301 
    302  // Identical to 8, 16
    303  //  TestRectTranspose<16, 8>(1e-6f);
    304 }
    305 
    306 void TestDctAccuracyShard(size_t shard) {
    307  if (shard == 0) {
    308    TestDctAccuracy<1>(1.1E-7f);
    309    TestDctAccuracy<2>(1.1E-7f);
    310    TestDctAccuracy<4>(1.1E-7f);
    311    TestDctAccuracy<8>(1.1E-7f);
    312    TestDctAccuracy<16>(1.3E-7f);
    313  }
    314  TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1));
    315 }
    316 
    317 void TestIdctAccuracyShard(size_t shard) {
    318  if (shard == 0) {
    319    TestIdctAccuracy<1>(1E-7f);
    320    TestIdctAccuracy<2>(1E-7f);
    321    TestIdctAccuracy<4>(1E-7f);
    322    TestIdctAccuracy<8>(1E-7f);
    323    TestIdctAccuracy<16>(1E-7f);
    324  }
    325  TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1));
    326 }
    327 
    328 void TestDctTransposeShard(size_t shard) {
    329  if (shard == 0) {
    330    TestDctTranspose<8>(1E-6f);
    331    TestDctTranspose<16>(1E-6f);
    332  }
    333  TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1));
    334 }
    335 
    336 void TestSlowInverseShard(size_t shard) {
    337  if (shard == 0) {
    338    TestSlowInverse<1>(1E-5f);
    339    TestSlowInverse<2>(1E-5f);
    340    TestSlowInverse<4>(1E-5f);
    341    TestSlowInverse<8>(1E-5f);
    342    TestSlowInverse<16>(1E-5f);
    343  }
    344  TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1));
    345 }
    346 
    347 // NOLINTNEXTLINE(google-readability-namespace-comments)
    348 }  // namespace HWY_NAMESPACE
    349 }  // namespace jxl
    350 HWY_AFTER_NAMESPACE();
    351 
    352 #if HWY_ONCE
    353 namespace jxl {
    354 
    355 class TransposeTest : public hwy::TestWithParamTarget {};
    356 
    357 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest);
    358 
    359 HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest);
    360 HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest);
    361 HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip);
    362 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse);
    363 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose);
    364 
    365 // Tests in the DctShardedTest class are sharded for N=32.
    366 class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {};
    367 
    368 template <size_t n>
    369 std::vector<uint32_t> ShardRange() {
    370 #ifdef JXL_DISABLE_SLOW_TESTS
    371  static_assert(n > 6);
    372  std::vector<uint32_t> ret = {0, 1, 3, 5, n - 1};
    373 #else
    374  std::vector<uint32_t> ret(n);
    375  std::iota(ret.begin(), ret.end(), 0);
    376 #endif  // JXL_DISABLE_SLOW_TESTS
    377  return ret;
    378 }
    379 
    380 HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest,
    381                                      ::testing::ValuesIn(ShardRange<32>()));
    382 
    383 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard);
    384 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard);
    385 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard);
    386 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard);
    387 
    388 }  // namespace jxl
    389 #endif  // HWY_ONCE