tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hiprec_convolve_test_util.cc (15747B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include "test/hiprec_convolve_test_util.h"
     13 
     14 #include <memory>
     15 #include <new>
     16 
     17 #include "av1/common/restoration.h"
     18 
     19 using std::make_tuple;
     20 using std::tuple;
     21 
     22 namespace libaom_test {
     23 
     24 // Generate a random pair of filter kernels, using the ranges
     25 // of possible values from the loop-restoration experiment
     26 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
     27                             InterpKernel vkernel, int kernel_type = 2) {
     28  if (kernel_type == 0) {
     29    // Low possible values for filter coefficients, 7-tap kernel
     30    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV;
     31    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
     32    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
     33    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
     34    hkernel[7] = vkernel[7] = 0;
     35  } else if (kernel_type == 1) {
     36    // Max possible values for filter coefficients, 7-tap kernel
     37    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV;
     38    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
     39    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
     40    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
     41    hkernel[7] = vkernel[7] = 0;
     42  } else if (kernel_type == 2) {
     43    // Randomly generated values for filter coefficients, 7-tap kernel
     44    hkernel[0] = hkernel[6] =
     45        WIENER_FILT_TAP0_MINV +
     46        rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
     47    hkernel[1] = hkernel[5] =
     48        WIENER_FILT_TAP1_MINV +
     49        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
     50    hkernel[2] = hkernel[4] =
     51        WIENER_FILT_TAP2_MINV +
     52        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
     53    hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
     54    hkernel[7] = 0;
     55 
     56    vkernel[0] = vkernel[6] =
     57        WIENER_FILT_TAP0_MINV +
     58        rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
     59    vkernel[1] = vkernel[5] =
     60        WIENER_FILT_TAP1_MINV +
     61        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
     62    vkernel[2] = vkernel[4] =
     63        WIENER_FILT_TAP2_MINV +
     64        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
     65    vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
     66    vkernel[7] = 0;
     67  } else if (kernel_type == 3) {
     68    // Low possible values for filter coefficients, 5-tap kernel
     69    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
     70    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
     71    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
     72    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
     73    hkernel[7] = vkernel[7] = 0;
     74  } else if (kernel_type == 4) {
     75    // Max possible values for filter coefficients, 5-tap kernel
     76    hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0;
     77    hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
     78    hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
     79    hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
     80    hkernel[7] = vkernel[7] = 0;
     81  } else {
     82    // Randomly generated values for filter coefficients, 5-tap kernel
     83    hkernel[0] = hkernel[6] = 0;
     84    hkernel[1] = hkernel[5] =
     85        WIENER_FILT_TAP1_MINV +
     86        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
     87    hkernel[2] = hkernel[4] =
     88        WIENER_FILT_TAP2_MINV +
     89        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
     90    hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
     91    hkernel[7] = 0;
     92 
     93    vkernel[0] = vkernel[6] = 0;
     94    vkernel[1] = vkernel[5] =
     95        WIENER_FILT_TAP1_MINV +
     96        rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
     97    vkernel[2] = vkernel[4] =
     98        WIENER_FILT_TAP2_MINV +
     99        rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
    100    vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
    101    vkernel[7] = 0;
    102  }
    103 }
    104 
    105 namespace AV1HiprecConvolve {
    106 
    107 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
    108    hiprec_convolve_func filter) {
    109  const HiprecConvolveParam params[] = {
    110    make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
    111    make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
    112    make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
    113    make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
    114    make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
    115    make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
    116    make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
    117  };
    118  return ::testing::ValuesIn(params);
    119 }
    120 
    121 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() = default;
    122 void AV1HiprecConvolveTest::SetUp() {
    123  rnd_.Reset(ACMRandom::DeterministicSeed());
    124 }
    125 
    126 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
    127  const int w = 128, h = 128;
    128  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
    129  const int num_iters = GET_PARAM(2);
    130  int i, j, k, m;
    131  const WienerConvolveParams conv_params = get_conv_params_wiener(8);
    132 
    133  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
    134  ASSERT_NE(input_, nullptr);
    135  uint8_t *input = input_.get();
    136 
    137  // The AVX2 convolve functions always write rows with widths that are
    138  // multiples of 16. So to avoid a buffer overflow, we may need to pad
    139  // rows to a multiple of 16.
    140  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
    141  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
    142  ASSERT_NE(output, nullptr);
    143  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
    144  ASSERT_NE(output2, nullptr);
    145 
    146  // Generate random filter kernels
    147  DECLARE_ALIGNED(16, InterpKernel, hkernel);
    148  DECLARE_ALIGNED(16, InterpKernel, vkernel);
    149 
    150  for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
    151    generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
    152    for (i = 0; i < num_iters; ++i) {
    153      for (k = 0; k < h; ++k)
    154        for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8();
    155      // Choose random locations within the source block
    156      int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
    157      int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
    158      av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w,
    159                                    output.get(), out_w, hkernel, 16, vkernel,
    160                                    16, out_w, out_h, &conv_params);
    161      test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w,
    162                hkernel, 16, vkernel, 16, out_w, out_h, &conv_params);
    163 
    164      for (j = 0; j < out_w * out_h; ++j)
    165        ASSERT_EQ(output[j], output2[j])
    166            << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
    167            << (j / out_w) << ") on iteration " << i;
    168    }
    169  }
    170 }
    171 
    172 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
    173  const int w = 128, h = 128;
    174  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
    175  const int num_iters = GET_PARAM(2) / 500;
    176  int i, j, k;
    177  const WienerConvolveParams conv_params = get_conv_params_wiener(8);
    178 
    179  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
    180  ASSERT_NE(input_, nullptr);
    181  uint8_t *input = input_.get();
    182 
    183  // The AVX2 convolve functions always write rows with widths that are
    184  // multiples of 16. So to avoid a buffer overflow, we may need to pad
    185  // rows to a multiple of 16.
    186  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
    187  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
    188  ASSERT_NE(output, nullptr);
    189  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
    190  ASSERT_NE(output2, nullptr);
    191 
    192  // Generate random filter kernels
    193  DECLARE_ALIGNED(16, InterpKernel, hkernel);
    194  DECLARE_ALIGNED(16, InterpKernel, vkernel);
    195 
    196  generate_kernels(&rnd_, hkernel, vkernel);
    197 
    198  for (i = 0; i < h; ++i)
    199    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
    200 
    201  aom_usec_timer ref_timer;
    202  aom_usec_timer_start(&ref_timer);
    203  for (i = 0; i < num_iters; ++i) {
    204    for (j = 3; j < h - out_h - 4; j++) {
    205      for (k = 3; k < w - out_w - 4; k++) {
    206        av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w,
    207                                      hkernel, 16, vkernel, 16, out_w, out_h,
    208                                      &conv_params);
    209      }
    210    }
    211  }
    212  aom_usec_timer_mark(&ref_timer);
    213  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
    214 
    215  aom_usec_timer tst_timer;
    216  aom_usec_timer_start(&tst_timer);
    217  for (i = 0; i < num_iters; ++i) {
    218    for (j = 3; j < h - out_h - 4; j++) {
    219      for (k = 3; k < w - out_w - 4; k++) {
    220        test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16,
    221                  vkernel, 16, out_w, out_h, &conv_params);
    222      }
    223    }
    224  }
    225  aom_usec_timer_mark(&tst_timer);
    226  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
    227 
    228  std::cout << "[          ] C time = " << ref_time / 1000
    229            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
    230 
    231  EXPECT_GT(ref_time, tst_time)
    232      << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
    233      << "C time: " << ref_time << " us\n"
    234      << "SIMD time: " << tst_time << " us\n";
    235 }
    236 }  // namespace AV1HiprecConvolve
    237 
    238 #if CONFIG_AV1_HIGHBITDEPTH
    239 namespace AV1HighbdHiprecConvolve {
    240 
    241 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
    242    highbd_hiprec_convolve_func filter) {
    243  const HighbdHiprecConvolveParam params[] = {
    244    make_tuple(8, 8, 50000, 8, filter),   make_tuple(64, 64, 1000, 8, filter),
    245    make_tuple(32, 8, 10000, 8, filter),  make_tuple(8, 8, 50000, 10, filter),
    246    make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
    247    make_tuple(8, 8, 50000, 12, filter),  make_tuple(64, 64, 1000, 12, filter),
    248    make_tuple(32, 8, 10000, 12, filter),
    249  };
    250  return ::testing::ValuesIn(params);
    251 }
    252 
    253 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() = default;
    254 void AV1HighbdHiprecConvolveTest::SetUp() {
    255  rnd_.Reset(ACMRandom::DeterministicSeed());
    256 }
    257 
    258 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
    259    highbd_hiprec_convolve_func test_impl) {
    260  const int w = 128, h = 128;
    261  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
    262  const int num_iters = GET_PARAM(2);
    263  const int bd = GET_PARAM(3);
    264  int i, j;
    265  const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
    266 
    267  std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
    268  ASSERT_NE(input, nullptr);
    269 
    270  // The AVX2 convolve functions always write rows with widths that are
    271  // multiples of 16. So to avoid a buffer overflow, we may need to pad
    272  // rows to a multiple of 16.
    273  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
    274  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
    275  ASSERT_NE(output, nullptr);
    276  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
    277  ASSERT_NE(output2, nullptr);
    278 
    279  // Generate random filter kernels
    280  DECLARE_ALIGNED(16, InterpKernel, hkernel);
    281  DECLARE_ALIGNED(16, InterpKernel, vkernel);
    282 
    283  for (i = 0; i < h; ++i)
    284    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
    285 
    286  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
    287  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
    288  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
    289  for (int kernel_type = 0; kernel_type < 6; kernel_type++) {
    290    generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
    291    for (i = 0; i < num_iters; ++i) {
    292      // Choose random locations within the source block
    293      int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
    294      int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
    295      av1_highbd_wiener_convolve_add_src_c(
    296          input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel,
    297          16, vkernel, 16, out_w, out_h, &conv_params, bd);
    298      test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
    299                hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
    300 
    301      for (j = 0; j < out_w * out_h; ++j)
    302        ASSERT_EQ(output[j], output2[j])
    303            << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
    304            << (j / out_w) << ") on iteration " << i;
    305    }
    306  }
    307 }
    308 
    309 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
    310    highbd_hiprec_convolve_func test_impl) {
    311  const int w = 128, h = 128;
    312  const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
    313  const int num_iters = GET_PARAM(2) / 500;
    314  const int bd = GET_PARAM(3);
    315  int i, j, k;
    316  const WienerConvolveParams conv_params = get_conv_params_wiener(bd);
    317 
    318  std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
    319  ASSERT_NE(input, nullptr);
    320 
    321  // The AVX2 convolve functions always write rows with widths that are
    322  // multiples of 16. So to avoid a buffer overflow, we may need to pad
    323  // rows to a multiple of 16.
    324  int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
    325  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
    326  ASSERT_NE(output, nullptr);
    327  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
    328  ASSERT_NE(output2, nullptr);
    329 
    330  // Generate random filter kernels
    331  DECLARE_ALIGNED(16, InterpKernel, hkernel);
    332  DECLARE_ALIGNED(16, InterpKernel, vkernel);
    333 
    334  generate_kernels(&rnd_, hkernel, vkernel);
    335 
    336  for (i = 0; i < h; ++i)
    337    for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
    338 
    339  uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
    340  uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
    341  uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
    342 
    343  aom_usec_timer ref_timer;
    344  aom_usec_timer_start(&ref_timer);
    345  for (i = 0; i < num_iters; ++i) {
    346    for (j = 3; j < h - out_h - 4; j++) {
    347      for (k = 3; k < w - out_w - 4; k++) {
    348        av1_highbd_wiener_convolve_add_src_c(
    349            input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
    350            16, out_w, out_h, &conv_params, bd);
    351      }
    352    }
    353  }
    354  aom_usec_timer_mark(&ref_timer);
    355  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
    356 
    357  aom_usec_timer tst_timer;
    358  aom_usec_timer_start(&tst_timer);
    359  for (i = 0; i < num_iters; ++i) {
    360    for (j = 3; j < h - out_h - 4; j++) {
    361      for (k = 3; k < w - out_w - 4; k++) {
    362        test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
    363                  vkernel, 16, out_w, out_h, &conv_params, bd);
    364      }
    365    }
    366  }
    367  aom_usec_timer_mark(&tst_timer);
    368  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
    369 
    370  std::cout << "[          ] C time = " << ref_time / 1000
    371            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
    372 
    373  EXPECT_GT(ref_time, tst_time)
    374      << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
    375      << "C time: " << ref_time << " us\n"
    376      << "SIMD time: " << tst_time << " us\n";
    377 }
    378 }  // namespace AV1HighbdHiprecConvolve
    379 #endif  // CONFIG_AV1_HIGHBITDEPTH
    380 }  // namespace libaom_test