tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

av1_convolve_test.cc (98594B)


      1 /*
      2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <cstddef>
     13 #include <cstdint>
     14 #include <ostream>
     15 #include <set>
     16 #include <vector>
     17 #include "config/av1_rtcd.h"
     18 #include "config/aom_dsp_rtcd.h"
     19 #include "aom_ports/aom_timer.h"
     20 #include "gtest/gtest.h"
     21 #include "test/acm_random.h"
     22 
     23 namespace {
     24 
     25 // TODO(any): Remove following INTERP_FILTERS_ALL define, so that 12-tap filter
     26 // is tested once 12-tap filter SIMD is done.
     27 #undef INTERP_FILTERS_ALL
     28 #define INTERP_FILTERS_ALL 4
     29 
     30 // All single reference convolve tests are parameterized on block size,
     31 // bit-depth, and function to test.
     32 //
     33 // Note that parameterizing on these variables (and not other parameters) is
     34 // a conscious decision - Jenkins needs some degree of parallelization to run
     35 // the tests within the time limit, but if the number of parameters increases
     36 // too much, the gtest framework does not handle it well (increased overhead per
     37 // test, huge amount of output to stdout, etc.).
     38 //
     39 // Also note that the test suites must be named with the architecture, e.g.,
     40 // C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests
     41 // that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86
     42 // binaries) and will disable tests using a filter like
     43 // --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the
     44 // testing infrastructure will not selectively filter them properly.
     45 class BlockSize {
     46 public:
     47  BlockSize(int w, int h) : width_(w), height_(h) {}
     48 
     49  int Width() const { return width_; }
     50  int Height() const { return height_; }
     51 
     52  bool operator<(const BlockSize &other) const {
     53    if (Width() == other.Width()) {
     54      return Height() < other.Height();
     55    }
     56    return Width() < other.Width();
     57  }
     58 
     59  bool operator==(const BlockSize &other) const {
     60    return Width() == other.Width() && Height() == other.Height();
     61  }
     62 
     63 private:
     64  int width_;
     65  int height_;
     66 };
     67 
     68 // Block size / bit depth / test function used to parameterize the tests.
     69 template <typename T>
     70 class TestParam {
     71 public:
     72  TestParam(const BlockSize &block, int bd, T test_func)
     73      : block_(block), bd_(bd), test_func_(test_func) {}
     74 
     75  const BlockSize &Block() const { return block_; }
     76  int BitDepth() const { return bd_; }
     77  T TestFunction() const { return test_func_; }
     78 
     79  bool operator==(const TestParam &other) const {
     80    return Block() == other.Block() && BitDepth() == other.BitDepth() &&
     81           TestFunction() == other.TestFunction();
     82  }
     83 
     84 private:
     85  BlockSize block_;
     86  int bd_;
     87  T test_func_;
     88 };
     89 
     90 template <typename T>
     91 std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) {
     92  return os << "TestParam { width:" << test_arg.Block().Width()
     93            << " height:" << test_arg.Block().Height()
     94            << " bd:" << test_arg.BitDepth() << " }";
     95 }
     96 
     97 // Generate the list of all block widths / heights that need to be tested,
     98 // includes chroma and luma sizes, for the given bit-depths. The test
     99 // function is the same for all generated parameters.
    100 template <typename T>
    101 std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths,
    102                                        T test_func) {
    103  std::set<BlockSize> sizes;
    104  for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
    105    const int w = block_size_wide[b];
    106    const int h = block_size_high[b];
    107    sizes.insert(BlockSize(w, h));
    108    // Add in smaller chroma sizes as well.
    109    if (w == 4 || h == 4) {
    110      sizes.insert(BlockSize(w / 2, h / 2));
    111    }
    112  }
    113  std::vector<TestParam<T>> result;
    114  for (const BlockSize &block : sizes) {
    115    for (int bd : bit_depths) {
    116      result.push_back(TestParam<T>(block, bd, test_func));
    117    }
    118  }
    119  return result;
    120 }
    121 
    122 template <typename T>
    123 std::vector<TestParam<T>> GetLowbdTestParams(T test_func) {
    124  return GetTestParams({ 8 }, test_func);
    125 }
    126 
    127 template <typename T>
    128 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams(
    129    T test_func) {
    130  return ::testing::ValuesIn(GetLowbdTestParams(test_func));
    131 }
    132 
    133 // Test the test-parameters generators work as expected.
    134 class AV1ConvolveParametersTest : public ::testing::Test {};
    135 
    136 TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) {
    137  auto v = GetLowbdTestParams(av1_convolve_x_sr_c);
    138  ASSERT_EQ(27U, v.size());
    139  for (const auto &p : v) {
    140    ASSERT_EQ(8, p.BitDepth());
    141    // Needed (instead of ASSERT_EQ(...) since gtest does not
    142    // have built in printing for arbitrary functions, which
    143    // causes a compilation error.
    144    bool same_fn = av1_convolve_x_sr_c == p.TestFunction();
    145    ASSERT_TRUE(same_fn);
    146  }
    147 }
    148 
    149 #if CONFIG_AV1_HIGHBITDEPTH
    150 template <typename T>
    151 std::vector<TestParam<T>> GetHighbdTestParams(T test_func) {
    152  return GetTestParams({ 10, 12 }, test_func);
    153 }
    154 
    155 template <typename T>
    156 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams(
    157    T test_func) {
    158  return ::testing::ValuesIn(GetHighbdTestParams(test_func));
    159 }
    160 
    161 TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) {
    162  auto v = GetHighbdTestParams(av1_highbd_convolve_x_sr_c);
    163  ASSERT_EQ(54U, v.size());
    164  int num_10 = 0;
    165  int num_12 = 0;
    166  for (const auto &p : v) {
    167    ASSERT_TRUE(p.BitDepth() == 10 || p.BitDepth() == 12);
    168    bool same_fn = av1_highbd_convolve_x_sr_c == p.TestFunction();
    169    ASSERT_TRUE(same_fn);
    170    if (p.BitDepth() == 10) {
    171      ++num_10;
    172    } else {
    173      ++num_12;
    174    }
    175  }
    176  ASSERT_EQ(num_10, num_12);
    177 }
    178 #endif  // CONFIG_AV1_HIGHBITDEPTH
    179 
    180 // AV1ConvolveTest is the base class that all convolve tests should derive from.
    181 // It provides storage/methods for generating randomized buffers for both
    182 // low bit-depth and high bit-depth, and setup/teardown methods for clearing
    183 // system state. Implementors can get the bit-depth / block-size /
    184 // test function by calling GetParam().
    185 template <typename T>
    186 class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {
    187 public:
    188  ~AV1ConvolveTest() override = default;
    189 
    190  void SetUp() override {
    191    rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
    192  }
    193 
    194  // Randomizes the 8-bit input buffer and returns a pointer to it. Note that
    195  // the pointer is safe to use with an 8-tap filter. The stride can range
    196  // from width to (width + kPadding). Also note that the pointer is to the
    197  // same memory location.
    198  static constexpr int kInputPadding = 12;
    199 
    200  // Get a pointer to a buffer with stride == width. Note that we must have
    201  // the test param passed in explicitly -- the gtest framework does not
    202  // support calling GetParam() within a templatized class.
    203  // Note that FirstRandomInput8 always returns the same pointer -- if two
    204  // inputs are needed, also use SecondRandomInput8.
    205  const uint8_t *FirstRandomInput8(const TestParam<T> &param) {
    206    // Note we can't call GetParam() directly -- gtest does not support
    207    // this for parameterized types.
    208    return RandomInput8(input8_1_, param);
    209  }
    210 
    211  const uint8_t *SecondRandomInput8(const TestParam<T> &param) {
    212    return RandomInput8(input8_2_, param);
    213  }
    214 
    215  // Some of the intrinsics perform writes in 32 byte chunks. Moreover, some
    216  // of the instrinsics assume that the stride is also a multiple of 32.
    217  // To satisfy these constraints and also remain simple, output buffer strides
    218  // are assumed MAX_SB_SIZE.
    219  static constexpr int kOutputStride = MAX_SB_SIZE;
    220 
    221  // Check that two 8-bit output buffers are identical.
    222  void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width,
    223                            int height) {
    224    ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations";
    225    for (int j = 0; j < height; ++j) {
    226      if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
    227        p1 += kOutputStride;
    228        p2 += kOutputStride;
    229        continue;
    230      }
    231      for (int i = 0; i < width; ++i) {
    232        ASSERT_EQ(p1[i], p2[i])
    233            << width << "x" << height << " Pixel mismatch at (" << i << ", "
    234            << j << ")";
    235      }
    236    }
    237  }
    238 
    239  // Check that two 16-bit output buffers are identical.
    240  void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width,
    241                            int height) {
    242    ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations";
    243    for (int j = 0; j < height; ++j) {
    244      if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
    245        p1 += kOutputStride;
    246        p2 += kOutputStride;
    247        continue;
    248      }
    249      for (int i = 0; i < width; ++i) {
    250        ASSERT_EQ(p1[i], p2[i])
    251            << width << "x" << height << " Pixel mismatch at (" << i << ", "
    252            << j << ")";
    253      }
    254    }
    255  }
    256 
    257 #if CONFIG_AV1_HIGHBITDEPTH
    258  // Note that the randomized values are capped by bit-depth.
    259  const uint16_t *FirstRandomInput16(const TestParam<T> &param) {
    260    return RandomInput16(input16_1_, param);
    261  }
    262 
    263  const uint16_t *SecondRandomInput16(const TestParam<T> &param) {
    264    return RandomInput16(input16_2_, param);
    265  }
    266 #endif
    267 
    268 private:
    269  const uint8_t *RandomInput8(uint8_t *p, const TestParam<T> &param) {
    270    EXPECT_EQ(8, param.BitDepth());
    271    EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
    272    EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
    273    const int padded_width = param.Block().Width() + kInputPadding;
    274    const int padded_height = param.Block().Height() + kInputPadding;
    275    Randomize(p, padded_width * padded_height);
    276    return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
    277  }
    278 
    279  void Randomize(uint8_t *p, int size) {
    280    for (int i = 0; i < size; ++i) {
    281      p[i] = rnd_.Rand8();
    282    }
    283  }
    284 
    285 #if CONFIG_AV1_HIGHBITDEPTH
    286  const uint16_t *RandomInput16(uint16_t *p, const TestParam<T> &param) {
    287    // Check that this is only called with high bit-depths.
    288    EXPECT_TRUE(param.BitDepth() == 10 || param.BitDepth() == 12);
    289    EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
    290    EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
    291    const int padded_width = param.Block().Width() + kInputPadding;
    292    const int padded_height = param.Block().Height() + kInputPadding;
    293    Randomize(p, padded_width * padded_height, param.BitDepth());
    294    return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
    295  }
    296 
    297  void Randomize(uint16_t *p, int size, int bit_depth) {
    298    for (int i = 0; i < size; ++i) {
    299      p[i] = rnd_.Rand16() & ((1 << bit_depth) - 1);
    300    }
    301  }
    302 #endif
    303 
    304  static constexpr int kInputStride = MAX_SB_SIZE + kInputPadding;
    305 
    306  libaom_test::ACMRandom rnd_;
    307  // Statically allocate all the memory that is needed for the tests. Note
    308  // that we cannot allocate output memory here. It must use DECLARE_ALIGNED,
    309  // which is a C99 feature and interacts badly with C++ member variables.
    310  uint8_t input8_1_[kInputStride * kInputStride];
    311  uint8_t input8_2_[kInputStride * kInputStride];
    312 #if CONFIG_AV1_HIGHBITDEPTH
    313  uint16_t input16_1_[kInputStride * kInputStride];
    314  uint16_t input16_2_[kInputStride * kInputStride];
    315 #endif
    316 };
    317 
    318 ////////////////////////////////////////////////////////
    319 // Single reference convolve-x functions (low bit-depth)
    320 ////////////////////////////////////////////////////////
    321 using convolve_x_func = void (*)(const uint8_t *src, int src_stride,
    322                                 uint8_t *dst, int dst_stride, int w, int h,
    323                                 const InterpFilterParams *filter_params_x,
    324                                 const int subpel_x_qn,
    325                                 ConvolveParams *conv_params);
    326 
    327 class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> {
    328 public:
    329  void RunTest() {
    330    // Do not test the no-op filter.
    331    for (int sub_x = 1; sub_x < 16; ++sub_x) {
    332      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    333           ++filter) {
    334        InterpFilter f = static_cast<InterpFilter>(filter);
    335        TestConvolve(sub_x, f);
    336      }
    337    }
    338  }
    339 
    340 public:
    341  void SpeedTest() {
    342    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    343         ++filter) {
    344      InterpFilter f = static_cast<InterpFilter>(filter);
    345      TestConvolveSpeed(f, 10000);
    346    }
    347  }
    348 
    349 private:
    350  void TestConvolve(const int sub_x, const InterpFilter filter) {
    351    const int width = GetParam().Block().Width();
    352    const int height = GetParam().Block().Height();
    353 
    354    const InterpFilterParams *filter_params_x =
    355        av1_get_interp_filter_params_with_block_size(filter, width);
    356    ConvolveParams conv_params1 =
    357        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    358    const uint8_t *input = FirstRandomInput8(GetParam());
    359    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    360    av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
    361                        filter_params_x, sub_x, &conv_params1);
    362 
    363    ConvolveParams conv_params2 =
    364        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    365    convolve_x_func test_func = GetParam().TestFunction();
    366    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    367    test_func(input, width, test, kOutputStride, width, height, filter_params_x,
    368              sub_x, &conv_params2);
    369    AssertOutputBufferEq(reference, test, width, height);
    370  }
    371 
    372 private:
    373  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
    374    const int width = GetParam().Block().Width();
    375    const int height = GetParam().Block().Height();
    376 
    377    const InterpFilterParams *filter_params_x =
    378        av1_get_interp_filter_params_with_block_size(filter, width);
    379    ConvolveParams conv_params1 =
    380        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    381    const uint8_t *input = FirstRandomInput8(GetParam());
    382    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    383 
    384    aom_usec_timer timer;
    385    aom_usec_timer_start(&timer);
    386    for (int i = 0; i < num_iters; ++i) {
    387      av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
    388                          filter_params_x, 0, &conv_params1);
    389    }
    390    aom_usec_timer_mark(&timer);
    391    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    392    ConvolveParams conv_params2 =
    393        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    394    convolve_x_func test_func = GetParam().TestFunction();
    395    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    396 
    397    aom_usec_timer_start(&timer);
    398    for (int i = 0; i < num_iters; ++i) {
    399      test_func(input, width, test, kOutputStride, width, height,
    400                filter_params_x, 0, &conv_params2);
    401    }
    402    aom_usec_timer_mark(&timer);
    403    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    404    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
    405           time2, time1 / time2);
    406  }
    407 };
    408 
    409 TEST_P(AV1ConvolveXTest, RunTest) { RunTest(); }
    410 
    411 TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) { SpeedTest(); }
    412 
    413 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXTest,
    414                         BuildLowbdParams(av1_convolve_x_sr_c));
    415 
    416 #if HAVE_SSE2
    417 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXTest,
    418                         BuildLowbdParams(av1_convolve_x_sr_sse2));
    419 #endif
    420 
    421 #if HAVE_AVX2
    422 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXTest,
    423                         BuildLowbdParams(av1_convolve_x_sr_avx2));
    424 #endif
    425 
    426 #if HAVE_NEON
    427 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest,
    428                         BuildLowbdParams(av1_convolve_x_sr_neon));
    429 #endif
    430 
    431 #if HAVE_NEON_DOTPROD
    432 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest,
    433                         BuildLowbdParams(av1_convolve_x_sr_neon_dotprod));
    434 #endif
    435 
    436 #if HAVE_NEON_I8MM
    437 INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest,
    438                         BuildLowbdParams(av1_convolve_x_sr_neon_i8mm));
    439 #endif
    440 
    441 #if HAVE_RVV
    442 INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveXTest,
    443                         BuildLowbdParams(av1_convolve_x_sr_rvv));
    444 #endif
    445 
    446 ////////////////////////////////////////////////////////////////
    447 // Single reference convolve-x IntraBC functions (low bit-depth)
    448 ////////////////////////////////////////////////////////////////
    449 
    450 class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> {
    451 public:
    452  void RunTest() {
    453    // IntraBC functions only operate for subpel_x_qn = 8.
    454    constexpr int kSubX = 8;
    455    const int width = GetParam().Block().Width();
    456    const int height = GetParam().Block().Height();
    457    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    458    const uint8_t *input = FirstRandomInput8(GetParam());
    459 
    460    ConvolveParams conv_params1 =
    461        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    462    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    463    // Use a stride different from width to avoid potential storing errors that
    464    // would go undetected. The input buffer is filled using a padding of 12, so
    465    // the stride can be anywhere between width and width + 12.
    466    av1_convolve_x_sr_intrabc_c(input, width + 2, reference, kOutputStride,
    467                                width, height, filter_params_x, kSubX,
    468                                &conv_params1);
    469 
    470    ConvolveParams conv_params2 =
    471        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    472    convolve_x_func test_func = GetParam().TestFunction();
    473    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    474    test_func(input, width + 2, test, kOutputStride, width, height,
    475              filter_params_x, kSubX, &conv_params2);
    476 
    477    AssertOutputBufferEq(reference, test, width, height);
    478  }
    479 
    480  void SpeedTest() {
    481    constexpr int kNumIters = 10000;
    482    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
    483    const int width = GetParam().Block().Width();
    484    const int height = GetParam().Block().Height();
    485    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    486    const uint8_t *input = FirstRandomInput8(GetParam());
    487 
    488    ConvolveParams conv_params1 =
    489        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    490    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    491    aom_usec_timer timer;
    492    aom_usec_timer_start(&timer);
    493    for (int i = 0; i < kNumIters; ++i) {
    494      av1_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, width,
    495                                  height, filter_params_x, 0, &conv_params1);
    496    }
    497    aom_usec_timer_mark(&timer);
    498    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    499 
    500    ConvolveParams conv_params2 =
    501        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    502    convolve_x_func test_func = GetParam().TestFunction();
    503    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    504    aom_usec_timer_start(&timer);
    505    for (int i = 0; i < kNumIters; ++i) {
    506      test_func(input, width, test, kOutputStride, width, height,
    507                filter_params_x, 0, &conv_params2);
    508    }
    509    aom_usec_timer_mark(&timer);
    510    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    511 
    512    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
    513           time2, time1 / time2);
    514  }
    515 };
    516 
    517 TEST_P(AV1ConvolveXIntraBCTest, RunTest) { RunTest(); }
    518 
    519 TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }
    520 
    521 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXIntraBCTest,
    522                         BuildLowbdParams(av1_convolve_x_sr_intrabc_c));
    523 
    524 #if HAVE_NEON
    525 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest,
    526                         BuildLowbdParams(av1_convolve_x_sr_intrabc_neon));
    527 #endif
    528 
    529 #if HAVE_RVV
    530 INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveXIntraBCTest,
    531                         BuildLowbdParams(av1_convolve_x_sr_intrabc_rvv));
    532 #endif
    533 
    534 #if CONFIG_AV1_HIGHBITDEPTH
    535 /////////////////////////////////////////////////////////
    536 // Single reference convolve-x functions (high bit-depth)
    537 /////////////////////////////////////////////////////////
    538 using highbd_convolve_x_func =
    539    void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
    540             int w, int h, const InterpFilterParams *filter_params_x,
    541             const int subpel_x_qn, ConvolveParams *conv_params, int bd);
    542 
    543 class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> {
    544 public:
    545  void RunTest() {
    546    // Do not test the no-op filter.
    547    for (int sub_x = 1; sub_x < 16; ++sub_x) {
    548      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    549           ++filter) {
    550        InterpFilter f = static_cast<InterpFilter>(filter);
    551        TestConvolve(sub_x, f);
    552      }
    553    }
    554  }
    555 
    556 public:
    557  void SpeedTest() {
    558    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    559         ++filter) {
    560      InterpFilter f = static_cast<InterpFilter>(filter);
    561      TestConvolveSpeed(f, 10000);
    562    }
    563  }
    564 
    565 private:
    566  void TestConvolve(const int sub_x, const InterpFilter filter) {
    567    const int width = GetParam().Block().Width();
    568    const int height = GetParam().Block().Height();
    569    const int bit_depth = GetParam().BitDepth();
    570    const InterpFilterParams *filter_params_x =
    571        av1_get_interp_filter_params_with_block_size(filter, width);
    572    ConvolveParams conv_params1 =
    573        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    574    const uint16_t *input = FirstRandomInput16(GetParam());
    575    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    576    av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
    577                               height, filter_params_x, sub_x, &conv_params1,
    578                               bit_depth);
    579 
    580    ConvolveParams conv_params2 =
    581        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    582    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    583    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
    584                              filter_params_x, sub_x, &conv_params2, bit_depth);
    585    AssertOutputBufferEq(reference, test, width, height);
    586  }
    587 
    588 private:
    589  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
    590    const int width = GetParam().Block().Width();
    591    const int height = GetParam().Block().Height();
    592    const int bit_depth = GetParam().BitDepth();
    593    const InterpFilterParams *filter_params_x =
    594        av1_get_interp_filter_params_with_block_size(filter, width);
    595    ConvolveParams conv_params1 =
    596        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    597    const uint16_t *input = FirstRandomInput16(GetParam());
    598    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    599 
    600    aom_usec_timer timer;
    601    aom_usec_timer_start(&timer);
    602    for (int i = 0; i < num_iters; ++i) {
    603      av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
    604                                 height, filter_params_x, 0, &conv_params1,
    605                                 bit_depth);
    606    }
    607    aom_usec_timer_mark(&timer);
    608    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    609    ConvolveParams conv_params2 =
    610        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    611    highbd_convolve_x_func test_func = GetParam().TestFunction();
    612    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    613 
    614    aom_usec_timer_start(&timer);
    615    for (int i = 0; i < num_iters; ++i) {
    616      test_func(input, width, test, kOutputStride, width, height,
    617                filter_params_x, 0, &conv_params2, bit_depth);
    618    }
    619    aom_usec_timer_mark(&timer);
    620    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    621    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
    622           time2, time1 / time2);
    623  }
    624 };
    625 
    626 TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); }
    627 
    628 TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }
    629 
    630 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest,
    631                         BuildHighbdParams(av1_highbd_convolve_x_sr_c));
    632 
    633 #if HAVE_SSSE3
    634 INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest,
    635                         BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3));
    636 #endif
    637 
    638 #if HAVE_AVX2
    639 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest,
    640                         BuildHighbdParams(av1_highbd_convolve_x_sr_avx2));
    641 #endif
    642 
    643 #if HAVE_NEON
    644 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest,
    645                         BuildHighbdParams(av1_highbd_convolve_x_sr_neon));
    646 #endif
    647 
    648 #if HAVE_SVE2
    649 INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveXHighbdTest,
    650                         BuildHighbdParams(av1_highbd_convolve_x_sr_sve2));
    651 #endif
    652 
    653 #if HAVE_RVV
    654 INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveXHighbdTest,
    655                         BuildHighbdParams(av1_highbd_convolve_x_sr_rvv));
    656 #endif
    657 
    658 /////////////////////////////////////////////////////////////////
    659 // Single reference convolve-x IntraBC functions (high bit-depth)
    660 /////////////////////////////////////////////////////////////////
    661 
    662 class AV1ConvolveXHighbdIntraBCTest
    663    : public AV1ConvolveTest<highbd_convolve_x_func> {
    664 public:
    665  void RunTest() {
    666    // IntraBC functions only operate for subpel_x_qn = 8.
    667    constexpr int kSubX = 8;
    668    const int width = GetParam().Block().Width();
    669    const int height = GetParam().Block().Height();
    670    const int bit_depth = GetParam().BitDepth();
    671    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    672    const uint16_t *input = FirstRandomInput16(GetParam());
    673 
    674    ConvolveParams conv_params1 =
    675        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    676    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    677    // Use a stride different from width to avoid potential storing errors that
    678    // would go undetected. The input buffer is filled using a padding of 12, so
    679    // the stride can be anywhere between width and width + 12.
    680    av1_highbd_convolve_x_sr_intrabc_c(
    681        input, width + 2, reference, kOutputStride, width, height,
    682        filter_params_x, kSubX, &conv_params1, bit_depth);
    683 
    684    ConvolveParams conv_params2 =
    685        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
    686    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    687    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
    688                              height, filter_params_x, kSubX, &conv_params2,
    689                              bit_depth);
    690 
    691    AssertOutputBufferEq(reference, test, width, height);
    692  }
    693 
    694  void SpeedTest() {
    695    constexpr int kNumIters = 10000;
    696    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
    697    const int width = GetParam().Block().Width();
    698    const int height = GetParam().Block().Height();
    699    const int bit_depth = GetParam().BitDepth();
    700    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    701    const uint16_t *input = FirstRandomInput16(GetParam());
    702 
    703    ConvolveParams conv_params1 =
    704        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    705    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    706    aom_usec_timer timer;
    707    aom_usec_timer_start(&timer);
    708    for (int i = 0; i < kNumIters; ++i) {
    709      av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride,
    710                                         width, height, filter_params_x, 0,
    711                                         &conv_params1, bit_depth);
    712    }
    713    aom_usec_timer_mark(&timer);
    714    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    715 
    716    ConvolveParams conv_params2 =
    717        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    718    highbd_convolve_x_func test_func = GetParam().TestFunction();
    719    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    720    aom_usec_timer_start(&timer);
    721    for (int i = 0; i < kNumIters; ++i) {
    722      test_func(input, width, test, kOutputStride, width, height,
    723                filter_params_x, 0, &conv_params2, bit_depth);
    724    }
    725    aom_usec_timer_mark(&timer);
    726    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    727 
    728    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
    729           time2, time1 / time2);
    730  }
    731 };
    732 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) { RunTest(); }

// Benchmark; the DISABLED_ prefix keeps gtest from running it by default.
TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest,
                        BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
   NEON, AV1ConvolveXHighbdIntraBCTest,
   BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(
   RVV, AV1ConvolveXHighbdIntraBCTest,
   BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_rvv));
#endif
    751 
    752 #endif  // CONFIG_AV1_HIGHBITDEPTH
    753 
    754 ////////////////////////////////////////////////////////
    755 // Single reference convolve-y functions (low bit-depth)
    756 ////////////////////////////////////////////////////////
// Pointer type for the low bit-depth convolve-y functions under test: 8-bit
// source and destination buffers with their strides, the block width and
// height, the vertical filter parameters and the vertical sub-pixel offset.
using convolve_y_func = void (*)(const uint8_t *src, int src_stride,
                                uint8_t *dst, int dst_stride, int w, int h,
                                const InterpFilterParams *filter_params_y,
                                const int subpel_y_qn);
    761 
    762 class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> {
    763 public:
    764  void RunTest() {
    765    // Do not test the no-op filter.
    766    for (int sub_y = 1; sub_y < 16; ++sub_y) {
    767      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    768           ++filter) {
    769        InterpFilter f = static_cast<InterpFilter>(filter);
    770        TestConvolve(sub_y, f);
    771      }
    772    }
    773  }
    774 
    775 public:
    776  void SpeedTest() {
    777    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    778         ++filter) {
    779      InterpFilter f = static_cast<InterpFilter>(filter);
    780      TestConvolveSpeed(f, 10000);
    781    }
    782  }
    783 
    784 private:
    785  void TestConvolve(const int sub_y, const InterpFilter filter) {
    786    const int width = GetParam().Block().Width();
    787    const int height = GetParam().Block().Height();
    788 
    789    const InterpFilterParams *filter_params_y =
    790        av1_get_interp_filter_params_with_block_size(filter, height);
    791    const uint8_t *input = FirstRandomInput8(GetParam());
    792    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    793    av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
    794                        filter_params_y, sub_y);
    795    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    796    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
    797                              filter_params_y, sub_y);
    798    AssertOutputBufferEq(reference, test, width, height);
    799  }
    800 
    801 private:
    802  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
    803    const int width = GetParam().Block().Width();
    804    const int height = GetParam().Block().Height();
    805 
    806    const InterpFilterParams *filter_params_y =
    807        av1_get_interp_filter_params_with_block_size(filter, height);
    808    const uint8_t *input = FirstRandomInput8(GetParam());
    809    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    810 
    811    aom_usec_timer timer;
    812    aom_usec_timer_start(&timer);
    813    for (int i = 0; i < num_iters; ++i) {
    814      av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
    815                          filter_params_y, 0);
    816    }
    817    aom_usec_timer_mark(&timer);
    818    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    819 
    820    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    821 
    822    aom_usec_timer_start(&timer);
    823    for (int i = 0; i < num_iters; ++i) {
    824      GetParam().TestFunction()(input, width, test, kOutputStride, width,
    825                                height, filter_params_y, 0);
    826    }
    827    aom_usec_timer_mark(&timer);
    828    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    829    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
    830           time2, time1 / time2);
    831  }
    832 };
    833 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYTest, RunTest) { RunTest(); }

// Benchmark; the DISABLED_ prefix keeps gtest from running it by default.
TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYTest,
                        BuildLowbdParams(av1_convolve_y_sr_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYTest,
                        BuildLowbdParams(av1_convolve_y_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYTest,
                        BuildLowbdParams(av1_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest,
                        BuildLowbdParams(av1_convolve_y_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveYTest,
                        BuildLowbdParams(av1_convolve_y_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveYTest,
                        BuildLowbdParams(av1_convolve_y_sr_neon_i8mm));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveYTest,
                        BuildLowbdParams(av1_convolve_y_sr_rvv));
#endif
    870 
    871 ////////////////////////////////////////////////////////////////
    872 // Single reference convolve-y IntraBC functions (low bit-depth)
    873 ////////////////////////////////////////////////////////////////
    874 
    875 class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> {
    876 public:
    877  void RunTest() {
    878    // IntraBC functions only operate for subpel_y_qn = 8.
    879    constexpr int kSubY = 8;
    880    const int width = GetParam().Block().Width();
    881    const int height = GetParam().Block().Height();
    882    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    883    const uint8_t *input = FirstRandomInput8(GetParam());
    884 
    885    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    886    // Use a stride different from width to avoid potential storing errors that
    887    // would go undetected. The input buffer is filled using a padding of 12, so
    888    // the stride can be anywhere between width and width + 12.
    889    av1_convolve_y_sr_intrabc_c(input, width + 2, reference, kOutputStride,
    890                                width, height, filter_params_y, kSubY);
    891 
    892    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    893    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
    894                              height, filter_params_y, kSubY);
    895 
    896    AssertOutputBufferEq(reference, test, width, height);
    897  }
    898 
    899  void SpeedTest() {
    900    constexpr int kNumIters = 10000;
    901    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
    902    const int width = GetParam().Block().Width();
    903    const int height = GetParam().Block().Height();
    904 
    905    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    906    const uint8_t *input = FirstRandomInput8(GetParam());
    907    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    908 
    909    aom_usec_timer timer;
    910    aom_usec_timer_start(&timer);
    911    for (int i = 0; i < kNumIters; ++i) {
    912      av1_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, width,
    913                                  height, filter_params_y, 0);
    914    }
    915    aom_usec_timer_mark(&timer);
    916    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    917 
    918    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    919    convolve_y_func test_func = GetParam().TestFunction();
    920    aom_usec_timer_start(&timer);
    921    for (int i = 0; i < kNumIters; ++i) {
    922      test_func(input, width, test, kOutputStride, width, height,
    923                filter_params_y, 0);
    924    }
    925    aom_usec_timer_mark(&timer);
    926    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    927 
    928    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
    929           time2, time1 / time2);
    930  }
    931 };
    932 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYIntraBCTest, RunTest) { RunTest(); }

// Benchmark; the DISABLED_ prefix keeps gtest from running it by default.
TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYIntraBCTest,
                        BuildLowbdParams(av1_convolve_y_sr_intrabc_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest,
                        BuildLowbdParams(av1_convolve_y_sr_intrabc_neon));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveYIntraBCTest,
                        BuildLowbdParams(av1_convolve_y_sr_intrabc_rvv));
#endif
    949 
    950 #if CONFIG_AV1_HIGHBITDEPTH
    951 /////////////////////////////////////////////////////////
    952 // Single reference convolve-y functions (high bit-depth)
    953 /////////////////////////////////////////////////////////
// Pointer type for the high bit-depth convolve-y functions under test: 16-bit
// source and destination buffers with their strides, the block width and
// height, the vertical filter parameters, the vertical sub-pixel offset and
// the bit depth.
using highbd_convolve_y_func =
   void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
            int w, int h, const InterpFilterParams *filter_params_y,
            const int subpel_y_qn, int bd);
    958 
    959 class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> {
    960 public:
    961  void RunTest() {
    962    // Do not test the no-op filter.
    963    for (int sub_y = 1; sub_y < 16; ++sub_y) {
    964      for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    965           ++filter) {
    966        InterpFilter f = static_cast<InterpFilter>(filter);
    967        TestConvolve(sub_y, f);
    968      }
    969    }
    970  }
    971 
    972 public:
    973  void SpeedTest() {
    974    for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
    975         ++filter) {
    976      InterpFilter f = static_cast<InterpFilter>(filter);
    977      TestConvolveSpeed(f, 10000);
    978    }
    979  }
    980 
    981 private:
    982  void TestConvolve(const int sub_y, const InterpFilter filter) {
    983    const int width = GetParam().Block().Width();
    984    const int height = GetParam().Block().Height();
    985    const int bit_depth = GetParam().BitDepth();
    986    const InterpFilterParams *filter_params_y =
    987        av1_get_interp_filter_params_with_block_size(filter, height);
    988    const uint16_t *input = FirstRandomInput16(GetParam());
    989    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    990    av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
    991                               height, filter_params_y, sub_y, bit_depth);
    992    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    993    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
    994                              filter_params_y, sub_y, bit_depth);
    995    AssertOutputBufferEq(reference, test, width, height);
    996  }
    997 
    998 private:
    999  void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
   1000    const int width = GetParam().Block().Width();
   1001    const int height = GetParam().Block().Height();
   1002    const int bit_depth = GetParam().BitDepth();
   1003    const InterpFilterParams *filter_params_y =
   1004        av1_get_interp_filter_params_with_block_size(filter, width);
   1005    const uint16_t *input = FirstRandomInput16(GetParam());
   1006    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1007 
   1008    aom_usec_timer timer;
   1009    aom_usec_timer_start(&timer);
   1010    for (int i = 0; i < num_iters; ++i) {
   1011      av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
   1012                                 height, filter_params_y, 0, bit_depth);
   1013    }
   1014    aom_usec_timer_mark(&timer);
   1015    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1016    highbd_convolve_y_func test_func = GetParam().TestFunction();
   1017    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1018 
   1019    aom_usec_timer_start(&timer);
   1020    for (int i = 0; i < num_iters; ++i) {
   1021      test_func(input, width, test, kOutputStride, width, height,
   1022                filter_params_y, 0, bit_depth);
   1023    }
   1024    aom_usec_timer_mark(&timer);
   1025    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1026    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
   1027           time2, time1 / time2);
   1028  }
   1029 };
   1030 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYHighbdTest, RunTest) { RunTest(); }

// Benchmark; the DISABLED_ prefix keeps gtest from running it by default.
TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest,
                        BuildHighbdParams(av1_highbd_convolve_y_sr_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest,
                        BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest,
                        BuildHighbdParams(av1_highbd_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest,
                        BuildHighbdParams(av1_highbd_convolve_y_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveYHighbdTest,
                        BuildHighbdParams(av1_highbd_convolve_y_sr_sve2));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveYHighbdTest,
                        BuildHighbdParams(av1_highbd_convolve_y_sr_rvv));
#endif
   1062 
   1063 /////////////////////////////////////////////////////////////////
   1064 // Single reference convolve-y IntraBC functions (high bit-depth)
   1065 /////////////////////////////////////////////////////////////////
   1066 
   1067 class AV1ConvolveYHighbdIntraBCTest
   1068    : public AV1ConvolveTest<highbd_convolve_y_func> {
   1069 public:
   1070  void RunTest() {
   1071    // IntraBC functions only operate for subpel_y_qn = 8.
   1072    constexpr int kSubY = 8;
   1073    const int width = GetParam().Block().Width();
   1074    const int height = GetParam().Block().Height();
   1075    const int bit_depth = GetParam().BitDepth();
   1076    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
   1077    const uint16_t *input = FirstRandomInput16(GetParam());
   1078 
   1079    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1080    // Use a stride different from width to avoid potential storing errors that
   1081    // would go undetected. The input buffer is filled using a padding of 12, so
   1082    // the stride can be anywhere between width and width + 12.
   1083    av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference,
   1084                                       kOutputStride, width, height,
   1085                                       filter_params_y, kSubY, bit_depth);
   1086 
   1087    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1088    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
   1089                              height, filter_params_y, kSubY, bit_depth);
   1090 
   1091    AssertOutputBufferEq(reference, test, width, height);
   1092  }
   1093 
   1094  void SpeedTest() {
   1095    constexpr int kNumIters = 10000;
   1096    const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
   1097    const int width = GetParam().Block().Width();
   1098    const int height = GetParam().Block().Height();
   1099    const int bit_depth = GetParam().BitDepth();
   1100    const InterpFilterParams *filter_params_y =
   1101        av1_get_interp_filter_params_with_block_size(filter, width);
   1102    const uint16_t *input = FirstRandomInput16(GetParam());
   1103 
   1104    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1105    aom_usec_timer timer;
   1106    aom_usec_timer_start(&timer);
   1107    for (int i = 0; i < kNumIters; ++i) {
   1108      av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride,
   1109                                         width, height, filter_params_y, 0,
   1110                                         bit_depth);
   1111    }
   1112    aom_usec_timer_mark(&timer);
   1113    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1114 
   1115    highbd_convolve_y_func test_func = GetParam().TestFunction();
   1116    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1117    aom_usec_timer_start(&timer);
   1118    for (int i = 0; i < kNumIters; ++i) {
   1119      test_func(input, width, test, kOutputStride, width, height,
   1120                filter_params_y, 0, bit_depth);
   1121    }
   1122    aom_usec_timer_mark(&timer);
   1123    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1124 
   1125    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
   1126           time2, time1 / time2);
   1127  }
   1128 };
   1129 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) { RunTest(); }

// Benchmark; the DISABLED_ prefix keeps gtest from running it by default.
TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest,
                        BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
   NEON, AV1ConvolveYHighbdIntraBCTest,
   BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(
   RVV, AV1ConvolveYHighbdIntraBCTest,
   BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_rvv));
#endif
   1148 
   1149 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1150 
   1151 //////////////////////////////////////////////////////////////
   1152 // Single reference convolve-copy functions (low bit-depth)
   1153 //////////////////////////////////////////////////////////////
// Pointer type for the low bit-depth convolve-copy functions under test: 8-bit
// source and destination buffers with their strides (ptrdiff_t here, unlike
// the int strides of the filtering functions) and the block width and height.
using convolve_copy_func = void (*)(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride, int w,
                                   int h);
   1157 
   1158 class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> {
   1159 public:
   1160  void RunTest() {
   1161    const int width = GetParam().Block().Width();
   1162    const int height = GetParam().Block().Height();
   1163    const uint8_t *input = FirstRandomInput8(GetParam());
   1164    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   1165    aom_convolve_copy_c(input, width, reference, kOutputStride, width, height);
   1166    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   1167    GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
   1168    AssertOutputBufferEq(reference, test, width, height);
   1169  }
   1170 };
   1171 
// Note that even though these are AOM convolve functions, we are using the
// newer AV1 test framework.
// Only a correctness test is registered for this suite; there is no speed
// test.
TEST_P(AV1ConvolveCopyTest, RunTest) { RunTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyTest,
                        BuildLowbdParams(aom_convolve_copy_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyTest,
                        BuildLowbdParams(aom_convolve_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyTest,
                        BuildLowbdParams(aom_convolve_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest,
                        BuildLowbdParams(aom_convolve_copy_neon));
#endif
   1193 
   1194 #if CONFIG_AV1_HIGHBITDEPTH
   1195 ///////////////////////////////////////////////////////////////
   1196 // Single reference convolve-copy functions (high bit-depth)
   1197 ///////////////////////////////////////////////////////////////
// Pointer type for the high bit-depth convolve-copy functions under test:
// 16-bit source and destination buffers with their ptrdiff_t strides and the
// block width and height.
using highbd_convolve_copy_func = void (*)(const uint16_t *src,
                                          ptrdiff_t src_stride, uint16_t *dst,
                                          ptrdiff_t dst_stride, int w, int h);
   1201 
   1202 class AV1ConvolveCopyHighbdTest
   1203    : public AV1ConvolveTest<highbd_convolve_copy_func> {
   1204 public:
   1205  void RunTest() {
   1206    const BlockSize &block = GetParam().Block();
   1207    const int width = block.Width();
   1208    const int height = block.Height();
   1209    const uint16_t *input = FirstRandomInput16(GetParam());
   1210    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1211    aom_highbd_convolve_copy_c(input, width, reference, kOutputStride, width,
   1212                               height);
   1213    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1214    GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
   1215    AssertOutputBufferEq(reference, test, width, height);
   1216  }
   1217 };
   1218 
// Only a correctness test is registered for this suite; there is no speed
// test.
TEST_P(AV1ConvolveCopyHighbdTest, RunTest) { RunTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest,
                        BuildHighbdParams(aom_highbd_convolve_copy_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest,
                        BuildHighbdParams(aom_highbd_convolve_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest,
                        BuildHighbdParams(aom_highbd_convolve_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest,
                        BuildHighbdParams(aom_highbd_convolve_copy_neon));
#endif
   1238 
   1239 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1240 
   1241 /////////////////////////////////////////////////////////
   1242 // Single reference convolve-2D functions (low bit-depth)
   1243 /////////////////////////////////////////////////////////
// Pointer type for the low bit-depth convolve-2D functions under test: 8-bit
// source and destination buffers with their strides, the block width and
// height, separate horizontal and vertical filter parameters and sub-pixel
// offsets, and the convolve parameters.
using convolve_2d_func = void (*)(const uint8_t *src, int src_stride,
                                 uint8_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams *filter_params_x,
                                 const InterpFilterParams *filter_params_y,
                                 const int subpel_x_qn, const int subpel_y_qn,
                                 ConvolveParams *conv_params);
   1250 
   1251 class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> {
   1252 public:
   1253  void RunTest() {
   1254    // Do not test the no-op filter.
   1255    for (int sub_x = 1; sub_x < 16; ++sub_x) {
   1256      for (int sub_y = 1; sub_y < 16; ++sub_y) {
   1257        for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
   1258          for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
   1259            if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
   1260                ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
   1261              continue;
   1262            TestConvolve(static_cast<InterpFilter>(h_f),
   1263                         static_cast<InterpFilter>(v_f), sub_x, sub_y);
   1264          }
   1265        }
   1266      }
   1267    }
   1268  }
   1269 
   1270 public:
   1271  void SpeedTest() {
   1272    for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
   1273      for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
   1274        if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
   1275            ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
   1276          continue;
   1277        TestConvolveSpeed(static_cast<InterpFilter>(h_f),
   1278                          static_cast<InterpFilter>(v_f), 10000);
   1279      }
   1280    }
   1281  }
   1282 
   1283 private:
   1284  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
   1285                    const int sub_x, const int sub_y) {
   1286    const int width = GetParam().Block().Width();
   1287    const int height = GetParam().Block().Height();
   1288    const InterpFilterParams *filter_params_x =
   1289        av1_get_interp_filter_params_with_block_size(h_f, width);
   1290    const InterpFilterParams *filter_params_y =
   1291        av1_get_interp_filter_params_with_block_size(v_f, height);
   1292    const uint8_t *input = FirstRandomInput8(GetParam());
   1293    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   1294    ConvolveParams conv_params1 =
   1295        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1296    av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, height,
   1297                         filter_params_x, filter_params_y, sub_x, sub_y,
   1298                         &conv_params1);
   1299    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   1300    ConvolveParams conv_params2 =
   1301        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1302    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
   1303                              filter_params_x, filter_params_y, sub_x, sub_y,
   1304                              &conv_params2);
   1305    AssertOutputBufferEq(reference, test, width, height);
   1306  }
   1307 
   1308 private:
   1309  void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
   1310                         int num_iters) {
   1311    const int width = GetParam().Block().Width();
   1312    const int height = GetParam().Block().Height();
   1313    const InterpFilterParams *filter_params_x =
   1314        av1_get_interp_filter_params_with_block_size(h_f, width);
   1315    const InterpFilterParams *filter_params_y =
   1316        av1_get_interp_filter_params_with_block_size(v_f, height);
   1317    const uint8_t *input = FirstRandomInput8(GetParam());
   1318    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   1319    ConvolveParams conv_params1 =
   1320        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1321    aom_usec_timer timer;
   1322    aom_usec_timer_start(&timer);
   1323    for (int i = 0; i < num_iters; ++i) {
   1324      av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
   1325                           height, filter_params_x, filter_params_y, 0, 0,
   1326                           &conv_params1);
   1327    }
   1328    aom_usec_timer_mark(&timer);
   1329    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1330    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   1331    ConvolveParams conv_params2 =
   1332        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1333    aom_usec_timer_start(&timer);
   1334    for (int i = 0; i < num_iters; ++i) {
   1335      GetParam().TestFunction()(input, width, test, kOutputStride, width,
   1336                                height, filter_params_x, filter_params_y, 0, 0,
   1337                                &conv_params2);
   1338    }
   1339    aom_usec_timer_mark(&timer);
   1340    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1341    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
   1342           time1, time2, time1 / time2);
   1343  }
   1344 };
   1345 
// Correctness test, run for every instantiation below.
TEST_P(AV1Convolve2DTest, RunTest) { RunTest(); }

// Benchmark; the DISABLED_ prefix keeps gtest from running it by default.
TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C instantiation checks the reference implementation against itself.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_c));

// Architecture-specific variants are only registered when the matching HAVE_*
// build flag is set.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_sve2));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1Convolve2DTest,
                        BuildLowbdParams(av1_convolve_2d_sr_rvv));
#endif
   1387 
   1388 /////////////////////////////////////////////////////////////////
   1389 // Single reference convolve-2D IntraBC functions (low bit-depth)
   1390 /////////////////////////////////////////////////////////////////
   1391 
   1392 class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> {
   1393 public:
   1394  void RunTest() {
   1395    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
   1396    constexpr int kSubX = 8;
   1397    constexpr int kSubY = 8;
   1398    const int width = GetParam().Block().Width();
   1399    const int height = GetParam().Block().Height();
   1400    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
   1401    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
   1402    const uint8_t *input = FirstRandomInput8(GetParam());
   1403 
   1404    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   1405    ConvolveParams conv_params1 =
   1406        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1407    // Use a stride different from width to avoid potential storing errors that
   1408    // would go undetected. The input buffer is filled using a padding of 12, so
   1409    // the stride can be anywhere between width and width + 12.
   1410    av1_convolve_2d_sr_intrabc_c(input, width + 2, reference, kOutputStride,
   1411                                 width, height, filter_params_x,
   1412                                 filter_params_y, kSubX, kSubY, &conv_params1);
   1413 
   1414    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   1415    ConvolveParams conv_params2 =
   1416        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1417    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
   1418                              height, filter_params_x, filter_params_y, kSubX,
   1419                              kSubY, &conv_params2);
   1420 
   1421    AssertOutputBufferEq(reference, test, width, height);
   1422  }
   1423 
   1424  void SpeedTest() {
   1425    constexpr int kNumIters = 10000;
   1426    const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
   1427    const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
   1428    const int width = GetParam().Block().Width();
   1429    const int height = GetParam().Block().Height();
   1430    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
   1431    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
   1432    const uint8_t *input = FirstRandomInput8(GetParam());
   1433 
   1434    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   1435    ConvolveParams conv_params1 =
   1436        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1437    aom_usec_timer timer;
   1438    aom_usec_timer_start(&timer);
   1439    for (int i = 0; i < kNumIters; ++i) {
   1440      av1_convolve_2d_sr_intrabc_c(input, width, reference, kOutputStride,
   1441                                   width, height, filter_params_x,
   1442                                   filter_params_y, 8, 8, &conv_params1);
   1443    }
   1444    aom_usec_timer_mark(&timer);
   1445    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1446 
   1447    convolve_2d_func test_func = GetParam().TestFunction();
   1448    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   1449    ConvolveParams conv_params2 =
   1450        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1451    aom_usec_timer_start(&timer);
   1452    for (int i = 0; i < kNumIters; ++i) {
   1453      test_func(input, width, test, kOutputStride, width, height,
   1454                filter_params_x, filter_params_y, 8, 8, &conv_params2);
   1455    }
   1456    aom_usec_timer_mark(&timer);
   1457    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1458 
   1459    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
   1460           time1, time2, time1 / time2);
   1461  }
   1462 };
   1463 
// Runs the IntraBC correctness check once per registered parameter.
TEST_P(AV1Convolve2DIntraBCTest, RunTest) { RunTest(); }

// Timing comparison; gtest skips it unless disabled tests are requested.
TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C implementation is always registered.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_c));

// Architecture-specific implementations, registered only when the matching
// HAVE_* macro is non-zero for this build.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_rvv));
#endif
   1480 
   1481 #if CONFIG_AV1_HIGHBITDEPTH
   1482 //////////////////////////////////////////////////////////
   1483 // Single reference convolve-2d functions (high bit-depth)
   1484 //////////////////////////////////////////////////////////
   1485 
// Function-pointer type of the high bit-depth single-reference 2D convolve
// functions tested below: source and destination samples are uint16_t, each
// direction has its own filter parameters and sub-pixel offset, and the bit
// depth is passed as the trailing `bd` argument.
using highbd_convolve_2d_func =
    void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
             int w, int h, const InterpFilterParams *filter_params_x,
             const InterpFilterParams *filter_params_y, const int subpel_x_qn,
             const int subpel_y_qn, ConvolveParams *conv_params, int bd);
   1491 
   1492 class AV1Convolve2DHighbdTest
   1493    : public AV1ConvolveTest<highbd_convolve_2d_func> {
   1494 public:
   1495  void RunTest() {
   1496    // Do not test the no-op filter.
   1497    for (int sub_x = 1; sub_x < 16; ++sub_x) {
   1498      for (int sub_y = 1; sub_y < 16; ++sub_y) {
   1499        for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
   1500          for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
   1501            if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
   1502                ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
   1503              continue;
   1504            TestConvolve(static_cast<InterpFilter>(h_f),
   1505                         static_cast<InterpFilter>(v_f), sub_x, sub_y);
   1506          }
   1507        }
   1508      }
   1509    }
   1510  }
   1511 
   1512 public:
   1513  void SpeedTest() {
   1514    for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
   1515      for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
   1516        if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
   1517            ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
   1518          continue;
   1519        TestConvolveSpeed(static_cast<InterpFilter>(h_f),
   1520                          static_cast<InterpFilter>(v_f), 10000);
   1521      }
   1522    }
   1523  }
   1524 
   1525 private:
   1526  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
   1527                    const int sub_x, const int sub_y) {
   1528    const int width = GetParam().Block().Width();
   1529    const int height = GetParam().Block().Height();
   1530    const int bit_depth = GetParam().BitDepth();
   1531    const InterpFilterParams *filter_params_x =
   1532        av1_get_interp_filter_params_with_block_size(h_f, width);
   1533    const InterpFilterParams *filter_params_y =
   1534        av1_get_interp_filter_params_with_block_size(v_f, height);
   1535    const uint16_t *input = FirstRandomInput16(GetParam());
   1536    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1537    ConvolveParams conv_params1 =
   1538        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
   1539    av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
   1540                                height, filter_params_x, filter_params_y, sub_x,
   1541                                sub_y, &conv_params1, bit_depth);
   1542    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1543    ConvolveParams conv_params2 =
   1544        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
   1545    GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
   1546                              filter_params_x, filter_params_y, sub_x, sub_y,
   1547                              &conv_params2, bit_depth);
   1548    AssertOutputBufferEq(reference, test, width, height);
   1549  }
   1550 
   1551  void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
   1552                         int num_iters) {
   1553    const int width = GetParam().Block().Width();
   1554    const int height = GetParam().Block().Height();
   1555    const int bit_depth = GetParam().BitDepth();
   1556    const InterpFilterParams *filter_params_x =
   1557        av1_get_interp_filter_params_with_block_size(h_f, width);
   1558    const InterpFilterParams *filter_params_y =
   1559        av1_get_interp_filter_params_with_block_size(v_f, height);
   1560    const uint16_t *input = FirstRandomInput16(GetParam());
   1561    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1562    ConvolveParams conv_params1 =
   1563        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1564    aom_usec_timer timer;
   1565    aom_usec_timer_start(&timer);
   1566    for (int i = 0; i < num_iters; ++i) {
   1567      av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
   1568                                  height, filter_params_x, filter_params_y, 0,
   1569                                  0, &conv_params1, bit_depth);
   1570    }
   1571    aom_usec_timer_mark(&timer);
   1572    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1573    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1574    ConvolveParams conv_params2 =
   1575        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1576    aom_usec_timer_start(&timer);
   1577    for (int i = 0; i < num_iters; ++i) {
   1578      GetParam().TestFunction()(input, width, test, kOutputStride, width,
   1579                                height, filter_params_x, filter_params_y, 0, 0,
   1580                                &conv_params2, bit_depth);
   1581    }
   1582    aom_usec_timer_mark(&timer);
   1583    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1584    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
   1585           time1, time2, time1 / time2);
   1586  }
   1587 };
   1588 
// Runs the high bit-depth 2D correctness check once per registered parameter.
TEST_P(AV1Convolve2DHighbdTest, RunTest) { RunTest(); }

// Timing comparison; gtest skips it unless disabled tests are requested.
TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C implementation is always registered.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_c));

// Architecture-specific implementations, registered only when the matching
// HAVE_* macro is non-zero for this build.
#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_sve2));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_rvv));
#endif
   1620 
   1621 //////////////////////////////////////////////////////////////////
   1622 // Single reference convolve-2d IntraBC functions (high bit-depth)
   1623 //////////////////////////////////////////////////////////////////
   1624 
   1625 class AV1Convolve2DHighbdIntraBCTest
   1626    : public AV1ConvolveTest<highbd_convolve_2d_func> {
   1627 public:
   1628  void RunTest() {
   1629    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
   1630    constexpr int kSubX = 8;
   1631    constexpr int kSubY = 8;
   1632    const int width = GetParam().Block().Width();
   1633    const int height = GetParam().Block().Height();
   1634    const int bit_depth = GetParam().BitDepth();
   1635    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
   1636    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
   1637    const uint16_t *input = FirstRandomInput16(GetParam());
   1638 
   1639    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1640    ConvolveParams conv_params1 =
   1641        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
   1642    // Use a stride different from width to avoid potential storing errors that
   1643    // would go undetected. The input buffer is filled using a padding of 12, so
   1644    // the stride can be anywhere between width and width + 12.
   1645    av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference,
   1646                                        kOutputStride, width, height,
   1647                                        filter_params_x, filter_params_y, kSubX,
   1648                                        kSubY, &conv_params1, bit_depth);
   1649 
   1650    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1651    ConvolveParams conv_params2 =
   1652        get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
   1653    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
   1654                              height, filter_params_x, filter_params_y, kSubX,
   1655                              kSubY, &conv_params2, bit_depth);
   1656 
   1657    AssertOutputBufferEq(reference, test, width, height);
   1658  }
   1659 
   1660  void SpeedTest() {
   1661    constexpr int kNumIters = 10000;
   1662    const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
   1663    const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
   1664    const int width = GetParam().Block().Width();
   1665    const int height = GetParam().Block().Height();
   1666    const int bit_depth = GetParam().BitDepth();
   1667    const InterpFilterParams *filter_params_x =
   1668        av1_get_interp_filter_params_with_block_size(h_f, width);
   1669    const InterpFilterParams *filter_params_y =
   1670        av1_get_interp_filter_params_with_block_size(v_f, height);
   1671    const uint16_t *input = FirstRandomInput16(GetParam());
   1672 
   1673    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1674    ConvolveParams conv_params1 =
   1675        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1676    aom_usec_timer timer;
   1677    aom_usec_timer_start(&timer);
   1678    for (int i = 0; i < kNumIters; ++i) {
   1679      av1_highbd_convolve_2d_sr_intrabc_c(
   1680          input, width, reference, kOutputStride, width, height,
   1681          filter_params_x, filter_params_y, 0, 0, &conv_params1, bit_depth);
   1682    }
   1683    aom_usec_timer_mark(&timer);
   1684    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1685 
   1686    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1687    highbd_convolve_2d_func test_func = GetParam().TestFunction();
   1688    ConvolveParams conv_params2 =
   1689        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
   1690    aom_usec_timer_start(&timer);
   1691    for (int i = 0; i < kNumIters; ++i) {
   1692      test_func(input, width, test, kOutputStride, width, height,
   1693                filter_params_x, filter_params_y, 0, 0, &conv_params2,
   1694                bit_depth);
   1695    }
   1696    aom_usec_timer_mark(&timer);
   1697    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   1698 
   1699    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
   1700           time1, time2, time1 / time2);
   1701  }
   1702 };
   1703 
// Runs the high bit-depth IntraBC correctness check once per registered
// parameter.
TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) { RunTest(); }

// Timing comparison; gtest skips it unless disabled tests are requested.
TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// The C implementation is always registered.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c));

// Architecture-specific implementations, registered only when the matching
// HAVE_* macro is non-zero for this build.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(
    RVV, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_rvv));
#endif
   1723 
   1724 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1725 
   1726 //////////////////////////
   1727 // Compound Convolve Tests
   1728 //////////////////////////
   1729 
   1730 // The compound functions do not work for chroma block sizes. Provide
   1731 // a function to generate test parameters for just luma block sizes.
   1732 template <typename T>
   1733 std::vector<TestParam<T>> GetLumaTestParams(
   1734    std::initializer_list<int> bit_depths, T test_func) {
   1735  std::set<BlockSize> sizes;
   1736  for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
   1737    const int w = block_size_wide[b];
   1738    const int h = block_size_high[b];
   1739    sizes.insert(BlockSize(w, h));
   1740  }
   1741  std::vector<TestParam<T>> result;
   1742  for (int bit_depth : bit_depths) {
   1743    for (const auto &block : sizes) {
   1744      result.push_back(TestParam<T>(block, bit_depth, test_func));
   1745    }
   1746  }
   1747  return result;
   1748 }
   1749 
   1750 template <typename T>
   1751 std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) {
   1752  return GetLumaTestParams({ 8 }, test_func);
   1753 }
   1754 
   1755 template <typename T>
   1756 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams(
   1757    T test_func) {
   1758  return ::testing::ValuesIn(GetLowbdLumaTestParams(test_func));
   1759 }
   1760 
   1761 TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) {
   1762  auto v = GetLowbdLumaTestParams(av1_dist_wtd_convolve_x_c);
   1763  ASSERT_EQ(22U, v.size());
   1764  for (const auto &e : v) {
   1765    ASSERT_EQ(8, e.BitDepth());
   1766    bool same_fn = av1_dist_wtd_convolve_x_c == e.TestFunction();
   1767    ASSERT_TRUE(same_fn);
   1768  }
   1769 }
   1770 
   1771 #if CONFIG_AV1_HIGHBITDEPTH
   1772 template <typename T>
   1773 std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) {
   1774  return GetLumaTestParams({ 10, 12 }, test_func);
   1775 }
   1776 
   1777 TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) {
   1778  auto v = GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c);
   1779  ASSERT_EQ(44U, v.size());
   1780  int num_10 = 0;
   1781  int num_12 = 0;
   1782  for (const auto &e : v) {
   1783    ASSERT_TRUE(10 == e.BitDepth() || 12 == e.BitDepth());
   1784    bool same_fn = av1_highbd_dist_wtd_convolve_x_c == e.TestFunction();
   1785    ASSERT_TRUE(same_fn);
   1786    if (e.BitDepth() == 10) {
   1787      ++num_10;
   1788    } else {
   1789      ++num_12;
   1790    }
   1791  }
   1792  ASSERT_EQ(num_10, num_12);
   1793 }
   1794 
   1795 template <typename T>
   1796 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams(
   1797    T test_func) {
   1798  return ::testing::ValuesIn(GetHighbdLumaTestParams(test_func));
   1799 }
   1800 
   1801 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1802 
   1803 // Compound cases also need to test different frame offsets and weightings.
   1804 class CompoundParam {
   1805 public:
   1806  CompoundParam(bool use_dist_wtd_comp_avg, int fwd_offset, int bck_offset)
   1807      : use_dist_wtd_comp_avg_(use_dist_wtd_comp_avg), fwd_offset_(fwd_offset),
   1808        bck_offset_(bck_offset) {}
   1809 
   1810  bool UseDistWtdCompAvg() const { return use_dist_wtd_comp_avg_; }
   1811  int FwdOffset() const { return fwd_offset_; }
   1812  int BckOffset() const { return bck_offset_; }
   1813 
   1814 private:
   1815  bool use_dist_wtd_comp_avg_;
   1816  int fwd_offset_;
   1817  int bck_offset_;
   1818 };
   1819 
   1820 std::vector<CompoundParam> GetCompoundParams() {
   1821  std::vector<CompoundParam> result;
   1822  result.push_back(CompoundParam(false, 0, 0));
   1823  for (int k = 0; k < 2; ++k) {
   1824    for (int l = 0; l < 4; ++l) {
   1825      result.push_back(CompoundParam(true, quant_dist_lookup_table[l][k],
   1826                                     quant_dist_lookup_table[l][1 - k]));
   1827    }
   1828  }
   1829  return result;
   1830 }
   1831 
   1832 TEST_F(AV1ConvolveParametersTest, GetCompoundParams) {
   1833  auto v = GetCompoundParams();
   1834  ASSERT_EQ(9U, v.size());
   1835  ASSERT_FALSE(v[0].UseDistWtdCompAvg());
   1836  for (size_t i = 1; i < v.size(); ++i) {
   1837    ASSERT_TRUE(v[i].UseDistWtdCompAvg());
   1838  }
   1839 }
   1840 
   1841 ////////////////////////////////////////////////
   1842 // Compound convolve-x functions (low bit-depth)
   1843 ////////////////////////////////////////////////
   1844 
   1845 ConvolveParams GetConvolveParams(int do_average, CONV_BUF_TYPE *conv_buf,
   1846                                 int width, int bit_depth,
   1847                                 const CompoundParam &compound) {
   1848  ConvolveParams conv_params =
   1849      get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth);
   1850  conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg();
   1851  conv_params.fwd_offset = compound.FwdOffset();
   1852  conv_params.bck_offset = compound.BckOffset();
   1853  return conv_params;
   1854 }
   1855 
   1856 class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> {
   1857 public:
   1858  void RunTest() {
   1859    auto compound_params = GetCompoundParams();
   1860    // Do not test the no-op filter.
   1861    for (int sub_pix = 1; sub_pix < 16; ++sub_pix) {
   1862      for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
   1863        for (const auto &c : compound_params) {
   1864          TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
   1865        }
   1866      }
   1867    }
   1868  }
   1869 
   1870 protected:
   1871  virtual const InterpFilterParams *FilterParams(InterpFilter f,
   1872                                                 const BlockSize &block) const {
   1873    return av1_get_interp_filter_params_with_block_size(f, block.Width());
   1874  }
   1875 
   1876  virtual convolve_x_func ReferenceFunc() const {
   1877    return av1_dist_wtd_convolve_x_c;
   1878  }
   1879 
   1880 private:
   1881  void TestConvolve(const int sub_pix, const InterpFilter filter,
   1882                    const CompoundParam &compound) {
   1883    const int width = GetParam().Block().Width();
   1884    const int height = GetParam().Block().Height();
   1885    const uint8_t *input1 = FirstRandomInput8(GetParam());
   1886    const uint8_t *input2 = SecondRandomInput8(GetParam());
   1887    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   1888    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
   1889    Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
   1890             compound, sub_pix, filter);
   1891 
   1892    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   1893    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
   1894    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
   1895             compound, sub_pix, filter);
   1896 
   1897    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
   1898    AssertOutputBufferEq(reference, test, width, height);
   1899  }
   1900 
   1901 private:
   1902  void Convolve(convolve_x_func test_func, const uint8_t *src1,
   1903                const uint8_t *src2, uint8_t *dst, CONV_BUF_TYPE *conv_buf,
   1904                const CompoundParam &compound, const int sub_pix,
   1905                const InterpFilter filter) {
   1906    const int width = GetParam().Block().Width();
   1907    const int height = GetParam().Block().Height();
   1908    const InterpFilterParams *filter_params =
   1909        FilterParams(filter, GetParam().Block());
   1910 
   1911    ConvolveParams conv_params =
   1912        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
   1913    test_func(src1, width, dst, kOutputStride, width, height, filter_params,
   1914              sub_pix, &conv_params);
   1915 
   1916    conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
   1917    test_func(src2, width, dst, kOutputStride, width, height, filter_params,
   1918              sub_pix, &conv_params);
   1919  }
   1920 };
   1921 
// Runs the compound convolve-x correctness check once per registered
// parameter.
TEST_P(AV1ConvolveXCompoundTest, RunTest) { RunTest(); }

// The C implementation is always registered.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_c));

// Architecture-specific implementations, registered only when the matching
// HAVE_* macro is non-zero for this build.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm));
#endif
   1953 
   1954 #if CONFIG_AV1_HIGHBITDEPTH
   1955 /////////////////////////////////////////////////
   1956 // Compound convolve-x functions (high bit-depth)
   1957 /////////////////////////////////////////////////
   1958 class AV1ConvolveXHighbdCompoundTest
   1959    : public AV1ConvolveTest<highbd_convolve_x_func> {
   1960 public:
   1961  void RunTest() {
   1962    auto compound_params = GetCompoundParams();
   1963    // Do not test the no-op filter.
   1964    for (int sub_pix = 1; sub_pix < 16; ++sub_pix) {
   1965      for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
   1966        for (const auto &c : compound_params) {
   1967          TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
   1968        }
   1969      }
   1970    }
   1971  }
   1972 
   1973 protected:
   1974  virtual const InterpFilterParams *FilterParams(InterpFilter f,
   1975                                                 const BlockSize &block) const {
   1976    return av1_get_interp_filter_params_with_block_size(f, block.Width());
   1977  }
   1978 
   1979  virtual highbd_convolve_x_func ReferenceFunc() const {
   1980    return av1_highbd_dist_wtd_convolve_x_c;
   1981  }
   1982 
   1983 private:
   1984  void TestConvolve(const int sub_pix, const InterpFilter filter,
   1985                    const CompoundParam &compound) {
   1986    const int width = GetParam().Block().Width();
   1987    const int height = GetParam().Block().Height();
   1988 
   1989    const uint16_t *input1 = FirstRandomInput16(GetParam());
   1990    const uint16_t *input2 = SecondRandomInput16(GetParam());
   1991    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   1992    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
   1993    Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
   1994             compound, sub_pix, filter);
   1995 
   1996    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   1997    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
   1998    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
   1999             compound, sub_pix, filter);
   2000 
   2001    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
   2002    AssertOutputBufferEq(reference, test, width, height);
   2003  }
   2004 
   2005  void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1,
   2006                const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf,
   2007                const CompoundParam &compound, const int sub_pix,
   2008                const InterpFilter filter) {
   2009    const int width = GetParam().Block().Width();
   2010    const int height = GetParam().Block().Height();
   2011    const int bit_depth = GetParam().BitDepth();
   2012    const InterpFilterParams *filter_params =
   2013        FilterParams(filter, GetParam().Block());
   2014    ConvolveParams conv_params =
   2015        GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
   2016    test_func(src1, width, dst, kOutputStride, width, height, filter_params,
   2017              sub_pix, &conv_params, bit_depth);
   2018    conv_params =
   2019        GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
   2020    test_func(src2, width, dst, kOutputStride, width, height, filter_params,
   2021              sub_pix, &conv_params, bit_depth);
   2022  }
   2023 };
   2024 
// Runs the high bit-depth compound convolve-x correctness check once per
// registered parameter.
TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) { RunTest(); }

// The C implementation is always registered.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c));

// Architecture-specific implementations, registered only when the matching
// HAVE_* macro is non-zero for this build.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sve2));
#endif
   2054 
   2055 #endif  // CONFIG_AV1_HIGHBITDEPTH
   2056 
   2057 ////////////////////////////////////////////////
   2058 // Compound convolve-y functions (low bit-depth)
   2059 ////////////////////////////////////////////////
   2060 
   2061 // Note that the X and Y convolve functions have the same type signature and
   2062 // logic; they only differentiate the filter parameters and reference function.
   2063 class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest {
   2064 protected:
   2065  const InterpFilterParams *FilterParams(
   2066      InterpFilter f, const BlockSize &block) const override {
   2067    return av1_get_interp_filter_params_with_block_size(f, block.Height());
   2068  }
   2069 
   2070  convolve_x_func ReferenceFunc() const override {
   2071    return av1_dist_wtd_convolve_y_c;
   2072  }
   2073 };
   2074 
// Runs the compound convolve-y correctness check once per registered
// parameter.
TEST_P(AV1ConvolveYCompoundTest, RunTest) { RunTest(); }

// The C implementation is always registered.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_c));

// Architecture-specific implementations, registered only when the matching
// HAVE_* macro is non-zero for this build.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYCompoundTest,
                         BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon));
#endif
   2094 
   2095 #if CONFIG_AV1_HIGHBITDEPTH
   2096 /////////////////////////////////////////////////
   2097 // Compound convolve-y functions (high bit-depth)
   2098 /////////////////////////////////////////////////
   2099 
   2100 // Again, the X and Y convolve functions have the same type signature and logic.
   2101 class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest {
   2102  highbd_convolve_x_func ReferenceFunc() const override {
   2103    return av1_highbd_dist_wtd_convolve_y_c;
   2104  }
   2105  const InterpFilterParams *FilterParams(
   2106      InterpFilter f, const BlockSize &block) const override {
   2107    return av1_get_interp_filter_params_with_block_size(f, block.Height());
   2108  }
   2109 };
   2110 
   2111 TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) { RunTest(); }
   2112 
   2113 INSTANTIATE_TEST_SUITE_P(
   2114    C, AV1ConvolveYHighbdCompoundTest,
   2115    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c));
   2116 
   2117 #if HAVE_SSE4_1
   2118 INSTANTIATE_TEST_SUITE_P(
   2119    SSE4_1, AV1ConvolveYHighbdCompoundTest,
   2120    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1));
   2121 #endif
   2122 
   2123 #if HAVE_AVX2
   2124 INSTANTIATE_TEST_SUITE_P(
   2125    AVX2, AV1ConvolveYHighbdCompoundTest,
   2126    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2));
   2127 #endif
   2128 
   2129 #if HAVE_NEON
   2130 INSTANTIATE_TEST_SUITE_P(
   2131    NEON, AV1ConvolveYHighbdCompoundTest,
   2132    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon));
   2133 #endif
   2134 
   2135 #if HAVE_SVE2
   2136 INSTANTIATE_TEST_SUITE_P(
   2137    SVE2, AV1ConvolveYHighbdCompoundTest,
   2138    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sve2));
   2139 #endif
   2140 
   2141 #endif  // CONFIG_AV1_HIGHBITDEPTH
   2142 
   2143 //////////////////////////////////////////////////////
   2144 // Compound convolve-2d-copy functions (low bit-depth)
   2145 //////////////////////////////////////////////////////
   2146 using compound_conv_2d_copy_func = void (*)(const uint8_t *src, int src_stride,
   2147                                            uint8_t *dst, int dst_stride, int w,
   2148                                            int h, ConvolveParams *conv_params);
   2149 
   2150 class AV1Convolve2DCopyCompoundTest
   2151    : public AV1ConvolveTest<compound_conv_2d_copy_func> {
   2152 public:
   2153  void RunTest() {
   2154    auto compound_params = GetCompoundParams();
   2155    for (const auto &compound : compound_params) {
   2156      TestConvolve(compound);
   2157    }
   2158  }
   2159  void SpeedTest() {
   2160    for (const auto &compound : GetCompoundParams()) {
   2161      TestConvolveSpeed(compound, 100000);
   2162    }
   2163  }
   2164 
   2165 private:
   2166  void TestConvolve(const CompoundParam &compound) {
   2167    const BlockSize &block = GetParam().Block();
   2168    const int width = block.Width();
   2169    const int height = block.Height();
   2170 
   2171    const uint8_t *input1 = FirstRandomInput8(GetParam());
   2172    const uint8_t *input2 = SecondRandomInput8(GetParam());
   2173    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   2174    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
   2175    Convolve(av1_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
   2176             reference_conv_buf, compound);
   2177 
   2178    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   2179    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
   2180    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
   2181             compound);
   2182 
   2183    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
   2184    AssertOutputBufferEq(reference, test, width, height);
   2185  }
   2186 
   2187  void TestConvolveSpeed(const CompoundParam &compound, const int num_iters) {
   2188    const int width = GetParam().Block().Width();
   2189    const int height = GetParam().Block().Height();
   2190 
   2191    const uint8_t *src0 = FirstRandomInput8(GetParam());
   2192    const uint8_t *src1 = SecondRandomInput8(GetParam());
   2193    DECLARE_ALIGNED(32, uint8_t, dst[MAX_SB_SQUARE]);
   2194    DECLARE_ALIGNED(32, CONV_BUF_TYPE, conv_buf[MAX_SB_SQUARE]);
   2195 
   2196    const auto test_func = GetParam().TestFunction();
   2197 
   2198    ConvolveParams conv_params_0 =
   2199        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
   2200    ConvolveParams conv_params_1 =
   2201        GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
   2202 
   2203    aom_usec_timer timer;
   2204    aom_usec_timer_start(&timer);
   2205    for (int i = 0; i < num_iters; ++i) {
   2206      av1_dist_wtd_convolve_2d_copy_c(src0, width, dst, kOutputStride, width,
   2207                                      height, &conv_params_0);
   2208      av1_dist_wtd_convolve_2d_copy_c(src1, width, dst, kOutputStride, width,
   2209                                      height, &conv_params_1);
   2210    }
   2211    aom_usec_timer_mark(&timer);
   2212    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   2213 
   2214    aom_usec_timer_start(&timer);
   2215    for (int i = 0; i < num_iters; ++i) {
   2216      test_func(src0, width, dst, kOutputStride, width, height, &conv_params_0);
   2217      test_func(src1, width, dst, kOutputStride, width, height, &conv_params_1);
   2218    }
   2219    aom_usec_timer_mark(&timer);
   2220    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
   2221    printf("Dist Weighted: %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n",
   2222           compound.UseDistWtdCompAvg(), width, height, time1, time2,
   2223           time1 / time2);
   2224  }
   2225 
   2226  void Convolve(compound_conv_2d_copy_func test_func, const uint8_t *src1,
   2227                const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
   2228                const CompoundParam &compound) {
   2229    const BlockSize &block = GetParam().Block();
   2230    const int width = block.Width();
   2231    const int height = block.Height();
   2232    ConvolveParams conv_params =
   2233        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
   2234    test_func(src1, width, dst, kOutputStride, width, height, &conv_params);
   2235 
   2236    conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
   2237    test_func(src2, width, dst, kOutputStride, width, height, &conv_params);
   2238  }
   2239 };
   2240 
   2241 TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) { RunTest(); }
   2242 TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) { SpeedTest(); }
   2243 
   2244 INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCopyCompoundTest,
   2245                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_c));
   2246 
   2247 #if HAVE_SSE2
   2248 INSTANTIATE_TEST_SUITE_P(
   2249    SSE2, AV1Convolve2DCopyCompoundTest,
   2250    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_sse2));
   2251 #endif
   2252 
   2253 #if HAVE_AVX2
   2254 INSTANTIATE_TEST_SUITE_P(
   2255    AVX2, AV1Convolve2DCopyCompoundTest,
   2256    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_avx2));
   2257 #endif
   2258 
   2259 #if HAVE_NEON
   2260 INSTANTIATE_TEST_SUITE_P(
   2261    NEON, AV1Convolve2DCopyCompoundTest,
   2262    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon));
   2263 #endif
   2264 
   2265 #if CONFIG_AV1_HIGHBITDEPTH
   2266 ///////////////////////////////////////////////////////
   2267 // Compound convolve-2d-copy functions (high bit-depth)
   2268 ///////////////////////////////////////////////////////
   2269 using highbd_compound_conv_2d_copy_func =
   2270    void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
   2271             int w, int h, ConvolveParams *conv_params, int bd);
   2272 
   2273 class AV1Convolve2DCopyHighbdCompoundTest
   2274    : public AV1ConvolveTest<highbd_compound_conv_2d_copy_func> {
   2275 public:
   2276  void RunTest() {
   2277    auto compound_params = GetCompoundParams();
   2278    for (const auto &compound : compound_params) {
   2279      TestConvolve(compound);
   2280    }
   2281  }
   2282 
   2283 private:
   2284  void TestConvolve(const CompoundParam &compound) {
   2285    const BlockSize &block = GetParam().Block();
   2286    const int width = block.Width();
   2287    const int height = block.Height();
   2288 
   2289    const uint16_t *input1 = FirstRandomInput16(GetParam());
   2290    const uint16_t *input2 = SecondRandomInput16(GetParam());
   2291    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   2292    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
   2293    Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
   2294             reference_conv_buf, compound);
   2295 
   2296    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   2297    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
   2298    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
   2299             compound);
   2300 
   2301    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
   2302    AssertOutputBufferEq(reference, test, width, height);
   2303  }
   2304 
   2305  void Convolve(highbd_compound_conv_2d_copy_func test_func,
   2306                const uint16_t *src1, const uint16_t *src2, uint16_t *dst,
   2307                uint16_t *conv_buf, const CompoundParam &compound) {
   2308    const BlockSize &block = GetParam().Block();
   2309    const int width = block.Width();
   2310    const int height = block.Height();
   2311    const int bit_depth = GetParam().BitDepth();
   2312 
   2313    ConvolveParams conv_params =
   2314        GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
   2315    test_func(src1, width, dst, kOutputStride, width, height, &conv_params,
   2316              bit_depth);
   2317 
   2318    conv_params =
   2319        GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
   2320    test_func(src2, width, dst, kOutputStride, width, height, &conv_params,
   2321              bit_depth);
   2322  }
   2323 };
   2324 
   2325 TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) { RunTest(); }
   2326 
   2327 INSTANTIATE_TEST_SUITE_P(
   2328    C, AV1Convolve2DCopyHighbdCompoundTest,
   2329    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c));
   2330 
   2331 #if HAVE_SSE4_1
   2332 INSTANTIATE_TEST_SUITE_P(
   2333    SSE4_1, AV1Convolve2DCopyHighbdCompoundTest,
   2334    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1));
   2335 #endif
   2336 
   2337 #if HAVE_AVX2
   2338 INSTANTIATE_TEST_SUITE_P(
   2339    AVX2, AV1Convolve2DCopyHighbdCompoundTest,
   2340    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2));
   2341 #endif
   2342 
   2343 #if HAVE_NEON
   2344 INSTANTIATE_TEST_SUITE_P(
   2345    NEON, AV1Convolve2DCopyHighbdCompoundTest,
   2346    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon));
   2347 #endif
   2348 
   2349 #endif  // CONFIG_AV1_HIGHBITDEPTH
   2350 
   2351 /////////////////////////////////////////////////
   2352 // Compound convolve-2d functions (low bit-depth)
   2353 /////////////////////////////////////////////////
   2354 
   2355 class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> {
   2356 public:
   2357  void RunTest() {
   2358    auto compound_params = GetCompoundParams();
   2359    for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
   2360      for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
   2361        // Do not test the no-op filter.
   2362        for (int sub_x = 1; sub_x < 16; ++sub_x) {
   2363          for (int sub_y = 1; sub_y < 16; ++sub_y) {
   2364            for (const auto &compound : compound_params) {
   2365              TestConvolve(static_cast<InterpFilter>(h_f),
   2366                           static_cast<InterpFilter>(v_f), sub_x, sub_y,
   2367                           compound);
   2368            }
   2369          }
   2370        }
   2371      }
   2372    }
   2373  }
   2374 
   2375 private:
   2376  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
   2377                    const int sub_x, const int sub_y,
   2378                    const CompoundParam &compound) {
   2379    const BlockSize &block = GetParam().Block();
   2380    const int width = block.Width();
   2381    const int height = block.Height();
   2382 
   2383    const uint8_t *input1 = FirstRandomInput8(GetParam());
   2384    const uint8_t *input2 = SecondRandomInput8(GetParam());
   2385    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
   2386    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
   2387    Convolve(av1_dist_wtd_convolve_2d_c, input1, input2, reference,
   2388             reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
   2389 
   2390    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
   2391    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
   2392    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
   2393             compound, h_f, v_f, sub_x, sub_y);
   2394 
   2395    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
   2396    AssertOutputBufferEq(reference, test, width, height);
   2397  }
   2398 
   2399 private:
   2400  void Convolve(convolve_2d_func test_func, const uint8_t *src1,
   2401                const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
   2402                const CompoundParam &compound, const InterpFilter h_f,
   2403                const InterpFilter v_f, const int sub_x, const int sub_y) {
   2404    const BlockSize &block = GetParam().Block();
   2405    const int width = block.Width();
   2406    const int height = block.Height();
   2407 
   2408    const InterpFilterParams *filter_params_x =
   2409        av1_get_interp_filter_params_with_block_size(h_f, width);
   2410    const InterpFilterParams *filter_params_y =
   2411        av1_get_interp_filter_params_with_block_size(v_f, height);
   2412    ConvolveParams conv_params =
   2413        GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
   2414 
   2415    test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
   2416              filter_params_y, sub_x, sub_y, &conv_params);
   2417 
   2418    conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
   2419    test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
   2420              filter_params_y, sub_x, sub_y, &conv_params);
   2421  }
   2422 };
   2423 
   2424 TEST_P(AV1Convolve2DCompoundTest, RunTest) { RunTest(); }
   2425 
   2426 INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCompoundTest,
   2427                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_c));
   2428 
   2429 #if HAVE_SSSE3
   2430 INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DCompoundTest,
   2431                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_ssse3));
   2432 #endif
   2433 
   2434 #if HAVE_AVX2
   2435 INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DCompoundTest,
   2436                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_avx2));
   2437 #endif
   2438 
   2439 #if HAVE_NEON
   2440 INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DCompoundTest,
   2441                         BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon));
   2442 #endif
   2443 
   2444 #if HAVE_NEON_DOTPROD
   2445 INSTANTIATE_TEST_SUITE_P(
   2446    NEON_DOTPROD, AV1Convolve2DCompoundTest,
   2447    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod));
   2448 #endif
   2449 
   2450 #if HAVE_NEON_I8MM
   2451 INSTANTIATE_TEST_SUITE_P(
   2452    NEON_I8MM, AV1Convolve2DCompoundTest,
   2453    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm));
   2454 #endif
   2455 
   2456 #if CONFIG_AV1_HIGHBITDEPTH
   2457 //////////////////////////////////////////////////
   2458 // Compound convolve-2d functions (high bit-depth)
   2459 //////////////////////////////////////////////////
   2460 
   2461 class AV1Convolve2DHighbdCompoundTest
   2462    : public AV1ConvolveTest<highbd_convolve_2d_func> {
   2463 public:
   2464  void RunTest() {
   2465    auto compound_params = GetCompoundParams();
   2466    for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
   2467      for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
   2468        // Do not test the no-op filter.
   2469        for (int sub_x = 1; sub_x < 16; ++sub_x) {
   2470          for (int sub_y = 1; sub_y < 16; ++sub_y) {
   2471            for (const auto &compound : compound_params) {
   2472              TestConvolve(static_cast<InterpFilter>(h_f),
   2473                           static_cast<InterpFilter>(v_f), sub_x, sub_y,
   2474                           compound);
   2475            }
   2476          }
   2477        }
   2478      }
   2479    }
   2480  }
   2481 
   2482 private:
   2483  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
   2484                    const int sub_x, const int sub_y,
   2485                    const CompoundParam &compound) {
   2486    const BlockSize &block = GetParam().Block();
   2487    const int width = block.Width();
   2488    const int height = block.Height();
   2489    const uint16_t *input1 = FirstRandomInput16(GetParam());
   2490    const uint16_t *input2 = SecondRandomInput16(GetParam());
   2491    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
   2492    DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
   2493    Convolve(av1_highbd_dist_wtd_convolve_2d_c, input1, input2, reference,
   2494             reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
   2495 
   2496    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
   2497    DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
   2498    Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
   2499             compound, h_f, v_f, sub_x, sub_y);
   2500 
   2501    AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
   2502    AssertOutputBufferEq(reference, test, width, height);
   2503  }
   2504 
   2505 private:
   2506  void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1,
   2507                const uint16_t *src2, uint16_t *dst, uint16_t *conv_buf,
   2508                const CompoundParam &compound, const InterpFilter h_f,
   2509                const InterpFilter v_f, const int sub_x, const int sub_y) {
   2510    const BlockSize &block = GetParam().Block();
   2511    const int width = block.Width();
   2512    const int height = block.Height();
   2513 
   2514    const InterpFilterParams *filter_params_x =
   2515        av1_get_interp_filter_params_with_block_size(h_f, width);
   2516    const InterpFilterParams *filter_params_y =
   2517        av1_get_interp_filter_params_with_block_size(v_f, height);
   2518    const int bit_depth = GetParam().BitDepth();
   2519    ConvolveParams conv_params =
   2520        GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
   2521    test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
   2522              filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
   2523 
   2524    conv_params =
   2525        GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
   2526    test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
   2527              filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
   2528  }
   2529 };
   2530 
   2531 TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) { RunTest(); }
   2532 
   2533 INSTANTIATE_TEST_SUITE_P(
   2534    C, AV1Convolve2DHighbdCompoundTest,
   2535    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c));
   2536 
   2537 #if HAVE_SSE4_1
   2538 INSTANTIATE_TEST_SUITE_P(
   2539    SSE4_1, AV1Convolve2DHighbdCompoundTest,
   2540    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1));
   2541 #endif
   2542 
   2543 #if HAVE_AVX2
   2544 INSTANTIATE_TEST_SUITE_P(
   2545    AVX2, AV1Convolve2DHighbdCompoundTest,
   2546    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2));
   2547 #endif
   2548 
   2549 #if HAVE_NEON
   2550 INSTANTIATE_TEST_SUITE_P(
   2551    NEON, AV1Convolve2DHighbdCompoundTest,
   2552    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon));
   2553 #endif
   2554 
   2555 #if HAVE_SVE2
   2556 INSTANTIATE_TEST_SUITE_P(
   2557    SVE2, AV1Convolve2DHighbdCompoundTest,
   2558    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sve2));
   2559 #endif
   2560 
   2561 #endif  // CONFIG_AV1_HIGHBITDEPTH
   2562 
   2563 }  // namespace