av1_convolve_test.cc (98594B)
/*
 * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <ostream>
#include <set>
#include <vector>

#include "config/av1_rtcd.h"
#include "config/aom_dsp_rtcd.h"
#include "aom_ports/aom_timer.h"
#include "gtest/gtest.h"
#include "test/acm_random.h"

namespace {

// TODO(any): Remove following INTERP_FILTERS_ALL define, so that 12-tap filter
// is tested once 12-tap filter SIMD is done.
#undef INTERP_FILTERS_ALL
#define INTERP_FILTERS_ALL 4

// All single reference convolve tests are parameterized on block size,
// bit-depth, and function to test.
//
// Note that parameterizing on these variables (and not other parameters) is
// a conscious decision - Jenkins needs some degree of parallelization to run
// the tests within the time limit, but if the number of parameters increases
// too much, the gtest framework does not handle it well (increased overhead per
// test, huge amount of output to stdout, etc.).
//
// Also note that the test suites must be named with the architecture, e.g.,
// C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests
// that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86
// binaries) and will disable tests using a filter like
// --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the
// testing infrastructure will not selectively filter them properly.
// Width/height pair identifying one block size under test.
class BlockSize {
 public:
  BlockSize(int w, int h) : width_(w), height_(h) {}

  int Width() const { return width_; }
  int Height() const { return height_; }

  // Orders by width first and by height second, so that BlockSize can be
  // stored in ordered containers such as std::set.
  bool operator<(const BlockSize &other) const {
    if (Width() == other.Width()) {
      return Height() < other.Height();
    }
    return Width() < other.Width();
  }

  bool operator==(const BlockSize &other) const {
    return Width() == other.Width() && Height() == other.Height();
  }

 private:
  int width_;
  int height_;
};

// Block size / bit depth / test function used to parameterize the tests.
template <typename T>
class TestParam {
 public:
  TestParam(const BlockSize &block, int bd, T test_func)
      : block_(block), bd_(bd), test_func_(test_func) {}

  const BlockSize &Block() const { return block_; }
  int BitDepth() const { return bd_; }
  T TestFunction() const { return test_func_; }

  // Two parameters are equal when block size, bit depth and test function
  // all match.
  bool operator==(const TestParam &other) const {
    return Block() == other.Block() && BitDepth() == other.BitDepth() &&
           TestFunction() == other.TestFunction();
  }

 private:
  BlockSize block_;
  int bd_;
  T test_func_;
};

// Prints the block dimensions and bit depth of a test parameter. The test
// function itself is not printed.
template <typename T>
std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) {
  return os << "TestParam { width:" << test_arg.Block().Width()
            << " height:" << test_arg.Block().Height()
            << " bd:" << test_arg.BitDepth() << " }";
}

// Generate the list of all block widths / heights that need to be tested,
// includes chroma and luma sizes, for the given bit-depths. The test
// function is the same for all generated parameters.
100 template <typename T> 101 std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths, 102 T test_func) { 103 std::set<BlockSize> sizes; 104 for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) { 105 const int w = block_size_wide[b]; 106 const int h = block_size_high[b]; 107 sizes.insert(BlockSize(w, h)); 108 // Add in smaller chroma sizes as well. 109 if (w == 4 || h == 4) { 110 sizes.insert(BlockSize(w / 2, h / 2)); 111 } 112 } 113 std::vector<TestParam<T>> result; 114 for (const BlockSize &block : sizes) { 115 for (int bd : bit_depths) { 116 result.push_back(TestParam<T>(block, bd, test_func)); 117 } 118 } 119 return result; 120 } 121 122 template <typename T> 123 std::vector<TestParam<T>> GetLowbdTestParams(T test_func) { 124 return GetTestParams({ 8 }, test_func); 125 } 126 127 template <typename T> 128 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams( 129 T test_func) { 130 return ::testing::ValuesIn(GetLowbdTestParams(test_func)); 131 } 132 133 // Test the test-parameters generators work as expected. 134 class AV1ConvolveParametersTest : public ::testing::Test {}; 135 136 TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) { 137 auto v = GetLowbdTestParams(av1_convolve_x_sr_c); 138 ASSERT_EQ(27U, v.size()); 139 for (const auto &p : v) { 140 ASSERT_EQ(8, p.BitDepth()); 141 // Needed (instead of ASSERT_EQ(...) since gtest does not 142 // have built in printing for arbitrary functions, which 143 // causes a compilation error. 
144 bool same_fn = av1_convolve_x_sr_c == p.TestFunction(); 145 ASSERT_TRUE(same_fn); 146 } 147 } 148 149 #if CONFIG_AV1_HIGHBITDEPTH 150 template <typename T> 151 std::vector<TestParam<T>> GetHighbdTestParams(T test_func) { 152 return GetTestParams({ 10, 12 }, test_func); 153 } 154 155 template <typename T> 156 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams( 157 T test_func) { 158 return ::testing::ValuesIn(GetHighbdTestParams(test_func)); 159 } 160 161 TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) { 162 auto v = GetHighbdTestParams(av1_highbd_convolve_x_sr_c); 163 ASSERT_EQ(54U, v.size()); 164 int num_10 = 0; 165 int num_12 = 0; 166 for (const auto &p : v) { 167 ASSERT_TRUE(p.BitDepth() == 10 || p.BitDepth() == 12); 168 bool same_fn = av1_highbd_convolve_x_sr_c == p.TestFunction(); 169 ASSERT_TRUE(same_fn); 170 if (p.BitDepth() == 10) { 171 ++num_10; 172 } else { 173 ++num_12; 174 } 175 } 176 ASSERT_EQ(num_10, num_12); 177 } 178 #endif // CONFIG_AV1_HIGHBITDEPTH 179 180 // AV1ConvolveTest is the base class that all convolve tests should derive from. 181 // It provides storage/methods for generating randomized buffers for both 182 // low bit-depth and high bit-depth, and setup/teardown methods for clearing 183 // system state. Implementors can get the bit-depth / block-size / 184 // test function by calling GetParam(). 185 template <typename T> 186 class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> { 187 public: 188 ~AV1ConvolveTest() override = default; 189 190 void SetUp() override { 191 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); 192 } 193 194 // Randomizes the 8-bit input buffer and returns a pointer to it. Note that 195 // the pointer is safe to use with an 8-tap filter. The stride can range 196 // from width to (width + kPadding). Also note that the pointer is to the 197 // same memory location. 198 static constexpr int kInputPadding = 12; 199 200 // Get a pointer to a buffer with stride == width. 
Note that we must have 201 // the test param passed in explicitly -- the gtest framework does not 202 // support calling GetParam() within a templatized class. 203 // Note that FirstRandomInput8 always returns the same pointer -- if two 204 // inputs are needed, also use SecondRandomInput8. 205 const uint8_t *FirstRandomInput8(const TestParam<T> ¶m) { 206 // Note we can't call GetParam() directly -- gtest does not support 207 // this for parameterized types. 208 return RandomInput8(input8_1_, param); 209 } 210 211 const uint8_t *SecondRandomInput8(const TestParam<T> ¶m) { 212 return RandomInput8(input8_2_, param); 213 } 214 215 // Some of the intrinsics perform writes in 32 byte chunks. Moreover, some 216 // of the instrinsics assume that the stride is also a multiple of 32. 217 // To satisfy these constraints and also remain simple, output buffer strides 218 // are assumed MAX_SB_SIZE. 219 static constexpr int kOutputStride = MAX_SB_SIZE; 220 221 // Check that two 8-bit output buffers are identical. 222 void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width, 223 int height) { 224 ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations"; 225 for (int j = 0; j < height; ++j) { 226 if (memcmp(p1, p2, sizeof(*p1) * width) == 0) { 227 p1 += kOutputStride; 228 p2 += kOutputStride; 229 continue; 230 } 231 for (int i = 0; i < width; ++i) { 232 ASSERT_EQ(p1[i], p2[i]) 233 << width << "x" << height << " Pixel mismatch at (" << i << ", " 234 << j << ")"; 235 } 236 } 237 } 238 239 // Check that two 16-bit output buffers are identical. 
240 void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width, 241 int height) { 242 ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations"; 243 for (int j = 0; j < height; ++j) { 244 if (memcmp(p1, p2, sizeof(*p1) * width) == 0) { 245 p1 += kOutputStride; 246 p2 += kOutputStride; 247 continue; 248 } 249 for (int i = 0; i < width; ++i) { 250 ASSERT_EQ(p1[i], p2[i]) 251 << width << "x" << height << " Pixel mismatch at (" << i << ", " 252 << j << ")"; 253 } 254 } 255 } 256 257 #if CONFIG_AV1_HIGHBITDEPTH 258 // Note that the randomized values are capped by bit-depth. 259 const uint16_t *FirstRandomInput16(const TestParam<T> ¶m) { 260 return RandomInput16(input16_1_, param); 261 } 262 263 const uint16_t *SecondRandomInput16(const TestParam<T> ¶m) { 264 return RandomInput16(input16_2_, param); 265 } 266 #endif 267 268 private: 269 const uint8_t *RandomInput8(uint8_t *p, const TestParam<T> ¶m) { 270 EXPECT_EQ(8, param.BitDepth()); 271 EXPECT_GE(MAX_SB_SIZE, param.Block().Width()); 272 EXPECT_GE(MAX_SB_SIZE, param.Block().Height()); 273 const int padded_width = param.Block().Width() + kInputPadding; 274 const int padded_height = param.Block().Height() + kInputPadding; 275 Randomize(p, padded_width * padded_height); 276 return p + (kInputPadding / 2) * padded_width + kInputPadding / 2; 277 } 278 279 void Randomize(uint8_t *p, int size) { 280 for (int i = 0; i < size; ++i) { 281 p[i] = rnd_.Rand8(); 282 } 283 } 284 285 #if CONFIG_AV1_HIGHBITDEPTH 286 const uint16_t *RandomInput16(uint16_t *p, const TestParam<T> ¶m) { 287 // Check that this is only called with high bit-depths. 
288 EXPECT_TRUE(param.BitDepth() == 10 || param.BitDepth() == 12); 289 EXPECT_GE(MAX_SB_SIZE, param.Block().Width()); 290 EXPECT_GE(MAX_SB_SIZE, param.Block().Height()); 291 const int padded_width = param.Block().Width() + kInputPadding; 292 const int padded_height = param.Block().Height() + kInputPadding; 293 Randomize(p, padded_width * padded_height, param.BitDepth()); 294 return p + (kInputPadding / 2) * padded_width + kInputPadding / 2; 295 } 296 297 void Randomize(uint16_t *p, int size, int bit_depth) { 298 for (int i = 0; i < size; ++i) { 299 p[i] = rnd_.Rand16() & ((1 << bit_depth) - 1); 300 } 301 } 302 #endif 303 304 static constexpr int kInputStride = MAX_SB_SIZE + kInputPadding; 305 306 libaom_test::ACMRandom rnd_; 307 // Statically allocate all the memory that is needed for the tests. Note 308 // that we cannot allocate output memory here. It must use DECLARE_ALIGNED, 309 // which is a C99 feature and interacts badly with C++ member variables. 310 uint8_t input8_1_[kInputStride * kInputStride]; 311 uint8_t input8_2_[kInputStride * kInputStride]; 312 #if CONFIG_AV1_HIGHBITDEPTH 313 uint16_t input16_1_[kInputStride * kInputStride]; 314 uint16_t input16_2_[kInputStride * kInputStride]; 315 #endif 316 }; 317 318 //////////////////////////////////////////////////////// 319 // Single reference convolve-x functions (low bit-depth) 320 //////////////////////////////////////////////////////// 321 using convolve_x_func = void (*)(const uint8_t *src, int src_stride, 322 uint8_t *dst, int dst_stride, int w, int h, 323 const InterpFilterParams *filter_params_x, 324 const int subpel_x_qn, 325 ConvolveParams *conv_params); 326 327 class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> { 328 public: 329 void RunTest() { 330 // Do not test the no-op filter. 
331 for (int sub_x = 1; sub_x < 16; ++sub_x) { 332 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 333 ++filter) { 334 InterpFilter f = static_cast<InterpFilter>(filter); 335 TestConvolve(sub_x, f); 336 } 337 } 338 } 339 340 public: 341 void SpeedTest() { 342 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 343 ++filter) { 344 InterpFilter f = static_cast<InterpFilter>(filter); 345 TestConvolveSpeed(f, 10000); 346 } 347 } 348 349 private: 350 void TestConvolve(const int sub_x, const InterpFilter filter) { 351 const int width = GetParam().Block().Width(); 352 const int height = GetParam().Block().Height(); 353 354 const InterpFilterParams *filter_params_x = 355 av1_get_interp_filter_params_with_block_size(filter, width); 356 ConvolveParams conv_params1 = 357 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 358 const uint8_t *input = FirstRandomInput8(GetParam()); 359 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 360 av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height, 361 filter_params_x, sub_x, &conv_params1); 362 363 ConvolveParams conv_params2 = 364 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 365 convolve_x_func test_func = GetParam().TestFunction(); 366 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 367 test_func(input, width, test, kOutputStride, width, height, filter_params_x, 368 sub_x, &conv_params2); 369 AssertOutputBufferEq(reference, test, width, height); 370 } 371 372 private: 373 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { 374 const int width = GetParam().Block().Width(); 375 const int height = GetParam().Block().Height(); 376 377 const InterpFilterParams *filter_params_x = 378 av1_get_interp_filter_params_with_block_size(filter, width); 379 ConvolveParams conv_params1 = 380 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 381 const uint8_t *input = FirstRandomInput8(GetParam()); 382 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 383 384 
aom_usec_timer timer; 385 aom_usec_timer_start(&timer); 386 for (int i = 0; i < num_iters; ++i) { 387 av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height, 388 filter_params_x, 0, &conv_params1); 389 } 390 aom_usec_timer_mark(&timer); 391 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 392 ConvolveParams conv_params2 = 393 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 394 convolve_x_func test_func = GetParam().TestFunction(); 395 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 396 397 aom_usec_timer_start(&timer); 398 for (int i = 0; i < num_iters; ++i) { 399 test_func(input, width, test, kOutputStride, width, height, 400 filter_params_x, 0, &conv_params2); 401 } 402 aom_usec_timer_mark(&timer); 403 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 404 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, 405 time2, time1 / time2); 406 } 407 }; 408 409 TEST_P(AV1ConvolveXTest, RunTest) { RunTest(); } 410 411 TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) { SpeedTest(); } 412 413 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXTest, 414 BuildLowbdParams(av1_convolve_x_sr_c)); 415 416 #if HAVE_SSE2 417 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXTest, 418 BuildLowbdParams(av1_convolve_x_sr_sse2)); 419 #endif 420 421 #if HAVE_AVX2 422 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXTest, 423 BuildLowbdParams(av1_convolve_x_sr_avx2)); 424 #endif 425 426 #if HAVE_NEON 427 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest, 428 BuildLowbdParams(av1_convolve_x_sr_neon)); 429 #endif 430 431 #if HAVE_NEON_DOTPROD 432 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest, 433 BuildLowbdParams(av1_convolve_x_sr_neon_dotprod)); 434 #endif 435 436 #if HAVE_NEON_I8MM 437 INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest, 438 BuildLowbdParams(av1_convolve_x_sr_neon_i8mm)); 439 #endif 440 441 #if HAVE_RVV 442 INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveXTest, 443 
BuildLowbdParams(av1_convolve_x_sr_rvv)); 444 #endif 445 446 //////////////////////////////////////////////////////////////// 447 // Single reference convolve-x IntraBC functions (low bit-depth) 448 //////////////////////////////////////////////////////////////// 449 450 class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> { 451 public: 452 void RunTest() { 453 // IntraBC functions only operate for subpel_x_qn = 8. 454 constexpr int kSubX = 8; 455 const int width = GetParam().Block().Width(); 456 const int height = GetParam().Block().Height(); 457 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; 458 const uint8_t *input = FirstRandomInput8(GetParam()); 459 460 ConvolveParams conv_params1 = 461 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 462 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 463 // Use a stride different from width to avoid potential storing errors that 464 // would go undetected. The input buffer is filled using a padding of 12, so 465 // the stride can be anywhere between width and width + 12. 
466 av1_convolve_x_sr_intrabc_c(input, width + 2, reference, kOutputStride, 467 width, height, filter_params_x, kSubX, 468 &conv_params1); 469 470 ConvolveParams conv_params2 = 471 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 472 convolve_x_func test_func = GetParam().TestFunction(); 473 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 474 test_func(input, width + 2, test, kOutputStride, width, height, 475 filter_params_x, kSubX, &conv_params2); 476 477 AssertOutputBufferEq(reference, test, width, height); 478 } 479 480 void SpeedTest() { 481 constexpr int kNumIters = 10000; 482 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); 483 const int width = GetParam().Block().Width(); 484 const int height = GetParam().Block().Height(); 485 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; 486 const uint8_t *input = FirstRandomInput8(GetParam()); 487 488 ConvolveParams conv_params1 = 489 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 490 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 491 aom_usec_timer timer; 492 aom_usec_timer_start(&timer); 493 for (int i = 0; i < kNumIters; ++i) { 494 av1_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, width, 495 height, filter_params_x, 0, &conv_params1); 496 } 497 aom_usec_timer_mark(&timer); 498 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 499 500 ConvolveParams conv_params2 = 501 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 502 convolve_x_func test_func = GetParam().TestFunction(); 503 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 504 aom_usec_timer_start(&timer); 505 for (int i = 0; i < kNumIters; ++i) { 506 test_func(input, width, test, kOutputStride, width, height, 507 filter_params_x, 0, &conv_params2); 508 } 509 aom_usec_timer_mark(&timer); 510 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 511 512 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, 513 time2, time1 / 
time2); 514 } 515 }; 516 517 TEST_P(AV1ConvolveXIntraBCTest, RunTest) { RunTest(); } 518 519 TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } 520 521 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXIntraBCTest, 522 BuildLowbdParams(av1_convolve_x_sr_intrabc_c)); 523 524 #if HAVE_NEON 525 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest, 526 BuildLowbdParams(av1_convolve_x_sr_intrabc_neon)); 527 #endif 528 529 #if HAVE_RVV 530 INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveXIntraBCTest, 531 BuildLowbdParams(av1_convolve_x_sr_intrabc_rvv)); 532 #endif 533 534 #if CONFIG_AV1_HIGHBITDEPTH 535 ///////////////////////////////////////////////////////// 536 // Single reference convolve-x functions (high bit-depth) 537 ///////////////////////////////////////////////////////// 538 using highbd_convolve_x_func = 539 void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, 540 int w, int h, const InterpFilterParams *filter_params_x, 541 const int subpel_x_qn, ConvolveParams *conv_params, int bd); 542 543 class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> { 544 public: 545 void RunTest() { 546 // Do not test the no-op filter. 
547 for (int sub_x = 1; sub_x < 16; ++sub_x) { 548 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 549 ++filter) { 550 InterpFilter f = static_cast<InterpFilter>(filter); 551 TestConvolve(sub_x, f); 552 } 553 } 554 } 555 556 public: 557 void SpeedTest() { 558 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 559 ++filter) { 560 InterpFilter f = static_cast<InterpFilter>(filter); 561 TestConvolveSpeed(f, 10000); 562 } 563 } 564 565 private: 566 void TestConvolve(const int sub_x, const InterpFilter filter) { 567 const int width = GetParam().Block().Width(); 568 const int height = GetParam().Block().Height(); 569 const int bit_depth = GetParam().BitDepth(); 570 const InterpFilterParams *filter_params_x = 571 av1_get_interp_filter_params_with_block_size(filter, width); 572 ConvolveParams conv_params1 = 573 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 574 const uint16_t *input = FirstRandomInput16(GetParam()); 575 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 576 av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width, 577 height, filter_params_x, sub_x, &conv_params1, 578 bit_depth); 579 580 ConvolveParams conv_params2 = 581 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 582 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 583 GetParam().TestFunction()(input, width, test, kOutputStride, width, height, 584 filter_params_x, sub_x, &conv_params2, bit_depth); 585 AssertOutputBufferEq(reference, test, width, height); 586 } 587 588 private: 589 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { 590 const int width = GetParam().Block().Width(); 591 const int height = GetParam().Block().Height(); 592 const int bit_depth = GetParam().BitDepth(); 593 const InterpFilterParams *filter_params_x = 594 av1_get_interp_filter_params_with_block_size(filter, width); 595 ConvolveParams conv_params1 = 596 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 597 const uint16_t *input 
= FirstRandomInput16(GetParam()); 598 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 599 600 aom_usec_timer timer; 601 aom_usec_timer_start(&timer); 602 for (int i = 0; i < num_iters; ++i) { 603 av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width, 604 height, filter_params_x, 0, &conv_params1, 605 bit_depth); 606 } 607 aom_usec_timer_mark(&timer); 608 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 609 ConvolveParams conv_params2 = 610 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 611 highbd_convolve_x_func test_func = GetParam().TestFunction(); 612 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 613 614 aom_usec_timer_start(&timer); 615 for (int i = 0; i < num_iters; ++i) { 616 test_func(input, width, test, kOutputStride, width, height, 617 filter_params_x, 0, &conv_params2, bit_depth); 618 } 619 aom_usec_timer_mark(&timer); 620 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 621 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, 622 time2, time1 / time2); 623 } 624 }; 625 626 TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); } 627 628 TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); } 629 630 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest, 631 BuildHighbdParams(av1_highbd_convolve_x_sr_c)); 632 633 #if HAVE_SSSE3 634 INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest, 635 BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3)); 636 #endif 637 638 #if HAVE_AVX2 639 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest, 640 BuildHighbdParams(av1_highbd_convolve_x_sr_avx2)); 641 #endif 642 643 #if HAVE_NEON 644 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest, 645 BuildHighbdParams(av1_highbd_convolve_x_sr_neon)); 646 #endif 647 648 #if HAVE_SVE2 649 INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveXHighbdTest, 650 BuildHighbdParams(av1_highbd_convolve_x_sr_sve2)); 651 #endif 652 653 #if HAVE_RVV 654 INSTANTIATE_TEST_SUITE_P(RVV, 
AV1ConvolveXHighbdTest, 655 BuildHighbdParams(av1_highbd_convolve_x_sr_rvv)); 656 #endif 657 658 ///////////////////////////////////////////////////////////////// 659 // Single reference convolve-x IntraBC functions (high bit-depth) 660 ///////////////////////////////////////////////////////////////// 661 662 class AV1ConvolveXHighbdIntraBCTest 663 : public AV1ConvolveTest<highbd_convolve_x_func> { 664 public: 665 void RunTest() { 666 // IntraBC functions only operate for subpel_x_qn = 8. 667 constexpr int kSubX = 8; 668 const int width = GetParam().Block().Width(); 669 const int height = GetParam().Block().Height(); 670 const int bit_depth = GetParam().BitDepth(); 671 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; 672 const uint16_t *input = FirstRandomInput16(GetParam()); 673 674 ConvolveParams conv_params1 = 675 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 676 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 677 // Use a stride different from width to avoid potential storing errors that 678 // would go undetected. The input buffer is filled using a padding of 12, so 679 // the stride can be anywhere between width and width + 12. 
680 av1_highbd_convolve_x_sr_intrabc_c( 681 input, width + 2, reference, kOutputStride, width, height, 682 filter_params_x, kSubX, &conv_params1, bit_depth); 683 684 ConvolveParams conv_params2 = 685 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 686 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 687 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, 688 height, filter_params_x, kSubX, &conv_params2, 689 bit_depth); 690 691 AssertOutputBufferEq(reference, test, width, height); 692 } 693 694 void SpeedTest() { 695 constexpr int kNumIters = 10000; 696 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); 697 const int width = GetParam().Block().Width(); 698 const int height = GetParam().Block().Height(); 699 const int bit_depth = GetParam().BitDepth(); 700 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; 701 const uint16_t *input = FirstRandomInput16(GetParam()); 702 703 ConvolveParams conv_params1 = 704 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 705 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 706 aom_usec_timer timer; 707 aom_usec_timer_start(&timer); 708 for (int i = 0; i < kNumIters; ++i) { 709 av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, 710 width, height, filter_params_x, 0, 711 &conv_params1, bit_depth); 712 } 713 aom_usec_timer_mark(&timer); 714 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 715 716 ConvolveParams conv_params2 = 717 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 718 highbd_convolve_x_func test_func = GetParam().TestFunction(); 719 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 720 aom_usec_timer_start(&timer); 721 for (int i = 0; i < kNumIters; ++i) { 722 test_func(input, width, test, kOutputStride, width, height, 723 filter_params_x, 0, &conv_params2, bit_depth); 724 } 725 aom_usec_timer_mark(&timer); 726 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 727 728 
printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, 729 time2, time1 / time2); 730 } 731 }; 732 733 TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) { RunTest(); } 734 735 TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } 736 737 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest, 738 BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c)); 739 740 #if HAVE_NEON 741 INSTANTIATE_TEST_SUITE_P( 742 NEON, AV1ConvolveXHighbdIntraBCTest, 743 BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon)); 744 #endif 745 746 #if HAVE_RVV 747 INSTANTIATE_TEST_SUITE_P( 748 RVV, AV1ConvolveXHighbdIntraBCTest, 749 BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_rvv)); 750 #endif 751 752 #endif // CONFIG_AV1_HIGHBITDEPTH 753 754 //////////////////////////////////////////////////////// 755 // Single reference convolve-y functions (low bit-depth) 756 //////////////////////////////////////////////////////// 757 using convolve_y_func = void (*)(const uint8_t *src, int src_stride, 758 uint8_t *dst, int dst_stride, int w, int h, 759 const InterpFilterParams *filter_params_y, 760 const int subpel_y_qn); 761 762 class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> { 763 public: 764 void RunTest() { 765 // Do not test the no-op filter. 
766 for (int sub_y = 1; sub_y < 16; ++sub_y) { 767 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 768 ++filter) { 769 InterpFilter f = static_cast<InterpFilter>(filter); 770 TestConvolve(sub_y, f); 771 } 772 } 773 } 774 775 public: 776 void SpeedTest() { 777 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 778 ++filter) { 779 InterpFilter f = static_cast<InterpFilter>(filter); 780 TestConvolveSpeed(f, 10000); 781 } 782 } 783 784 private: 785 void TestConvolve(const int sub_y, const InterpFilter filter) { 786 const int width = GetParam().Block().Width(); 787 const int height = GetParam().Block().Height(); 788 789 const InterpFilterParams *filter_params_y = 790 av1_get_interp_filter_params_with_block_size(filter, height); 791 const uint8_t *input = FirstRandomInput8(GetParam()); 792 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 793 av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height, 794 filter_params_y, sub_y); 795 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 796 GetParam().TestFunction()(input, width, test, kOutputStride, width, height, 797 filter_params_y, sub_y); 798 AssertOutputBufferEq(reference, test, width, height); 799 } 800 801 private: 802 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { 803 const int width = GetParam().Block().Width(); 804 const int height = GetParam().Block().Height(); 805 806 const InterpFilterParams *filter_params_y = 807 av1_get_interp_filter_params_with_block_size(filter, height); 808 const uint8_t *input = FirstRandomInput8(GetParam()); 809 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 810 811 aom_usec_timer timer; 812 aom_usec_timer_start(&timer); 813 for (int i = 0; i < num_iters; ++i) { 814 av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height, 815 filter_params_y, 0); 816 } 817 aom_usec_timer_mark(&timer); 818 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 819 820 
DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 821 822 aom_usec_timer_start(&timer); 823 for (int i = 0; i < num_iters; ++i) { 824 GetParam().TestFunction()(input, width, test, kOutputStride, width, 825 height, filter_params_y, 0); 826 } 827 aom_usec_timer_mark(&timer); 828 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 829 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, 830 time2, time1 / time2); 831 } 832 }; 833 834 TEST_P(AV1ConvolveYTest, RunTest) { RunTest(); } 835 836 TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) { SpeedTest(); } 837 838 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYTest, 839 BuildLowbdParams(av1_convolve_y_sr_c)); 840 841 #if HAVE_SSE2 842 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYTest, 843 BuildLowbdParams(av1_convolve_y_sr_sse2)); 844 #endif 845 846 #if HAVE_AVX2 847 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYTest, 848 BuildLowbdParams(av1_convolve_y_sr_avx2)); 849 #endif 850 851 #if HAVE_NEON 852 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest, 853 BuildLowbdParams(av1_convolve_y_sr_neon)); 854 #endif 855 856 #if HAVE_NEON_DOTPROD 857 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveYTest, 858 BuildLowbdParams(av1_convolve_y_sr_neon_dotprod)); 859 #endif 860 861 #if HAVE_NEON_I8MM 862 INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveYTest, 863 BuildLowbdParams(av1_convolve_y_sr_neon_i8mm)); 864 #endif 865 866 #if HAVE_RVV 867 INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveYTest, 868 BuildLowbdParams(av1_convolve_y_sr_rvv)); 869 #endif 870 871 //////////////////////////////////////////////////////////////// 872 // Single reference convolve-y IntraBC functions (low bit-depth) 873 //////////////////////////////////////////////////////////////// 874 875 class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> { 876 public: 877 void RunTest() { 878 // IntraBC functions only operate for subpel_y_qn = 8. 
879 constexpr int kSubY = 8; 880 const int width = GetParam().Block().Width(); 881 const int height = GetParam().Block().Height(); 882 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; 883 const uint8_t *input = FirstRandomInput8(GetParam()); 884 885 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 886 // Use a stride different from width to avoid potential storing errors that 887 // would go undetected. The input buffer is filled using a padding of 12, so 888 // the stride can be anywhere between width and width + 12. 889 av1_convolve_y_sr_intrabc_c(input, width + 2, reference, kOutputStride, 890 width, height, filter_params_y, kSubY); 891 892 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 893 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, 894 height, filter_params_y, kSubY); 895 896 AssertOutputBufferEq(reference, test, width, height); 897 } 898 899 void SpeedTest() { 900 constexpr int kNumIters = 10000; 901 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); 902 const int width = GetParam().Block().Width(); 903 const int height = GetParam().Block().Height(); 904 905 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; 906 const uint8_t *input = FirstRandomInput8(GetParam()); 907 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 908 909 aom_usec_timer timer; 910 aom_usec_timer_start(&timer); 911 for (int i = 0; i < kNumIters; ++i) { 912 av1_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, width, 913 height, filter_params_y, 0); 914 } 915 aom_usec_timer_mark(&timer); 916 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 917 918 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 919 convolve_y_func test_func = GetParam().TestFunction(); 920 aom_usec_timer_start(&timer); 921 for (int i = 0; i < kNumIters; ++i) { 922 test_func(input, width, test, kOutputStride, width, height, 923 filter_params_y, 0); 924 } 925 
aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    // Prints: filter label, block size, reference time, test time, and the
    // reference/test ratio.
    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
           time2, time1 / time2);
  }
};

TEST_P(AV1ConvolveYIntraBCTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the speed test out of normal runs.
TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation; each non-C variant is compiled only
// when the matching HAVE_* macro is non-zero.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_neon));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_rvv));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
/////////////////////////////////////////////////////////
// Single reference convolve-y functions (high bit-depth)
/////////////////////////////////////////////////////////
// Signature shared by the high bit-depth convolve-y functions under test:
// 16-bit source and destination with their strides, block width and height,
// vertical filter parameters, vertical sub-pixel offset, and bit depth.
using highbd_convolve_y_func =
    void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
             int w, int h, const InterpFilterParams *filter_params_y,
             const int subpel_y_qn, int bd);

// Compares a high bit-depth convolve-y implementation against
// av1_highbd_convolve_y_sr_c.
class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> {
 public:
  void RunTest() {
    // Do not test the no-op filter.
963 for (int sub_y = 1; sub_y < 16; ++sub_y) { 964 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 965 ++filter) { 966 InterpFilter f = static_cast<InterpFilter>(filter); 967 TestConvolve(sub_y, f); 968 } 969 } 970 } 971 972 public: 973 void SpeedTest() { 974 for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL; 975 ++filter) { 976 InterpFilter f = static_cast<InterpFilter>(filter); 977 TestConvolveSpeed(f, 10000); 978 } 979 } 980 981 private: 982 void TestConvolve(const int sub_y, const InterpFilter filter) { 983 const int width = GetParam().Block().Width(); 984 const int height = GetParam().Block().Height(); 985 const int bit_depth = GetParam().BitDepth(); 986 const InterpFilterParams *filter_params_y = 987 av1_get_interp_filter_params_with_block_size(filter, height); 988 const uint16_t *input = FirstRandomInput16(GetParam()); 989 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 990 av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width, 991 height, filter_params_y, sub_y, bit_depth); 992 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 993 GetParam().TestFunction()(input, width, test, kOutputStride, width, height, 994 filter_params_y, sub_y, bit_depth); 995 AssertOutputBufferEq(reference, test, width, height); 996 } 997 998 private: 999 void TestConvolveSpeed(const InterpFilter filter, const int num_iters) { 1000 const int width = GetParam().Block().Width(); 1001 const int height = GetParam().Block().Height(); 1002 const int bit_depth = GetParam().BitDepth(); 1003 const InterpFilterParams *filter_params_y = 1004 av1_get_interp_filter_params_with_block_size(filter, width); 1005 const uint16_t *input = FirstRandomInput16(GetParam()); 1006 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 1007 1008 aom_usec_timer timer; 1009 aom_usec_timer_start(&timer); 1010 for (int i = 0; i < num_iters; ++i) { 1011 av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width, 1012 height, 
filter_params_y, 0, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    // Total elapsed time of the reference loop, as reported by the timer.
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    // Fetch the function pointer once, outside the timed loop.
    highbd_convolve_y_func test_func = GetParam().TestFunction();
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);

    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      test_func(input, width, test, kOutputStride, width, height,
                filter_params_y, 0, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    // Prints: filter index, block size, reference time, test time, and the
    // reference/test ratio.
    printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
           time2, time1 / time2);
  }
};

TEST_P(AV1ConvolveYHighbdTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the speed test out of normal runs.
TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation; each non-C variant is compiled only
// when the matching HAVE_* macro is non-zero.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_sve2));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_rvv));
#endif

/////////////////////////////////////////////////////////////////
// Single reference convolve-y IntraBC functions (high bit-depth)
/////////////////////////////////////////////////////////////////

// Compares a high bit-depth convolve-y IntraBC implementation against
// av1_highbd_convolve_y_sr_intrabc_c.
class AV1ConvolveYHighbdIntraBCTest
    :
public AV1ConvolveTest<highbd_convolve_y_func> { 1069 public: 1070 void RunTest() { 1071 // IntraBC functions only operate for subpel_y_qn = 8. 1072 constexpr int kSubY = 8; 1073 const int width = GetParam().Block().Width(); 1074 const int height = GetParam().Block().Height(); 1075 const int bit_depth = GetParam().BitDepth(); 1076 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; 1077 const uint16_t *input = FirstRandomInput16(GetParam()); 1078 1079 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 1080 // Use a stride different from width to avoid potential storing errors that 1081 // would go undetected. The input buffer is filled using a padding of 12, so 1082 // the stride can be anywhere between width and width + 12. 1083 av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference, 1084 kOutputStride, width, height, 1085 filter_params_y, kSubY, bit_depth); 1086 1087 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 1088 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, 1089 height, filter_params_y, kSubY, bit_depth); 1090 1091 AssertOutputBufferEq(reference, test, width, height); 1092 } 1093 1094 void SpeedTest() { 1095 constexpr int kNumIters = 10000; 1096 const InterpFilter filter = static_cast<InterpFilter>(BILINEAR); 1097 const int width = GetParam().Block().Width(); 1098 const int height = GetParam().Block().Height(); 1099 const int bit_depth = GetParam().BitDepth(); 1100 const InterpFilterParams *filter_params_y = 1101 av1_get_interp_filter_params_with_block_size(filter, width); 1102 const uint16_t *input = FirstRandomInput16(GetParam()); 1103 1104 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 1105 aom_usec_timer timer; 1106 aom_usec_timer_start(&timer); 1107 for (int i = 0; i < kNumIters; ++i) { 1108 av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, 1109 width, height, filter_params_y, 0, 1110 bit_depth); 1111 } 1112 aom_usec_timer_mark(&timer); 1113 const double 
time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 1114 1115 highbd_convolve_y_func test_func = GetParam().TestFunction(); 1116 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 1117 aom_usec_timer_start(&timer); 1118 for (int i = 0; i < kNumIters; ++i) { 1119 test_func(input, width, test, kOutputStride, width, height, 1120 filter_params_y, 0, bit_depth); 1121 } 1122 aom_usec_timer_mark(&timer); 1123 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 1124 1125 printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1, 1126 time2, time1 / time2); 1127 } 1128 }; 1129 1130 TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) { RunTest(); } 1131 1132 TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } 1133 1134 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest, 1135 BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c)); 1136 1137 #if HAVE_NEON 1138 INSTANTIATE_TEST_SUITE_P( 1139 NEON, AV1ConvolveYHighbdIntraBCTest, 1140 BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon)); 1141 #endif 1142 1143 #if HAVE_RVV 1144 INSTANTIATE_TEST_SUITE_P( 1145 RVV, AV1ConvolveYHighbdIntraBCTest, 1146 BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_rvv)); 1147 #endif 1148 1149 #endif // CONFIG_AV1_HIGHBITDEPTH 1150 1151 ////////////////////////////////////////////////////////////// 1152 // Single reference convolve-copy functions (low bit-depth) 1153 ////////////////////////////////////////////////////////////// 1154 using convolve_copy_func = void (*)(const uint8_t *src, ptrdiff_t src_stride, 1155 uint8_t *dst, ptrdiff_t dst_stride, int w, 1156 int h); 1157 1158 class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> { 1159 public: 1160 void RunTest() { 1161 const int width = GetParam().Block().Width(); 1162 const int height = GetParam().Block().Height(); 1163 const uint8_t *input = FirstRandomInput8(GetParam()); 1164 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 1165 
// Produce the expected output with the C implementation, then run the
    // function under test with identical arguments and compare.
    aom_convolve_copy_c(input, width, reference, kOutputStride, width, height);
    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
    AssertOutputBufferEq(reference, test, width, height);
  }
};

// Note that even though these are AOM convolve functions, we are using the
// newer AV1 test framework.
TEST_P(AV1ConvolveCopyTest, RunTest) { RunTest(); }

// One instantiation per implementation; each non-C variant is compiled only
// when the matching HAVE_* macro is non-zero.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyTest,
                         BuildLowbdParams(aom_convolve_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyTest,
                         BuildLowbdParams(aom_convolve_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyTest,
                         BuildLowbdParams(aom_convolve_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest,
                         BuildLowbdParams(aom_convolve_copy_neon));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
///////////////////////////////////////////////////////////////
// Single reference convolve-copy functions (high bit-depth)
///////////////////////////////////////////////////////////////
// Signature of the high bit-depth copy functions under test; same layout as
// convolve_copy_func but with 16-bit samples.
using highbd_convolve_copy_func = void (*)(const uint16_t *src,
                                           ptrdiff_t src_stride, uint16_t *dst,
                                           ptrdiff_t dst_stride, int w, int h);

// Compares a high bit-depth convolve-copy implementation against
// aom_highbd_convolve_copy_c.
class AV1ConvolveCopyHighbdTest
    : public AV1ConvolveTest<highbd_convolve_copy_func> {
 public:
  void RunTest() {
    const BlockSize &block = GetParam().Block();
    const int width = block.Width();
    const int height = block.Height();
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    aom_highbd_convolve_copy_c(input, width, reference, kOutputStride, width,
                               height);
    DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
    GetParam().TestFunction()(input, width, test, kOutputStride, width,
height);
    // The function under test must reproduce the C reference exactly.
    AssertOutputBufferEq(reference, test, width, height);
  }
};

TEST_P(AV1ConvolveCopyHighbdTest, RunTest) { RunTest(); }

// One instantiation per implementation; each non-C variant is compiled only
// when the matching HAVE_* macro is non-zero.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_neon));
#endif

#endif  // CONFIG_AV1_HIGHBITDEPTH

/////////////////////////////////////////////////////////
// Single reference convolve-2D functions (low bit-depth)
/////////////////////////////////////////////////////////
// Signature of the low bit-depth 2D convolve functions under test: source and
// destination with strides, block size, horizontal and vertical filter
// parameters, horizontal and vertical sub-pixel offsets, and the convolve
// parameters.
using convolve_2d_func = void (*)(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int w, int h,
                                  const InterpFilterParams *filter_params_x,
                                  const InterpFilterParams *filter_params_y,
                                  const int subpel_x_qn, const int subpel_y_qn,
                                  ConvolveParams *conv_params);

// Compares a 2D convolve implementation against av1_convolve_2d_sr_c.
class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> {
 public:
  void RunTest() {
    // Do not test the no-op filter.
for (int sub_x = 1; sub_x < 16; ++sub_x) {
      for (int sub_y = 1; sub_y < 16; ++sub_y) {
        for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
          for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
            // Skip pairs that mix MULTITAP_SHARP2 in one direction with a
            // lower-numbered filter in the other.
            if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
                ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
              continue;
            TestConvolve(static_cast<InterpFilter>(h_f),
                         static_cast<InterpFilter>(v_f), sub_x, sub_y);
          }
        }
      }
    }
  }

 public:
  // Times every horizontal/vertical filter pair that RunTest covers, using
  // 10000 iterations per pair.
  void SpeedTest() {
    for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
      for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
        // Same filter-pair exclusion as in RunTest.
        if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
            ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
          continue;
        TestConvolveSpeed(static_cast<InterpFilter>(h_f),
                          static_cast<InterpFilter>(v_f), 10000);
      }
    }
  }

 private:
  // Runs av1_convolve_2d_sr_c and the function under test on the same random
  // input and requires both output buffers to match.
  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
                    const int sub_x, const int sub_y) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    // Horizontal filter is chosen from the width, vertical from the height.
    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(h_f, width);
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(v_f, height);
    const uint8_t *input = FirstRandomInput8(GetParam());
    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    // Each call gets its own ConvolveParams instance.
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, height,
                         filter_params_x, filter_params_y, sub_x, sub_y,
                         &conv_params1);
    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    GetParam().TestFunction()(input, width, test,
kOutputStride, width, height,
                              filter_params_x, filter_params_y, sub_x, sub_y,
                              &conv_params2);
    AssertOutputBufferEq(reference, test, width, height);
  }

 private:
  // Times num_iters calls of av1_convolve_2d_sr_c followed by num_iters calls
  // of the function under test, both with sub-pixel offsets of 0, and prints
  // both totals and their ratio.
  void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
                         int num_iters) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(h_f, width);
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(v_f, height);
    const uint8_t *input = FirstRandomInput8(GetParam());
    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
                           height, filter_params_x, filter_params_y, 0, 0,
                           &conv_params1);
    }
    aom_usec_timer_mark(&timer);
    // Total elapsed time of the reference loop, as reported by the timer.
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    aom_usec_timer_start(&timer);
    for (int i = 0; i < num_iters; ++i) {
      GetParam().TestFunction()(input, width, test, kOutputStride, width,
                                height, filter_params_x, filter_params_y, 0, 0,
                                &conv_params2);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    // Prints: both filter indices, block size, reference time, test time, and
    // the reference/test ratio.
    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
           time1, time2, time1 / time2);
  }
};

TEST_P(AV1Convolve2DTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the speed test out of normal runs.
TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) { SpeedTest(); }

INSTANTIATE_TEST_SUITE_P(C,
AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_c));

// Architecture-specific instantiations; each is compiled only when the
// matching HAVE_* macro is non-zero.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sve2));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_rvv));
#endif

/////////////////////////////////////////////////////////////////
// Single reference convolve-2D IntraBC functions (low bit-depth)
/////////////////////////////////////////////////////////////////

// Compares a 2D convolve IntraBC implementation against
// av1_convolve_2d_sr_intrabc_c using av1_intrabc_filter_params in both
// directions.
class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> {
 public:
  void RunTest() {
    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
constexpr int kSubX = 8;
    constexpr int kSubY = 8;
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    // The same IntraBC filter parameters are used horizontally and vertically.
    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    const uint8_t *input = FirstRandomInput8(GetParam());

    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    // Use a stride different from width to avoid potential storing errors that
    // would go undetected. The input buffer is filled using a padding of 12, so
    // the stride can be anywhere between width and width + 12.
    av1_convolve_2d_sr_intrabc_c(input, width + 2, reference, kOutputStride,
                                 width, height, filter_params_x,
                                 filter_params_y, kSubX, kSubY, &conv_params1);

    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
                              height, filter_params_x, filter_params_y, kSubX,
                              kSubY, &conv_params2);

    AssertOutputBufferEq(reference, test, width, height);
  }

  // Times kNumIters calls of the C reference followed by kNumIters calls of
  // the function under test and prints both totals and their ratio.
  void SpeedTest() {
    constexpr int kNumIters = 10000;
    // h_f and v_f are only used as labels in the printed report; the filter
    // parameters below always point at av1_intrabc_filter_params.
    const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
    const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
    const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
    const uint8_t *input = FirstRandomInput8(GetParam());

    DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
    ConvolveParams conv_params1 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    // Reference loop; both sub-pixel offsets are 8.
    for (int i = 0; i < kNumIters; ++i) {
      av1_convolve_2d_sr_intrabc_c(input, width, reference, kOutputStride,
                                   width, height, filter_params_x,
                                   filter_params_y, 8, 8, &conv_params1);
    }
    aom_usec_timer_mark(&timer);
    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    // Fetch the function pointer once, outside the timed loop.
    convolve_2d_func test_func = GetParam().TestFunction();
    DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
    ConvolveParams conv_params2 =
        get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
    aom_usec_timer_start(&timer);
    for (int i = 0; i < kNumIters; ++i) {
      test_func(input, width, test, kOutputStride, width, height,
                filter_params_x, filter_params_y, 8, 8, &conv_params2);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));

    // Prints: both filter labels, block size, reference time, test time, and
    // the reference/test ratio.
    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
           time1, time2, time1 / time2);
  }
};

TEST_P(AV1Convolve2DIntraBCTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the speed test out of normal runs.
TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation; each non-C variant is compiled only
// when the matching HAVE_* macro is non-zero.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_rvv));
#endif

#if CONFIG_AV1_HIGHBITDEPTH
//////////////////////////////////////////////////////////
// Single reference convolve-2d functions (high bit-depth)
//////////////////////////////////////////////////////////

// Signature of the high bit-depth 2D convolve functions under test.
using highbd_convolve_2d_func =
    void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
             int w, int h, const
InterpFilterParams *filter_params_x,
             const InterpFilterParams *filter_params_y, const int subpel_x_qn,
             const int subpel_y_qn, ConvolveParams *conv_params, int bd);

// Compares a high bit-depth 2D convolve implementation against
// av1_highbd_convolve_2d_sr_c.
class AV1Convolve2DHighbdTest
    : public AV1ConvolveTest<highbd_convolve_2d_func> {
 public:
  void RunTest() {
    // Do not test the no-op filter.
    for (int sub_x = 1; sub_x < 16; ++sub_x) {
      for (int sub_y = 1; sub_y < 16; ++sub_y) {
        for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
          for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
            // Skip pairs that mix MULTITAP_SHARP2 in one direction with a
            // lower-numbered filter in the other.
            if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
                ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
              continue;
            TestConvolve(static_cast<InterpFilter>(h_f),
                         static_cast<InterpFilter>(v_f), sub_x, sub_y);
          }
        }
      }
    }
  }

 public:
  // Times every horizontal/vertical filter pair that RunTest covers, using
  // 10000 iterations per pair.
  void SpeedTest() {
    for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
      for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
        // Same filter-pair exclusion as in RunTest.
        if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
            ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
          continue;
        TestConvolveSpeed(static_cast<InterpFilter>(h_f),
                          static_cast<InterpFilter>(v_f), 10000);
      }
    }
  }

 private:
  // Runs av1_highbd_convolve_2d_sr_c and the function under test on the same
  // random input and requires both output buffers to match.
  void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
                    const int sub_x, const int sub_y) {
    const int width = GetParam().Block().Width();
    const int height = GetParam().Block().Height();
    const int bit_depth = GetParam().BitDepth();
    // Horizontal filter is chosen from the width, vertical from the height.
    const InterpFilterParams *filter_params_x =
        av1_get_interp_filter_params_with_block_size(h_f, width);
    const InterpFilterParams *filter_params_y =
        av1_get_interp_filter_params_with_block_size(v_f, height);
    const uint16_t *input = FirstRandomInput16(GetParam());
    DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
    ConvolveParams conv_params1 =
get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 1539 av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width, 1540 height, filter_params_x, filter_params_y, sub_x, 1541 sub_y, &conv_params1, bit_depth); 1542 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 1543 ConvolveParams conv_params2 = 1544 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 1545 GetParam().TestFunction()(input, width, test, kOutputStride, width, height, 1546 filter_params_x, filter_params_y, sub_x, sub_y, 1547 &conv_params2, bit_depth); 1548 AssertOutputBufferEq(reference, test, width, height); 1549 } 1550 1551 void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f, 1552 int num_iters) { 1553 const int width = GetParam().Block().Width(); 1554 const int height = GetParam().Block().Height(); 1555 const int bit_depth = GetParam().BitDepth(); 1556 const InterpFilterParams *filter_params_x = 1557 av1_get_interp_filter_params_with_block_size(h_f, width); 1558 const InterpFilterParams *filter_params_y = 1559 av1_get_interp_filter_params_with_block_size(v_f, height); 1560 const uint16_t *input = FirstRandomInput16(GetParam()); 1561 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 1562 ConvolveParams conv_params1 = 1563 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 1564 aom_usec_timer timer; 1565 aom_usec_timer_start(&timer); 1566 for (int i = 0; i < num_iters; ++i) { 1567 av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width, 1568 height, filter_params_x, filter_params_y, 0, 1569 0, &conv_params1, bit_depth); 1570 } 1571 aom_usec_timer_mark(&timer); 1572 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 1573 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 1574 ConvolveParams conv_params2 = 1575 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 1576 aom_usec_timer_start(&timer); 1577 for (int i = 0; i < num_iters; ++i) { 1578 GetParam().TestFunction()(input, width, test, kOutputStride, 
width,
                                height, filter_params_x, filter_params_y, 0, 0,
                                &conv_params2, bit_depth);
    }
    aom_usec_timer_mark(&timer);
    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    // Prints: both filter indices, block size, reference time, test time, and
    // the reference/test ratio.
    printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
           time1, time2, time1 / time2);
  }
};

TEST_P(AV1Convolve2DHighbdTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the speed test out of normal runs.
TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per implementation; each non-C variant is compiled only
// when the matching HAVE_* macro is non-zero.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_sve2));
#endif

#if HAVE_RVV
INSTANTIATE_TEST_SUITE_P(RVV, AV1Convolve2DHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_rvv));
#endif

//////////////////////////////////////////////////////////////////
// Single reference convolve-2d IntraBC functions (high bit-depth)
//////////////////////////////////////////////////////////////////

// Compares a high bit-depth 2D convolve IntraBC implementation against
// av1_highbd_convolve_2d_sr_intrabc_c.
class AV1Convolve2DHighbdIntraBCTest
    : public AV1ConvolveTest<highbd_convolve_2d_func> {
 public:
  void RunTest() {
    // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
1630 constexpr int kSubX = 8; 1631 constexpr int kSubY = 8; 1632 const int width = GetParam().Block().Width(); 1633 const int height = GetParam().Block().Height(); 1634 const int bit_depth = GetParam().BitDepth(); 1635 const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params; 1636 const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params; 1637 const uint16_t *input = FirstRandomInput16(GetParam()); 1638 1639 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 1640 ConvolveParams conv_params1 = 1641 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 1642 // Use a stride different from width to avoid potential storing errors that 1643 // would go undetected. The input buffer is filled using a padding of 12, so 1644 // the stride can be anywhere between width and width + 12. 1645 av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference, 1646 kOutputStride, width, height, 1647 filter_params_x, filter_params_y, kSubX, 1648 kSubY, &conv_params1, bit_depth); 1649 1650 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 1651 ConvolveParams conv_params2 = 1652 get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth); 1653 GetParam().TestFunction()(input, width + 2, test, kOutputStride, width, 1654 height, filter_params_x, filter_params_y, kSubX, 1655 kSubY, &conv_params2, bit_depth); 1656 1657 AssertOutputBufferEq(reference, test, width, height); 1658 } 1659 1660 void SpeedTest() { 1661 constexpr int kNumIters = 10000; 1662 const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR); 1663 const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR); 1664 const int width = GetParam().Block().Width(); 1665 const int height = GetParam().Block().Height(); 1666 const int bit_depth = GetParam().BitDepth(); 1667 const InterpFilterParams *filter_params_x = 1668 av1_get_interp_filter_params_with_block_size(h_f, width); 1669 const InterpFilterParams *filter_params_y = 1670 av1_get_interp_filter_params_with_block_size(v_f, height); 1671 
const uint16_t *input = FirstRandomInput16(GetParam()); 1672 1673 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 1674 ConvolveParams conv_params1 = 1675 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 1676 aom_usec_timer timer; 1677 aom_usec_timer_start(&timer); 1678 for (int i = 0; i < kNumIters; ++i) { 1679 av1_highbd_convolve_2d_sr_intrabc_c( 1680 input, width, reference, kOutputStride, width, height, 1681 filter_params_x, filter_params_y, 0, 0, &conv_params1, bit_depth); 1682 } 1683 aom_usec_timer_mark(&timer); 1684 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 1685 1686 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 1687 highbd_convolve_2d_func test_func = GetParam().TestFunction(); 1688 ConvolveParams conv_params2 = 1689 get_conv_params_no_round(0, 0, nullptr, 0, 0, 8); 1690 aom_usec_timer_start(&timer); 1691 for (int i = 0; i < kNumIters; ++i) { 1692 test_func(input, width, test, kOutputStride, width, height, 1693 filter_params_x, filter_params_y, 0, 0, &conv_params2, 1694 bit_depth); 1695 } 1696 aom_usec_timer_mark(&timer); 1697 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 1698 1699 printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height, 1700 time1, time2, time1 / time2); 1701 } 1702 }; 1703 1704 TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) { RunTest(); } 1705 1706 TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); } 1707 1708 INSTANTIATE_TEST_SUITE_P( 1709 C, AV1Convolve2DHighbdIntraBCTest, 1710 BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c)); 1711 1712 #if HAVE_NEON 1713 INSTANTIATE_TEST_SUITE_P( 1714 NEON, AV1Convolve2DHighbdIntraBCTest, 1715 BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon)); 1716 #endif 1717 1718 #if HAVE_RVV 1719 INSTANTIATE_TEST_SUITE_P( 1720 RVV, AV1Convolve2DHighbdIntraBCTest, 1721 BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_rvv)); 1722 #endif 1723 1724 #endif // CONFIG_AV1_HIGHBITDEPTH 
1725 1726 ////////////////////////// 1727 // Compound Convolve Tests 1728 ////////////////////////// 1729 1730 // The compound functions do not work for chroma block sizes. Provide 1731 // a function to generate test parameters for just luma block sizes. 1732 template <typename T> 1733 std::vector<TestParam<T>> GetLumaTestParams( 1734 std::initializer_list<int> bit_depths, T test_func) { 1735 std::set<BlockSize> sizes; 1736 for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) { 1737 const int w = block_size_wide[b]; 1738 const int h = block_size_high[b]; 1739 sizes.insert(BlockSize(w, h)); 1740 } 1741 std::vector<TestParam<T>> result; 1742 for (int bit_depth : bit_depths) { 1743 for (const auto &block : sizes) { 1744 result.push_back(TestParam<T>(block, bit_depth, test_func)); 1745 } 1746 } 1747 return result; 1748 } 1749 1750 template <typename T> 1751 std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) { 1752 return GetLumaTestParams({ 8 }, test_func); 1753 } 1754 1755 template <typename T> 1756 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams( 1757 T test_func) { 1758 return ::testing::ValuesIn(GetLowbdLumaTestParams(test_func)); 1759 } 1760 1761 TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) { 1762 auto v = GetLowbdLumaTestParams(av1_dist_wtd_convolve_x_c); 1763 ASSERT_EQ(22U, v.size()); 1764 for (const auto &e : v) { 1765 ASSERT_EQ(8, e.BitDepth()); 1766 bool same_fn = av1_dist_wtd_convolve_x_c == e.TestFunction(); 1767 ASSERT_TRUE(same_fn); 1768 } 1769 } 1770 1771 #if CONFIG_AV1_HIGHBITDEPTH 1772 template <typename T> 1773 std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) { 1774 return GetLumaTestParams({ 10, 12 }, test_func); 1775 } 1776 1777 TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) { 1778 auto v = GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c); 1779 ASSERT_EQ(44U, v.size()); 1780 int num_10 = 0; 1781 int num_12 = 0; 1782 for (const auto &e : v) { 1783 ASSERT_TRUE(10 == 
e.BitDepth() || 12 == e.BitDepth()); 1784 bool same_fn = av1_highbd_dist_wtd_convolve_x_c == e.TestFunction(); 1785 ASSERT_TRUE(same_fn); 1786 if (e.BitDepth() == 10) { 1787 ++num_10; 1788 } else { 1789 ++num_12; 1790 } 1791 } 1792 ASSERT_EQ(num_10, num_12); 1793 } 1794 1795 template <typename T> 1796 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams( 1797 T test_func) { 1798 return ::testing::ValuesIn(GetHighbdLumaTestParams(test_func)); 1799 } 1800 1801 #endif // CONFIG_AV1_HIGHBITDEPTH 1802 1803 // Compound cases also need to test different frame offsets and weightings. 1804 class CompoundParam { 1805 public: 1806 CompoundParam(bool use_dist_wtd_comp_avg, int fwd_offset, int bck_offset) 1807 : use_dist_wtd_comp_avg_(use_dist_wtd_comp_avg), fwd_offset_(fwd_offset), 1808 bck_offset_(bck_offset) {} 1809 1810 bool UseDistWtdCompAvg() const { return use_dist_wtd_comp_avg_; } 1811 int FwdOffset() const { return fwd_offset_; } 1812 int BckOffset() const { return bck_offset_; } 1813 1814 private: 1815 bool use_dist_wtd_comp_avg_; 1816 int fwd_offset_; 1817 int bck_offset_; 1818 }; 1819 1820 std::vector<CompoundParam> GetCompoundParams() { 1821 std::vector<CompoundParam> result; 1822 result.push_back(CompoundParam(false, 0, 0)); 1823 for (int k = 0; k < 2; ++k) { 1824 for (int l = 0; l < 4; ++l) { 1825 result.push_back(CompoundParam(true, quant_dist_lookup_table[l][k], 1826 quant_dist_lookup_table[l][1 - k])); 1827 } 1828 } 1829 return result; 1830 } 1831 1832 TEST_F(AV1ConvolveParametersTest, GetCompoundParams) { 1833 auto v = GetCompoundParams(); 1834 ASSERT_EQ(9U, v.size()); 1835 ASSERT_FALSE(v[0].UseDistWtdCompAvg()); 1836 for (size_t i = 1; i < v.size(); ++i) { 1837 ASSERT_TRUE(v[i].UseDistWtdCompAvg()); 1838 } 1839 } 1840 1841 //////////////////////////////////////////////// 1842 // Compound convolve-x functions (low bit-depth) 1843 //////////////////////////////////////////////// 1844 1845 ConvolveParams GetConvolveParams(int do_average, 
CONV_BUF_TYPE *conv_buf, 1846 int width, int bit_depth, 1847 const CompoundParam &compound) { 1848 ConvolveParams conv_params = 1849 get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth); 1850 conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg(); 1851 conv_params.fwd_offset = compound.FwdOffset(); 1852 conv_params.bck_offset = compound.BckOffset(); 1853 return conv_params; 1854 } 1855 1856 class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> { 1857 public: 1858 void RunTest() { 1859 auto compound_params = GetCompoundParams(); 1860 // Do not test the no-op filter. 1861 for (int sub_pix = 1; sub_pix < 16; ++sub_pix) { 1862 for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) { 1863 for (const auto &c : compound_params) { 1864 TestConvolve(sub_pix, static_cast<InterpFilter>(f), c); 1865 } 1866 } 1867 } 1868 } 1869 1870 protected: 1871 virtual const InterpFilterParams *FilterParams(InterpFilter f, 1872 const BlockSize &block) const { 1873 return av1_get_interp_filter_params_with_block_size(f, block.Width()); 1874 } 1875 1876 virtual convolve_x_func ReferenceFunc() const { 1877 return av1_dist_wtd_convolve_x_c; 1878 } 1879 1880 private: 1881 void TestConvolve(const int sub_pix, const InterpFilter filter, 1882 const CompoundParam &compound) { 1883 const int width = GetParam().Block().Width(); 1884 const int height = GetParam().Block().Height(); 1885 const uint8_t *input1 = FirstRandomInput8(GetParam()); 1886 const uint8_t *input2 = SecondRandomInput8(GetParam()); 1887 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 1888 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); 1889 Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf, 1890 compound, sub_pix, filter); 1891 1892 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 1893 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); 1894 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, 
1895 compound, sub_pix, filter); 1896 1897 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); 1898 AssertOutputBufferEq(reference, test, width, height); 1899 } 1900 1901 private: 1902 void Convolve(convolve_x_func test_func, const uint8_t *src1, 1903 const uint8_t *src2, uint8_t *dst, CONV_BUF_TYPE *conv_buf, 1904 const CompoundParam &compound, const int sub_pix, 1905 const InterpFilter filter) { 1906 const int width = GetParam().Block().Width(); 1907 const int height = GetParam().Block().Height(); 1908 const InterpFilterParams *filter_params = 1909 FilterParams(filter, GetParam().Block()); 1910 1911 ConvolveParams conv_params = 1912 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); 1913 test_func(src1, width, dst, kOutputStride, width, height, filter_params, 1914 sub_pix, &conv_params); 1915 1916 conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); 1917 test_func(src2, width, dst, kOutputStride, width, height, filter_params, 1918 sub_pix, &conv_params); 1919 } 1920 }; 1921 1922 TEST_P(AV1ConvolveXCompoundTest, RunTest) { RunTest(); } 1923 1924 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXCompoundTest, 1925 BuildLowbdLumaParams(av1_dist_wtd_convolve_x_c)); 1926 1927 #if HAVE_SSE2 1928 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXCompoundTest, 1929 BuildLowbdLumaParams(av1_dist_wtd_convolve_x_sse2)); 1930 #endif 1931 1932 #if HAVE_AVX2 1933 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXCompoundTest, 1934 BuildLowbdLumaParams(av1_dist_wtd_convolve_x_avx2)); 1935 #endif 1936 1937 #if HAVE_NEON 1938 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXCompoundTest, 1939 BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon)); 1940 #endif 1941 1942 #if HAVE_NEON_DOTPROD 1943 INSTANTIATE_TEST_SUITE_P( 1944 NEON_DOTPROD, AV1ConvolveXCompoundTest, 1945 BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod)); 1946 #endif 1947 1948 #if HAVE_NEON_I8MM 1949 INSTANTIATE_TEST_SUITE_P( 1950 NEON_I8MM, AV1ConvolveXCompoundTest, 1951 
BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm)); 1952 #endif 1953 1954 #if CONFIG_AV1_HIGHBITDEPTH 1955 ///////////////////////////////////////////////// 1956 // Compound convolve-x functions (high bit-depth) 1957 ///////////////////////////////////////////////// 1958 class AV1ConvolveXHighbdCompoundTest 1959 : public AV1ConvolveTest<highbd_convolve_x_func> { 1960 public: 1961 void RunTest() { 1962 auto compound_params = GetCompoundParams(); 1963 // Do not test the no-op filter. 1964 for (int sub_pix = 1; sub_pix < 16; ++sub_pix) { 1965 for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) { 1966 for (const auto &c : compound_params) { 1967 TestConvolve(sub_pix, static_cast<InterpFilter>(f), c); 1968 } 1969 } 1970 } 1971 } 1972 1973 protected: 1974 virtual const InterpFilterParams *FilterParams(InterpFilter f, 1975 const BlockSize &block) const { 1976 return av1_get_interp_filter_params_with_block_size(f, block.Width()); 1977 } 1978 1979 virtual highbd_convolve_x_func ReferenceFunc() const { 1980 return av1_highbd_dist_wtd_convolve_x_c; 1981 } 1982 1983 private: 1984 void TestConvolve(const int sub_pix, const InterpFilter filter, 1985 const CompoundParam &compound) { 1986 const int width = GetParam().Block().Width(); 1987 const int height = GetParam().Block().Height(); 1988 1989 const uint16_t *input1 = FirstRandomInput16(GetParam()); 1990 const uint16_t *input2 = SecondRandomInput16(GetParam()); 1991 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 1992 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); 1993 Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf, 1994 compound, sub_pix, filter); 1995 1996 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 1997 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); 1998 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, 1999 compound, sub_pix, filter); 2000 2001 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, 
width, height); 2002 AssertOutputBufferEq(reference, test, width, height); 2003 } 2004 2005 void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1, 2006 const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf, 2007 const CompoundParam &compound, const int sub_pix, 2008 const InterpFilter filter) { 2009 const int width = GetParam().Block().Width(); 2010 const int height = GetParam().Block().Height(); 2011 const int bit_depth = GetParam().BitDepth(); 2012 const InterpFilterParams *filter_params = 2013 FilterParams(filter, GetParam().Block()); 2014 ConvolveParams conv_params = 2015 GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound); 2016 test_func(src1, width, dst, kOutputStride, width, height, filter_params, 2017 sub_pix, &conv_params, bit_depth); 2018 conv_params = 2019 GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound); 2020 test_func(src2, width, dst, kOutputStride, width, height, filter_params, 2021 sub_pix, &conv_params, bit_depth); 2022 } 2023 }; 2024 2025 TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) { RunTest(); } 2026 2027 INSTANTIATE_TEST_SUITE_P( 2028 C, AV1ConvolveXHighbdCompoundTest, 2029 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c)); 2030 2031 #if HAVE_SSE4_1 2032 INSTANTIATE_TEST_SUITE_P( 2033 SSE4_1, AV1ConvolveXHighbdCompoundTest, 2034 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1)); 2035 #endif 2036 2037 #if HAVE_AVX2 2038 INSTANTIATE_TEST_SUITE_P( 2039 AVX2, AV1ConvolveXHighbdCompoundTest, 2040 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2)); 2041 #endif 2042 2043 #if HAVE_NEON 2044 INSTANTIATE_TEST_SUITE_P( 2045 NEON, AV1ConvolveXHighbdCompoundTest, 2046 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon)); 2047 #endif 2048 2049 #if HAVE_SVE2 2050 INSTANTIATE_TEST_SUITE_P( 2051 SVE2, AV1ConvolveXHighbdCompoundTest, 2052 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sve2)); 2053 #endif 2054 2055 #endif // CONFIG_AV1_HIGHBITDEPTH 2056 2057 
//////////////////////////////////////////////// 2058 // Compound convolve-y functions (low bit-depth) 2059 //////////////////////////////////////////////// 2060 2061 // Note that the X and Y convolve functions have the same type signature and 2062 // logic; they only differentiate the filter parameters and reference function. 2063 class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest { 2064 protected: 2065 const InterpFilterParams *FilterParams( 2066 InterpFilter f, const BlockSize &block) const override { 2067 return av1_get_interp_filter_params_with_block_size(f, block.Height()); 2068 } 2069 2070 convolve_x_func ReferenceFunc() const override { 2071 return av1_dist_wtd_convolve_y_c; 2072 } 2073 }; 2074 2075 TEST_P(AV1ConvolveYCompoundTest, RunTest) { RunTest(); } 2076 2077 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYCompoundTest, 2078 BuildLowbdLumaParams(av1_dist_wtd_convolve_y_c)); 2079 2080 #if HAVE_SSE2 2081 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYCompoundTest, 2082 BuildLowbdLumaParams(av1_dist_wtd_convolve_y_sse2)); 2083 #endif 2084 2085 #if HAVE_AVX2 2086 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYCompoundTest, 2087 BuildLowbdLumaParams(av1_dist_wtd_convolve_y_avx2)); 2088 #endif 2089 2090 #if HAVE_NEON 2091 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYCompoundTest, 2092 BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon)); 2093 #endif 2094 2095 #if CONFIG_AV1_HIGHBITDEPTH 2096 ///////////////////////////////////////////////// 2097 // Compound convolve-y functions (high bit-depth) 2098 ///////////////////////////////////////////////// 2099 2100 // Again, the X and Y convolve functions have the same type signature and logic. 
2101 class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest { 2102 highbd_convolve_x_func ReferenceFunc() const override { 2103 return av1_highbd_dist_wtd_convolve_y_c; 2104 } 2105 const InterpFilterParams *FilterParams( 2106 InterpFilter f, const BlockSize &block) const override { 2107 return av1_get_interp_filter_params_with_block_size(f, block.Height()); 2108 } 2109 }; 2110 2111 TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) { RunTest(); } 2112 2113 INSTANTIATE_TEST_SUITE_P( 2114 C, AV1ConvolveYHighbdCompoundTest, 2115 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c)); 2116 2117 #if HAVE_SSE4_1 2118 INSTANTIATE_TEST_SUITE_P( 2119 SSE4_1, AV1ConvolveYHighbdCompoundTest, 2120 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1)); 2121 #endif 2122 2123 #if HAVE_AVX2 2124 INSTANTIATE_TEST_SUITE_P( 2125 AVX2, AV1ConvolveYHighbdCompoundTest, 2126 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2)); 2127 #endif 2128 2129 #if HAVE_NEON 2130 INSTANTIATE_TEST_SUITE_P( 2131 NEON, AV1ConvolveYHighbdCompoundTest, 2132 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon)); 2133 #endif 2134 2135 #if HAVE_SVE2 2136 INSTANTIATE_TEST_SUITE_P( 2137 SVE2, AV1ConvolveYHighbdCompoundTest, 2138 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sve2)); 2139 #endif 2140 2141 #endif // CONFIG_AV1_HIGHBITDEPTH 2142 2143 ////////////////////////////////////////////////////// 2144 // Compound convolve-2d-copy functions (low bit-depth) 2145 ////////////////////////////////////////////////////// 2146 using compound_conv_2d_copy_func = void (*)(const uint8_t *src, int src_stride, 2147 uint8_t *dst, int dst_stride, int w, 2148 int h, ConvolveParams *conv_params); 2149 2150 class AV1Convolve2DCopyCompoundTest 2151 : public AV1ConvolveTest<compound_conv_2d_copy_func> { 2152 public: 2153 void RunTest() { 2154 auto compound_params = GetCompoundParams(); 2155 for (const auto &compound : compound_params) { 2156 TestConvolve(compound); 2157 } 2158 
} 2159 void SpeedTest() { 2160 for (const auto &compound : GetCompoundParams()) { 2161 TestConvolveSpeed(compound, 100000); 2162 } 2163 } 2164 2165 private: 2166 void TestConvolve(const CompoundParam &compound) { 2167 const BlockSize &block = GetParam().Block(); 2168 const int width = block.Width(); 2169 const int height = block.Height(); 2170 2171 const uint8_t *input1 = FirstRandomInput8(GetParam()); 2172 const uint8_t *input2 = SecondRandomInput8(GetParam()); 2173 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 2174 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); 2175 Convolve(av1_dist_wtd_convolve_2d_copy_c, input1, input2, reference, 2176 reference_conv_buf, compound); 2177 2178 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 2179 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); 2180 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, 2181 compound); 2182 2183 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); 2184 AssertOutputBufferEq(reference, test, width, height); 2185 } 2186 2187 void TestConvolveSpeed(const CompoundParam &compound, const int num_iters) { 2188 const int width = GetParam().Block().Width(); 2189 const int height = GetParam().Block().Height(); 2190 2191 const uint8_t *src0 = FirstRandomInput8(GetParam()); 2192 const uint8_t *src1 = SecondRandomInput8(GetParam()); 2193 DECLARE_ALIGNED(32, uint8_t, dst[MAX_SB_SQUARE]); 2194 DECLARE_ALIGNED(32, CONV_BUF_TYPE, conv_buf[MAX_SB_SQUARE]); 2195 2196 const auto test_func = GetParam().TestFunction(); 2197 2198 ConvolveParams conv_params_0 = 2199 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); 2200 ConvolveParams conv_params_1 = 2201 GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); 2202 2203 aom_usec_timer timer; 2204 aom_usec_timer_start(&timer); 2205 for (int i = 0; i < num_iters; ++i) { 2206 av1_dist_wtd_convolve_2d_copy_c(src0, width, dst, kOutputStride, width, 2207 height, 
&conv_params_0); 2208 av1_dist_wtd_convolve_2d_copy_c(src1, width, dst, kOutputStride, width, 2209 height, &conv_params_1); 2210 } 2211 aom_usec_timer_mark(&timer); 2212 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 2213 2214 aom_usec_timer_start(&timer); 2215 for (int i = 0; i < num_iters; ++i) { 2216 test_func(src0, width, dst, kOutputStride, width, height, &conv_params_0); 2217 test_func(src1, width, dst, kOutputStride, width, height, &conv_params_1); 2218 } 2219 aom_usec_timer_mark(&timer); 2220 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 2221 printf("Dist Weighted: %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", 2222 compound.UseDistWtdCompAvg(), width, height, time1, time2, 2223 time1 / time2); 2224 } 2225 2226 void Convolve(compound_conv_2d_copy_func test_func, const uint8_t *src1, 2227 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf, 2228 const CompoundParam &compound) { 2229 const BlockSize &block = GetParam().Block(); 2230 const int width = block.Width(); 2231 const int height = block.Height(); 2232 ConvolveParams conv_params = 2233 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); 2234 test_func(src1, width, dst, kOutputStride, width, height, &conv_params); 2235 2236 conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); 2237 test_func(src2, width, dst, kOutputStride, width, height, &conv_params); 2238 } 2239 }; 2240 2241 TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) { RunTest(); } 2242 TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) { SpeedTest(); } 2243 2244 INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCopyCompoundTest, 2245 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_c)); 2246 2247 #if HAVE_SSE2 2248 INSTANTIATE_TEST_SUITE_P( 2249 SSE2, AV1Convolve2DCopyCompoundTest, 2250 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_sse2)); 2251 #endif 2252 2253 #if HAVE_AVX2 2254 INSTANTIATE_TEST_SUITE_P( 2255 AVX2, AV1Convolve2DCopyCompoundTest, 2256 
BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_avx2)); 2257 #endif 2258 2259 #if HAVE_NEON 2260 INSTANTIATE_TEST_SUITE_P( 2261 NEON, AV1Convolve2DCopyCompoundTest, 2262 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon)); 2263 #endif 2264 2265 #if CONFIG_AV1_HIGHBITDEPTH 2266 /////////////////////////////////////////////////////// 2267 // Compound convolve-2d-copy functions (high bit-depth) 2268 /////////////////////////////////////////////////////// 2269 using highbd_compound_conv_2d_copy_func = 2270 void (*)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, 2271 int w, int h, ConvolveParams *conv_params, int bd); 2272 2273 class AV1Convolve2DCopyHighbdCompoundTest 2274 : public AV1ConvolveTest<highbd_compound_conv_2d_copy_func> { 2275 public: 2276 void RunTest() { 2277 auto compound_params = GetCompoundParams(); 2278 for (const auto &compound : compound_params) { 2279 TestConvolve(compound); 2280 } 2281 } 2282 2283 private: 2284 void TestConvolve(const CompoundParam &compound) { 2285 const BlockSize &block = GetParam().Block(); 2286 const int width = block.Width(); 2287 const int height = block.Height(); 2288 2289 const uint16_t *input1 = FirstRandomInput16(GetParam()); 2290 const uint16_t *input2 = SecondRandomInput16(GetParam()); 2291 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 2292 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); 2293 Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, input1, input2, reference, 2294 reference_conv_buf, compound); 2295 2296 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 2297 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); 2298 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, 2299 compound); 2300 2301 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); 2302 AssertOutputBufferEq(reference, test, width, height); 2303 } 2304 2305 void Convolve(highbd_compound_conv_2d_copy_func test_func, 2306 const 
uint16_t *src1, const uint16_t *src2, uint16_t *dst, 2307 uint16_t *conv_buf, const CompoundParam &compound) { 2308 const BlockSize &block = GetParam().Block(); 2309 const int width = block.Width(); 2310 const int height = block.Height(); 2311 const int bit_depth = GetParam().BitDepth(); 2312 2313 ConvolveParams conv_params = 2314 GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound); 2315 test_func(src1, width, dst, kOutputStride, width, height, &conv_params, 2316 bit_depth); 2317 2318 conv_params = 2319 GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound); 2320 test_func(src2, width, dst, kOutputStride, width, height, &conv_params, 2321 bit_depth); 2322 } 2323 }; 2324 2325 TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) { RunTest(); } 2326 2327 INSTANTIATE_TEST_SUITE_P( 2328 C, AV1Convolve2DCopyHighbdCompoundTest, 2329 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c)); 2330 2331 #if HAVE_SSE4_1 2332 INSTANTIATE_TEST_SUITE_P( 2333 SSE4_1, AV1Convolve2DCopyHighbdCompoundTest, 2334 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1)); 2335 #endif 2336 2337 #if HAVE_AVX2 2338 INSTANTIATE_TEST_SUITE_P( 2339 AVX2, AV1Convolve2DCopyHighbdCompoundTest, 2340 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2)); 2341 #endif 2342 2343 #if HAVE_NEON 2344 INSTANTIATE_TEST_SUITE_P( 2345 NEON, AV1Convolve2DCopyHighbdCompoundTest, 2346 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon)); 2347 #endif 2348 2349 #endif // CONFIG_AV1_HIGHBITDEPTH 2350 2351 ///////////////////////////////////////////////// 2352 // Compound convolve-2d functions (low bit-depth) 2353 ///////////////////////////////////////////////// 2354 2355 class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> { 2356 public: 2357 void RunTest() { 2358 auto compound_params = GetCompoundParams(); 2359 for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) { 2360 for (int v_f = EIGHTTAP_REGULAR; v_f < 
INTERP_FILTERS_ALL; ++v_f) { 2361 // Do not test the no-op filter. 2362 for (int sub_x = 1; sub_x < 16; ++sub_x) { 2363 for (int sub_y = 1; sub_y < 16; ++sub_y) { 2364 for (const auto &compound : compound_params) { 2365 TestConvolve(static_cast<InterpFilter>(h_f), 2366 static_cast<InterpFilter>(v_f), sub_x, sub_y, 2367 compound); 2368 } 2369 } 2370 } 2371 } 2372 } 2373 } 2374 2375 private: 2376 void TestConvolve(const InterpFilter h_f, const InterpFilter v_f, 2377 const int sub_x, const int sub_y, 2378 const CompoundParam &compound) { 2379 const BlockSize &block = GetParam().Block(); 2380 const int width = block.Width(); 2381 const int height = block.Height(); 2382 2383 const uint8_t *input1 = FirstRandomInput8(GetParam()); 2384 const uint8_t *input2 = SecondRandomInput8(GetParam()); 2385 DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]); 2386 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); 2387 Convolve(av1_dist_wtd_convolve_2d_c, input1, input2, reference, 2388 reference_conv_buf, compound, h_f, v_f, sub_x, sub_y); 2389 2390 DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]); 2391 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); 2392 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, 2393 compound, h_f, v_f, sub_x, sub_y); 2394 2395 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); 2396 AssertOutputBufferEq(reference, test, width, height); 2397 } 2398 2399 private: 2400 void Convolve(convolve_2d_func test_func, const uint8_t *src1, 2401 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf, 2402 const CompoundParam &compound, const InterpFilter h_f, 2403 const InterpFilter v_f, const int sub_x, const int sub_y) { 2404 const BlockSize &block = GetParam().Block(); 2405 const int width = block.Width(); 2406 const int height = block.Height(); 2407 2408 const InterpFilterParams *filter_params_x = 2409 av1_get_interp_filter_params_with_block_size(h_f, width); 2410 const 
InterpFilterParams *filter_params_y = 2411 av1_get_interp_filter_params_with_block_size(v_f, height); 2412 ConvolveParams conv_params = 2413 GetConvolveParams(0, conv_buf, kOutputStride, 8, compound); 2414 2415 test_func(src1, width, dst, kOutputStride, width, height, filter_params_x, 2416 filter_params_y, sub_x, sub_y, &conv_params); 2417 2418 conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound); 2419 test_func(src2, width, dst, kOutputStride, width, height, filter_params_x, 2420 filter_params_y, sub_x, sub_y, &conv_params); 2421 } 2422 }; 2423 2424 TEST_P(AV1Convolve2DCompoundTest, RunTest) { RunTest(); } 2425 2426 INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DCompoundTest, 2427 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_c)); 2428 2429 #if HAVE_SSSE3 2430 INSTANTIATE_TEST_SUITE_P(SSSE3, AV1Convolve2DCompoundTest, 2431 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_ssse3)); 2432 #endif 2433 2434 #if HAVE_AVX2 2435 INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DCompoundTest, 2436 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_avx2)); 2437 #endif 2438 2439 #if HAVE_NEON 2440 INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DCompoundTest, 2441 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon)); 2442 #endif 2443 2444 #if HAVE_NEON_DOTPROD 2445 INSTANTIATE_TEST_SUITE_P( 2446 NEON_DOTPROD, AV1Convolve2DCompoundTest, 2447 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod)); 2448 #endif 2449 2450 #if HAVE_NEON_I8MM 2451 INSTANTIATE_TEST_SUITE_P( 2452 NEON_I8MM, AV1Convolve2DCompoundTest, 2453 BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm)); 2454 #endif 2455 2456 #if CONFIG_AV1_HIGHBITDEPTH 2457 ////////////////////////////////////////////////// 2458 // Compound convolve-2d functions (high bit-depth) 2459 ////////////////////////////////////////////////// 2460 2461 class AV1Convolve2DHighbdCompoundTest 2462 : public AV1ConvolveTest<highbd_convolve_2d_func> { 2463 public: 2464 void RunTest() { 2465 auto compound_params = GetCompoundParams(); 2466 
for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) { 2467 for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) { 2468 // Do not test the no-op filter. 2469 for (int sub_x = 1; sub_x < 16; ++sub_x) { 2470 for (int sub_y = 1; sub_y < 16; ++sub_y) { 2471 for (const auto &compound : compound_params) { 2472 TestConvolve(static_cast<InterpFilter>(h_f), 2473 static_cast<InterpFilter>(v_f), sub_x, sub_y, 2474 compound); 2475 } 2476 } 2477 } 2478 } 2479 } 2480 } 2481 2482 private: 2483 void TestConvolve(const InterpFilter h_f, const InterpFilter v_f, 2484 const int sub_x, const int sub_y, 2485 const CompoundParam &compound) { 2486 const BlockSize &block = GetParam().Block(); 2487 const int width = block.Width(); 2488 const int height = block.Height(); 2489 const uint16_t *input1 = FirstRandomInput16(GetParam()); 2490 const uint16_t *input2 = SecondRandomInput16(GetParam()); 2491 DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]); 2492 DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]); 2493 Convolve(av1_highbd_dist_wtd_convolve_2d_c, input1, input2, reference, 2494 reference_conv_buf, compound, h_f, v_f, sub_x, sub_y); 2495 2496 DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]); 2497 DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]); 2498 Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf, 2499 compound, h_f, v_f, sub_x, sub_y); 2500 2501 AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height); 2502 AssertOutputBufferEq(reference, test, width, height); 2503 } 2504 2505 private: 2506 void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1, 2507 const uint16_t *src2, uint16_t *dst, uint16_t *conv_buf, 2508 const CompoundParam &compound, const InterpFilter h_f, 2509 const InterpFilter v_f, const int sub_x, const int sub_y) { 2510 const BlockSize &block = GetParam().Block(); 2511 const int width = block.Width(); 2512 const int height = block.Height(); 2513 2514 
const InterpFilterParams *filter_params_x = 2515 av1_get_interp_filter_params_with_block_size(h_f, width); 2516 const InterpFilterParams *filter_params_y = 2517 av1_get_interp_filter_params_with_block_size(v_f, height); 2518 const int bit_depth = GetParam().BitDepth(); 2519 ConvolveParams conv_params = 2520 GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound); 2521 test_func(src1, width, dst, kOutputStride, width, height, filter_params_x, 2522 filter_params_y, sub_x, sub_y, &conv_params, bit_depth); 2523 2524 conv_params = 2525 GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound); 2526 test_func(src2, width, dst, kOutputStride, width, height, filter_params_x, 2527 filter_params_y, sub_x, sub_y, &conv_params, bit_depth); 2528 } 2529 }; 2530 2531 TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) { RunTest(); } 2532 2533 INSTANTIATE_TEST_SUITE_P( 2534 C, AV1Convolve2DHighbdCompoundTest, 2535 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c)); 2536 2537 #if HAVE_SSE4_1 2538 INSTANTIATE_TEST_SUITE_P( 2539 SSE4_1, AV1Convolve2DHighbdCompoundTest, 2540 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1)); 2541 #endif 2542 2543 #if HAVE_AVX2 2544 INSTANTIATE_TEST_SUITE_P( 2545 AVX2, AV1Convolve2DHighbdCompoundTest, 2546 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2)); 2547 #endif 2548 2549 #if HAVE_NEON 2550 INSTANTIATE_TEST_SUITE_P( 2551 NEON, AV1Convolve2DHighbdCompoundTest, 2552 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon)); 2553 #endif 2554 2555 #if HAVE_SVE2 2556 INSTANTIATE_TEST_SUITE_P( 2557 SVE2, AV1Convolve2DHighbdCompoundTest, 2558 BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sve2)); 2559 #endif 2560 2561 #endif // CONFIG_AV1_HIGHBITDEPTH 2562 2563 } // namespace