cdef_test.cc (39199B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <array> 13 #include <cstdlib> 14 #include <iostream> 15 #include <string> 16 #include <tuple> 17 18 #include "gtest/gtest.h" 19 20 #include "config/aom_config.h" 21 #include "config/av1_rtcd.h" 22 23 #include "aom_ports/aom_timer.h" 24 #include "av1/common/cdef_block.h" 25 #include "test/acm_random.h" 26 #include "test/register_state_check.h" 27 #include "test/util.h" 28 29 using libaom_test::ACMRandom; 30 31 namespace { 32 33 using CdefFilterBlockFunctions = std::array<cdef_filter_block_func, 4>; 34 35 using cdef_dir_param_t = 36 std::tuple<CdefFilterBlockFunctions, CdefFilterBlockFunctions, BLOCK_SIZE, 37 int, int>; 38 39 class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> { 40 public: 41 ~CDEFBlockTest() override = default; 42 void SetUp() override { 43 cdef_ = GET_PARAM(0); 44 ref_cdef_ = GET_PARAM(1); 45 bsize_ = GET_PARAM(2); 46 boundary_ = GET_PARAM(3); 47 depth_ = GET_PARAM(4); 48 } 49 50 protected: 51 BLOCK_SIZE bsize_; 52 int boundary_; 53 int depth_; 54 CdefFilterBlockFunctions cdef_; 55 CdefFilterBlockFunctions ref_cdef_; 56 }; 57 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest); 58 59 using CDEFBlockHighbdTest = CDEFBlockTest; 60 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockHighbdTest); 61 62 using CDEFSpeedTest = CDEFBlockTest; 63 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest); 64 65 using CDEFSpeedHighbdTest = CDEFBlockTest; 66 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedHighbdTest); 67 68 int64_t test_cdef(BLOCK_SIZE bsize, int iterations, 69 CdefFilterBlockFunctions cdef, 70 CdefFilterBlockFunctions ref_cdef, int boundary, int depth) { 71 aom_usec_timer ref_timer; 72 int64_t ref_elapsed_time = 0; 73 const int size = 8; 74 const int ysize = size + 2 * CDEF_VBORDER; 75 ACMRandom rnd(ACMRandom::DeterministicSeed()); 76 DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]); 77 DECLARE_ALIGNED(16, static uint16_t, d[size * size]); 78 DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]); 79 memset(ref_d, 0, sizeof(ref_d)); 80 memset(d, 0, sizeof(d)); 81 82 int error = 0, pristrength = 0, secstrength, dir; 83 int pridamping, secdamping, bits, level, count, 84 errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0, 85 errpridamping = 0, errsecdamping = 0; 86 unsigned int pos = 0; 87 88 const int block_width = 89 ((bsize == BLOCK_8X8) || (bsize == BLOCK_8X4)) ? 8 : 4; 90 const int block_height = 91 ((bsize == BLOCK_8X8) || (bsize == BLOCK_4X8)) ? 8 : 4; 92 const unsigned int max_pos = size * size >> static_cast<int>(depth == 8); 93 for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8; 94 pridamping++) { 95 for (secdamping = 3 + depth - 8; 96 secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) { 97 for (count = 0; count < iterations; count++) { 98 for (level = 0; level < (1 << depth) && !error; 99 level += (2 + 6 * !!boundary) << (depth - 8)) { 100 for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) { 101 for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) 102 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, 103 (1 << depth) - 1); 104 if (boundary) { 105 if (boundary & 1) { // Left 106 for (int i = 0; i < ysize; i++) 107 for (int j = 0; j < CDEF_HBORDER; j++) 108 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; 109 } 110 if (boundary & 2) { // Right 111 for (int i = 0; i < ysize; i++) 112 for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++) 113 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; 114 } 115 if (boundary & 4) { // Above 116 for (int i = 0; i < CDEF_VBORDER; i++) 117 for (int j = 0; j < CDEF_BSTRIDE; j++) 118 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; 119 } 120 if (boundary & 8) { // Below 121 for (int i = CDEF_VBORDER + size; i < ysize; i++) 122 for (int j = 0; j < CDEF_BSTRIDE; j++) 123 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE; 124 } 125 } 126 for (dir = 0; dir < 8; dir++) { 127 for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error; 128 pristrength += (1 + 4 * !!boundary) << (depth - 8)) { 129 if (pristrength == 16) pristrength = 19; 130 for (secstrength = 0; secstrength <= 4 << (depth - 8) && !error; 131 secstrength += 1 << (depth - 8)) { 132 if (secstrength == 3 << (depth - 8)) continue; 133 134 const int strength_index = 135 (secstrength == 0) | ((pristrength == 0) << 1); 136 137 aom_usec_timer_start(&ref_timer); 138 ref_cdef[strength_index]( 139 ref_d, size, 140 s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE, 141 pristrength, secstrength, dir, pridamping, secdamping, 142 depth - 8, block_width, block_height); 143 aom_usec_timer_mark(&ref_timer); 144 ref_elapsed_time += aom_usec_timer_elapsed(&ref_timer); 145 // If cdef and ref_cdef are the same, we're just testing 146 // speed 147 if (cdef[0] != ref_cdef[0]) 148 API_REGISTER_STATE_CHECK(cdef[strength_index]( 149 d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE, 150 pristrength, secstrength, dir, pridamping, secdamping, 151 depth - 8, block_width, block_height)); 152 if (ref_cdef[0] != cdef[0]) { 153 for (pos = 0; pos < max_pos && !error; pos++) { 154 error = ref_d[pos] != d[pos]; 155 errdepth = depth; 156 errpristrength = pristrength; 157 errsecstrength = secstrength; 158 errboundary = boundary; 159 errpridamping = pridamping; 160 errsecdamping = secdamping; 161 } 162 } 163 } 164 } 165 } 166 } 167 } 168 } 169 } 170 } 171 172 pos--; 173 EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch." 174 << std::endl 175 << "First error at " << pos % size << "," << pos / size 176 << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos] 177 << ") " << std::endl 178 << "pristrength: " << errpristrength << std::endl 179 << "pridamping: " << errpridamping << std::endl 180 << "secstrength: " << errsecstrength << std::endl 181 << "secdamping: " << errsecdamping << std::endl 182 << "depth: " << errdepth << std::endl 183 << "size: " << bsize << std::endl 184 << "boundary: " << errboundary << std::endl 185 << std::endl; 186 187 return ref_elapsed_time; 188 } 189 190 void test_cdef_speed(BLOCK_SIZE bsize, int iterations, 191 CdefFilterBlockFunctions cdef, 192 CdefFilterBlockFunctions ref_cdef, int boundary, 193 int depth) { 194 int64_t ref_elapsed_time = 195 test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth); 196 197 int64_t elapsed_time = 198 test_cdef(bsize, iterations, cdef, cdef, boundary, depth); 199 200 std::cout << "C time: " << ref_elapsed_time << " us" << std::endl 201 << "SIMD time: " << elapsed_time << " us" << std::endl; 202 203 EXPECT_GT(ref_elapsed_time, elapsed_time) 204 << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl 205 << "C time: " << ref_elapsed_time << " us" << std::endl 206 << "SIMD time: " << elapsed_time << " us" << std::endl; 207 } 208 209 using find_dir_t = int (*)(const uint16_t *img, int stride, int32_t *var, 210 int coeff_shift); 211 212 using find_dir_param_t = std::tuple<find_dir_t, find_dir_t>; 213 214 class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> { 215 public: 216 ~CDEFFindDirTest() override = default; 217 void SetUp() override { 218 finddir_ = GET_PARAM(0); 219 ref_finddir_ = GET_PARAM(1); 220 } 221 222 protected: 223 find_dir_t finddir_; 224 find_dir_t ref_finddir_; 225 }; 226 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest); 227 228 using CDEFFindDirSpeedTest = CDEFFindDirTest; 229 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest); 230 231 void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var, 232 int coeff_shift), 233 int (*ref_finddir)(const uint16_t *img, int stride, 234 int32_t *var, int coeff_shift)) { 235 const int size = 8; 236 ACMRandom rnd(ACMRandom::DeterministicSeed()); 237 DECLARE_ALIGNED(16, uint16_t, s[size * size]); 238 239 int error = 0; 240 int depth, bits, level, count, errdepth = 0; 241 int ref_res = 0, res = 0; 242 int32_t ref_var = 0, var = 0; 243 244 for (depth = 8; depth <= 12 && !error; depth += 2) { 245 for (count = 0; count < 512 && !error; count++) { 246 for (level = 0; level < (1 << depth) && !error; 247 level += 1 << (depth - 8)) { 248 for (bits = 1; bits <= depth && !error; bits++) { 249 for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) 250 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, 251 (1 << depth) - 1); 252 for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++) 253 ref_res = ref_finddir(s, size, &ref_var, depth - 8); 254 if (finddir != ref_finddir) 255 API_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8)); 256 if (ref_finddir != finddir) { 257 if (res != ref_res || var != ref_var) error = 1; 258 errdepth = depth; 259 } 260 } 261 } 262 } 263 } 264 265 EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch." 266 << std::endl 267 << "return: " << res << " : " << ref_res << std::endl 268 << "var: " << var << " : " << ref_var << std::endl 269 << "depth: " << errdepth << std::endl 270 << std::endl; 271 } 272 273 void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride, 274 int32_t *var, int coeff_shift), 275 int (*ref_finddir)(const uint16_t *img, int stride, 276 int32_t *var, int coeff_shift)) { 277 aom_usec_timer ref_timer; 278 aom_usec_timer timer; 279 280 aom_usec_timer_start(&ref_timer); 281 test_finddir(ref_finddir, ref_finddir); 282 aom_usec_timer_mark(&ref_timer); 283 int64_t ref_elapsed_time = aom_usec_timer_elapsed(&ref_timer); 284 285 aom_usec_timer_start(&timer); 286 test_finddir(finddir, finddir); 287 aom_usec_timer_mark(&timer); 288 int64_t elapsed_time = aom_usec_timer_elapsed(&timer); 289 290 EXPECT_GT(ref_elapsed_time, elapsed_time) 291 << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl 292 << "C time: " << ref_elapsed_time << " us" << std::endl 293 << "SIMD time: " << elapsed_time << " us" << std::endl; 294 } 295 296 using find_dir_dual_t = void (*)(const uint16_t *img1, const uint16_t *img2, 297 int stride, int32_t *var1, int32_t *var2, 298 int coeff_shift, int *out1, int *out2); 299 300 using find_dir_dual_param_t = std::tuple<find_dir_dual_t, find_dir_dual_t>; 301 302 class CDEFFindDirDualTest 303 : public ::testing::TestWithParam<find_dir_dual_param_t> { 304 public: 305 ~CDEFFindDirDualTest() override = default; 306 void SetUp() override { 307 finddir_ = GET_PARAM(0); 308 ref_finddir_ = GET_PARAM(1); 309 } 310 311 protected: 312 find_dir_dual_t finddir_; 313 find_dir_dual_t ref_finddir_; 314 }; 315 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualTest); 316 317 using CDEFFindDirDualSpeedTest = CDEFFindDirDualTest; 318 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualSpeedTest); 319 320 void test_finddir_dual( 321 void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride, 322 int32_t *var1, int32_t *var2, int coeff_shift, int *out1, 323 int *out2), 324 void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, int stride, 325 int32_t *var1, int32_t *var2, int coeff_shift, 326 int *out1, int *out2)) { 327 const int size_wd = 16; 328 const int size_ht = 8; 329 ACMRandom rnd(ACMRandom::DeterministicSeed()); 330 DECLARE_ALIGNED(16, uint16_t, s[size_ht * size_wd]); 331 332 int error = 0, errdepth = 0; 333 int32_t ref_var[2] = { 0 }; 334 int ref_dir[2] = { 0 }; 335 int32_t var[2] = { 0 }; 336 int dir[2] = { 0 }; 337 338 for (int depth = 8; depth <= 12 && !error; depth += 2) { 339 for (int count = 0; count < 512 && !error; count++) { 340 for (int level = 0; level < (1 << depth) && !error; 341 level += 1 << (depth - 8)) { 342 for (int bits = 1; bits <= depth && !error; bits++) { 343 for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++) 344 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0, 345 (1 << depth) - 1); 346 for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++) 347 ref_finddir(s, s + 8, size_wd, &ref_var[0], &ref_var[1], depth - 8, 348 &ref_dir[0], &ref_dir[1]); 349 if (finddir != ref_finddir) 350 API_REGISTER_STATE_CHECK(finddir(s, s + 8, size_wd, &var[0], 351 &var[1], depth - 8, &dir[0], 352 &dir[1])); 353 if (ref_finddir != finddir) { 354 for (int j = 0; j < 2; j++) { 355 if (ref_dir[j] != dir[j] || ref_var[j] != var[j]) error = 1; 356 } 357 errdepth = depth; 358 } 359 } 360 } 361 } 362 } 363 364 for (int j = 0; j < 2; j++) { 365 EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch." 366 << std::endl 367 << "direction: " << dir[j] << " : " << ref_dir[j] 368 << std::endl 369 << "variance: " << var[j] << " : " << ref_var[j] 370 << std::endl 371 << "depth: " << errdepth << std::endl 372 << std::endl; 373 } 374 } 375 376 void test_finddir_dual_speed( 377 void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride, 378 int32_t *var1, int32_t *var2, int coeff_shift, int *out1, 379 int *out2), 380 void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, int stride, 381 int32_t *var1, int32_t *var2, int coeff_shift, 382 int *out1, int *out2)) { 383 aom_usec_timer ref_timer; 384 aom_usec_timer timer; 385 386 aom_usec_timer_start(&ref_timer); 387 test_finddir_dual(ref_finddir, ref_finddir); 388 aom_usec_timer_mark(&ref_timer); 389 const double ref_elapsed_time = 390 static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); 391 392 aom_usec_timer_start(&timer); 393 test_finddir_dual(finddir, finddir); 394 aom_usec_timer_mark(&timer); 395 const double elapsed_time = 396 static_cast<double>(aom_usec_timer_elapsed(&timer)); 397 398 printf( 399 "ref_time=%lf \t simd_time=%lf \t " 400 "gain=%lf \n", 401 ref_elapsed_time, elapsed_time, ref_elapsed_time / elapsed_time); 402 } 403 404 #define MAX_CDEF_BLOCK 256 405 406 constexpr int kIterations = 100; 407 408 using CDEFCopyRect8To16 = void (*)(uint16_t *dst, int dstride, 409 const uint8_t *src, int sstride, int width, 410 int height); 411 412 using CDEFCopyRect8To16Param = std::tuple<CDEFCopyRect8To16, CDEFCopyRect8To16>; 413 414 class CDEFCopyRect8to16Test 415 : public ::testing::TestWithParam<CDEFCopyRect8To16Param> { 416 public: 417 CDEFCopyRect8to16Test() 418 : rnd_(libaom_test::ACMRandom::DeterministicSeed()), 419 test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {} 420 ~CDEFCopyRect8to16Test() override = default; 421 void SetUp() override { 422 src_ = reinterpret_cast<uint8_t *>( 423 aom_memalign(8, sizeof(uint8_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); 424 ASSERT_NE(src_, nullptr); 425 ref_dst_ = reinterpret_cast<uint16_t *>( 426 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); 427 ASSERT_NE(ref_dst_, nullptr); 428 test_dst_ = reinterpret_cast<uint16_t *>( 429 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); 430 ASSERT_NE(test_dst_, nullptr); 431 } 432 433 void TearDown() override { 434 aom_free(src_); 435 aom_free(ref_dst_); 436 aom_free(test_dst_); 437 } 438 439 void test_copy_rect_8_to_16(CDEFCopyRect8To16 test_func, 440 CDEFCopyRect8To16 ref_func) { 441 constexpr int stride = MAX_CDEF_BLOCK; 442 int error = 0; 443 for (int k = 0; k < kIterations && !error; k++) { 444 // This function operates on values of width that are either 4 or a 445 // multiple of 8. For height, generate a random value between 1 and 256, 446 // making sure it is even. 447 const int width = k == 0 ? 4 : (rnd_.Rand8() % 32 + 1) * 8; 448 const int height = k == 0 ? 4 : (rnd_.Rand8() % 128 + 1) * 2; 449 for (int i = 0; i < height; i++) { 450 for (int j = 0; j < width; j++) { 451 src_[i * stride + j] = rnd_.Rand8(); 452 } 453 } 454 455 ref_func(ref_dst_, stride, src_, stride, width, height); 456 test_func(test_dst_, stride, src_, stride, width, height); 457 458 int i, j; 459 for (i = 0; i < height; i++) { 460 for (j = 0; j < width; j++) { 461 if (test_dst_[i * stride + j] != ref_dst_[i * stride + j]) { 462 error = 1; 463 break; 464 } 465 } 466 if (error) { 467 break; 468 } 469 } 470 EXPECT_EQ(0, error) 471 << "Error: CDEFCopyRect8to16Test, SIMD and C mismatch." << std::endl 472 << "First error at " << i << "," << j << " (" 473 << ref_dst_[i * stride + j] << " : " << test_dst_[i * stride + j] 474 << ") " << std::endl 475 << "width: " << width << std::endl 476 << "height: " << height << std::endl 477 << std::endl; 478 } 479 } 480 481 protected: 482 libaom_test::ACMRandom rnd_; 483 uint8_t *src_; 484 uint16_t *ref_dst_; 485 uint16_t *test_dst_; 486 CDEFCopyRect8To16 test_func_; 487 CDEFCopyRect8To16 ref_func_; 488 }; 489 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect8to16Test); 490 491 #if CONFIG_AV1_HIGHBITDEPTH 492 using CDEFCopyRect16To16 = void (*)(uint16_t *dst, int dstride, 493 const uint16_t *src, int sstride, int width, 494 int height); 495 496 using CDEFCopyRect16To16Param = 497 std::tuple<CDEFCopyRect16To16, CDEFCopyRect16To16>; 498 499 class CDEFCopyRect16to16Test 500 : public ::testing::TestWithParam<CDEFCopyRect16To16Param> { 501 public: 502 CDEFCopyRect16to16Test() 503 : rnd_(libaom_test::ACMRandom::DeterministicSeed()), 504 test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {} 505 ~CDEFCopyRect16to16Test() override = default; 506 void SetUp() override { 507 src_ = reinterpret_cast<uint16_t *>( 508 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); 509 ASSERT_NE(src_, nullptr); 510 ref_dst_ = reinterpret_cast<uint16_t *>( 511 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); 512 ASSERT_NE(ref_dst_, nullptr); 513 test_dst_ = reinterpret_cast<uint16_t *>( 514 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK * MAX_CDEF_BLOCK)); 515 ASSERT_NE(test_dst_, nullptr); 516 } 517 518 void TearDown() override { 519 aom_free(src_); 520 aom_free(ref_dst_); 521 aom_free(test_dst_); 522 } 523 524 void test_copy_rect_16_to_16(CDEFCopyRect16To16 test_func, 525 CDEFCopyRect16To16 ref_func) { 526 constexpr int stride = MAX_CDEF_BLOCK; 527 int error = 0; 528 for (int k = 0; k < kIterations && !error; k++) { 529 // This function operates on values of width that are either 4 or a 530 // multiple of 8. For height, generate a random value between 1 and 256, 531 // making sure it is even. 532 const int width = k == 0 ? 4 : (rnd_.Rand8() % 32 + 1) * 8; 533 const int height = k == 0 ? 4 : (rnd_.Rand8() % 128 + 1) * 2; 534 for (int i = 0; i < height; i++) { 535 for (int j = 0; j < width; j++) { 536 src_[i * stride + j] = rnd_.Rand16(); 537 } 538 } 539 540 ref_func(ref_dst_, stride, src_, stride, width, height); 541 test_func(test_dst_, stride, src_, stride, width, height); 542 543 int i, j; 544 for (i = 0; i < height; i++) { 545 for (j = 0; j < width; j++) { 546 if (test_dst_[i * stride + j] != ref_dst_[i * stride + j]) { 547 error = 1; 548 break; 549 } 550 } 551 if (error) { 552 break; 553 } 554 } 555 EXPECT_EQ(0, error) 556 << "Error: CDEFCopyRect16to16Test, SIMD and C mismatch." << std::endl 557 << "First error at " << i << "," << j << " (" 558 << ref_dst_[i * stride + j] << " : " << test_dst_[i * stride + j] 559 << ") " << std::endl 560 << "width: " << width << std::endl 561 << "height: " << height << std::endl 562 << std::endl; 563 } 564 } 565 566 protected: 567 libaom_test::ACMRandom rnd_; 568 uint16_t *src_; 569 uint16_t *ref_dst_; 570 uint16_t *test_dst_; 571 CDEFCopyRect16To16 test_func_; 572 CDEFCopyRect16To16 ref_func_; 573 }; 574 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect16to16Test); 575 #endif // CONFIG_AV1_HIGHBITDEPTH 576 577 TEST_P(CDEFBlockTest, TestSIMDNoMismatch) { 578 test_cdef(bsize_, 1, cdef_, ref_cdef_, boundary_, depth_); 579 } 580 581 TEST_P(CDEFBlockHighbdTest, TestSIMDHighbdNoMismatch) { 582 test_cdef(bsize_, 1, cdef_, ref_cdef_, boundary_, depth_); 583 } 584 585 TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) { 586 test_cdef_speed(bsize_, 4, cdef_, ref_cdef_, boundary_, depth_); 587 } 588 589 TEST_P(CDEFSpeedHighbdTest, DISABLED_TestSpeed) { 590 test_cdef_speed(bsize_, 4, cdef_, ref_cdef_, boundary_, depth_); 591 } 592 593 TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) { 594 test_finddir(finddir_, ref_finddir_); 595 } 596 597 TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) { 598 test_finddir_speed(finddir_, ref_finddir_); 599 } 600 601 TEST_P(CDEFFindDirDualTest, TestSIMDNoMismatch) { 602 test_finddir_dual(finddir_, ref_finddir_); 603 } 604 605 TEST_P(CDEFFindDirDualSpeedTest, DISABLED_TestSpeed) { 606 test_finddir_dual_speed(finddir_, ref_finddir_); 607 } 608 609 TEST_P(CDEFCopyRect8to16Test, TestSIMDNoMismatch) { 610 test_copy_rect_8_to_16(test_func_, ref_func_); 611 } 612 613 #if CONFIG_AV1_HIGHBITDEPTH 614 TEST_P(CDEFCopyRect16to16Test, TestSIMDNoMismatch) { 615 test_copy_rect_16_to_16(test_func_, ref_func_); 616 } 617 #endif // CONFIG_AV1_HIGHBITDEPTH 618 619 using std::make_tuple; 620 621 #if ((AOM_ARCH_X86 && HAVE_SSSE3) || HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON || \ 622 HAVE_RVV) 623 static const CdefFilterBlockFunctions kCdefFilterFuncC[] = { 624 { &cdef_filter_8_0_c, &cdef_filter_8_1_c, &cdef_filter_8_2_c, 625 &cdef_filter_8_3_c } 626 }; 627 628 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncC[] = { 629 { &cdef_filter_16_0_c, &cdef_filter_16_0_c, &cdef_filter_16_0_c, 630 &cdef_filter_16_0_c } 631 }; 632 #endif 633 634 #if AOM_ARCH_X86 && HAVE_SSSE3 635 static const CdefFilterBlockFunctions kCdefFilterFuncSsse3[] = { 636 { &cdef_filter_8_0_ssse3, &cdef_filter_8_1_ssse3, &cdef_filter_8_2_ssse3, 637 &cdef_filter_8_3_ssse3 } 638 }; 639 640 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSsse3[] = { 641 { &cdef_filter_16_0_ssse3, &cdef_filter_16_1_ssse3, &cdef_filter_16_2_ssse3, 642 &cdef_filter_16_3_ssse3 } 643 }; 644 645 INSTANTIATE_TEST_SUITE_P( 646 SSSE3, CDEFBlockTest, 647 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3), 648 ::testing::ValuesIn(kCdefFilterFuncC), 649 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 650 BLOCK_8X8), 651 ::testing::Range(0, 16), ::testing::Values(8))); 652 INSTANTIATE_TEST_SUITE_P( 653 SSSE3, CDEFBlockHighbdTest, 654 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3), 655 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 656 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 657 BLOCK_8X8), 658 ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); 659 INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirTest, 660 ::testing::Values(make_tuple(&cdef_find_dir_ssse3, 661 &cdef_find_dir_c))); 662 INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualTest, 663 ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3, 664 &cdef_find_dir_dual_c))); 665 666 INSTANTIATE_TEST_SUITE_P( 667 SSSE3, CDEFCopyRect8to16Test, 668 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, 669 &cdef_copy_rect8_8bit_to_16bit_ssse3))); 670 671 #if CONFIG_AV1_HIGHBITDEPTH 672 INSTANTIATE_TEST_SUITE_P( 673 SSSE3, CDEFCopyRect16to16Test, 674 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, 675 &cdef_copy_rect8_16bit_to_16bit_ssse3))); 676 #endif // CONFIG_AV1_HIGHBITDEPTH 677 #endif 678 679 #if HAVE_SSE4_1 680 static const CdefFilterBlockFunctions kCdefFilterFuncSse4_1[] = { 681 { &cdef_filter_8_0_sse4_1, &cdef_filter_8_1_sse4_1, &cdef_filter_8_2_sse4_1, 682 &cdef_filter_8_3_sse4_1 } 683 }; 684 685 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSse4_1[] = { 686 { &cdef_filter_16_0_sse4_1, &cdef_filter_16_1_sse4_1, 687 &cdef_filter_16_2_sse4_1, &cdef_filter_16_3_sse4_1 } 688 }; 689 690 INSTANTIATE_TEST_SUITE_P( 691 SSE4_1, CDEFBlockTest, 692 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1), 693 ::testing::ValuesIn(kCdefFilterFuncC), 694 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 695 BLOCK_8X8), 696 ::testing::Range(0, 16), ::testing::Values(8))); 697 INSTANTIATE_TEST_SUITE_P( 698 SSE4_1, CDEFBlockHighbdTest, 699 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1), 700 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 701 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 702 BLOCK_8X8), 703 ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); 704 INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirTest, 705 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1, 706 &cdef_find_dir_c))); 707 INSTANTIATE_TEST_SUITE_P( 708 SSE4_1, CDEFFindDirDualTest, 709 ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1, 710 &cdef_find_dir_dual_c))); 711 712 INSTANTIATE_TEST_SUITE_P( 713 SSE4_1, CDEFCopyRect8to16Test, 714 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, 715 &cdef_copy_rect8_8bit_to_16bit_sse4_1))); 716 717 #if CONFIG_AV1_HIGHBITDEPTH 718 INSTANTIATE_TEST_SUITE_P( 719 SSE4_1, CDEFCopyRect16to16Test, 720 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, 721 &cdef_copy_rect8_16bit_to_16bit_sse4_1))); 722 #endif // CONFIG_AV1_HIGHBITDEPTH 723 #endif 724 725 #if HAVE_AVX2 726 static const CdefFilterBlockFunctions kCdefFilterFuncAvx2[] = { 727 { &cdef_filter_8_0_avx2, &cdef_filter_8_1_avx2, &cdef_filter_8_2_avx2, 728 &cdef_filter_8_3_avx2 } 729 }; 730 731 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncAvx2[] = { 732 { &cdef_filter_16_0_avx2, &cdef_filter_16_1_avx2, &cdef_filter_16_2_avx2, 733 &cdef_filter_16_3_avx2 } 734 }; 735 736 INSTANTIATE_TEST_SUITE_P( 737 AVX2, CDEFBlockTest, 738 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2), 739 ::testing::ValuesIn(kCdefFilterFuncC), 740 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 741 BLOCK_8X8), 742 ::testing::Range(0, 16), ::testing::Values(8))); 743 INSTANTIATE_TEST_SUITE_P( 744 AVX2, CDEFBlockHighbdTest, 745 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2), 746 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 747 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 748 BLOCK_8X8), 749 ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); 750 INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirTest, 751 ::testing::Values(make_tuple(&cdef_find_dir_avx2, 752 &cdef_find_dir_c))); 753 INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualTest, 754 ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2, 755 &cdef_find_dir_dual_c))); 756 757 INSTANTIATE_TEST_SUITE_P( 758 AVX2, CDEFCopyRect8to16Test, 759 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, 760 &cdef_copy_rect8_8bit_to_16bit_avx2))); 761 762 #if CONFIG_AV1_HIGHBITDEPTH 763 INSTANTIATE_TEST_SUITE_P( 764 AVX2, CDEFCopyRect16to16Test, 765 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, 766 &cdef_copy_rect8_16bit_to_16bit_avx2))); 767 #endif // CONFIG_AV1_HIGHBITDEPTH 768 #endif 769 770 #if HAVE_NEON 771 static const CdefFilterBlockFunctions kCdefFilterFuncNeon[] = { 772 { &cdef_filter_8_0_neon, &cdef_filter_8_1_neon, &cdef_filter_8_2_neon, 773 &cdef_filter_8_3_neon } 774 }; 775 776 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncNeon[] = { 777 { &cdef_filter_16_0_neon, &cdef_filter_16_1_neon, &cdef_filter_16_2_neon, 778 &cdef_filter_16_3_neon } 779 }; 780 781 INSTANTIATE_TEST_SUITE_P( 782 NEON, CDEFBlockTest, 783 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon), 784 ::testing::ValuesIn(kCdefFilterFuncC), 785 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 786 BLOCK_8X8), 787 ::testing::Range(0, 16), ::testing::Values(8))); 788 INSTANTIATE_TEST_SUITE_P( 789 NEON, CDEFBlockHighbdTest, 790 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon), 791 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 792 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 793 BLOCK_8X8), 794 ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); 795 INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirTest, 796 ::testing::Values(make_tuple(&cdef_find_dir_neon, 797 &cdef_find_dir_c))); 798 INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualTest, 799 ::testing::Values(make_tuple(&cdef_find_dir_dual_neon, 800 &cdef_find_dir_dual_c))); 801 802 INSTANTIATE_TEST_SUITE_P( 803 NEON, CDEFCopyRect8to16Test, 804 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, 805 &cdef_copy_rect8_8bit_to_16bit_neon))); 806 807 #if CONFIG_AV1_HIGHBITDEPTH 808 INSTANTIATE_TEST_SUITE_P( 809 NEON, CDEFCopyRect16to16Test, 810 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, 811 &cdef_copy_rect8_16bit_to_16bit_neon))); 812 #endif // CONFIG_AV1_HIGHBITDEPTH 813 #endif 814 815 #if HAVE_RVV 816 static const CdefFilterBlockFunctions kCdefFilterFuncRvv[] = { 817 { &cdef_filter_8_0_rvv, &cdef_filter_8_1_rvv, &cdef_filter_8_2_rvv, 818 &cdef_filter_8_3_rvv } 819 }; 820 821 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncRvv[] = { 822 { &cdef_filter_16_0_rvv, &cdef_filter_16_1_rvv, &cdef_filter_16_2_rvv, 823 &cdef_filter_16_3_rvv } 824 }; 825 826 INSTANTIATE_TEST_SUITE_P( 827 RVV, CDEFBlockTest, 828 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncRvv), 829 ::testing::ValuesIn(kCdefFilterFuncC), 830 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 831 BLOCK_8X8), 832 ::testing::Range(0, 16), ::testing::Values(8))); 833 INSTANTIATE_TEST_SUITE_P( 834 RVV, CDEFBlockHighbdTest, 835 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncRvv), 836 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 837 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 838 BLOCK_8X8), 839 ::testing::Range(0, 16), ::testing::Range(10, 13, 2))); 840 INSTANTIATE_TEST_SUITE_P(RVV, CDEFFindDirTest, 841 ::testing::Values(make_tuple(&cdef_find_dir_rvv, 842 &cdef_find_dir_c))); 843 844 INSTANTIATE_TEST_SUITE_P( 845 RVV, CDEFCopyRect8to16Test, 846 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c, 847 &cdef_copy_rect8_8bit_to_16bit_rvv))); 848 849 INSTANTIATE_TEST_SUITE_P( 850 RVV, CDEFCopyRect16to16Test, 851 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c, 852 &cdef_copy_rect8_16bit_to_16bit_rvv))); 853 #endif 854 855 // Test speed for all supported architectures 856 #if AOM_ARCH_X86 && HAVE_SSSE3 857 INSTANTIATE_TEST_SUITE_P( 858 SSSE3, CDEFSpeedTest, 859 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3), 860 ::testing::ValuesIn(kCdefFilterFuncC), 861 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 862 BLOCK_8X8), 863 ::testing::Range(0, 16), ::testing::Values(8))); 864 INSTANTIATE_TEST_SUITE_P( 865 SSSE3, CDEFSpeedHighbdTest, 866 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3), 867 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 868 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 869 BLOCK_8X8), 870 ::testing::Range(0, 16), ::testing::Values(10))); 871 INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirSpeedTest, 872 ::testing::Values(make_tuple(&cdef_find_dir_ssse3, 873 &cdef_find_dir_c))); 874 INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualSpeedTest, 875 ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3, 876 &cdef_find_dir_dual_c))); 877 #endif 878 879 #if HAVE_SSE4_1 880 INSTANTIATE_TEST_SUITE_P( 881 SSE4_1, CDEFSpeedTest, 882 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1), 883 ::testing::ValuesIn(kCdefFilterFuncC), 884 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 885 BLOCK_8X8), 886 ::testing::Range(0, 16), ::testing::Values(8))); 887 INSTANTIATE_TEST_SUITE_P( 888 SSE4_1, CDEFSpeedHighbdTest, 889 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1), 890 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 891 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 892 BLOCK_8X8), 893 ::testing::Range(0, 16), ::testing::Values(10))); 894 INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirSpeedTest, 895 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1, 896 &cdef_find_dir_c))); 897 INSTANTIATE_TEST_SUITE_P( 898 SSE4_1, CDEFFindDirDualSpeedTest, 899 ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1, 900 &cdef_find_dir_dual_c))); 901 #endif 902 903 #if HAVE_AVX2 904 INSTANTIATE_TEST_SUITE_P( 905 AVX2, CDEFSpeedTest, 906 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2), 907 ::testing::ValuesIn(kCdefFilterFuncC), 908 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 909 BLOCK_8X8), 910 ::testing::Range(0, 16), ::testing::Values(8))); 911 INSTANTIATE_TEST_SUITE_P( 912 AVX2, CDEFSpeedHighbdTest, 913 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2), 914 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 915 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 916 BLOCK_8X8), 917 ::testing::Range(0, 16), ::testing::Values(10))); 918 INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirSpeedTest, 919 ::testing::Values(make_tuple(&cdef_find_dir_avx2, 920 &cdef_find_dir_c))); 921 INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualSpeedTest, 922 ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2, 923 &cdef_find_dir_dual_c))); 924 #endif 925 926 #if HAVE_NEON 927 INSTANTIATE_TEST_SUITE_P( 928 NEON, CDEFSpeedTest, 929 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon), 930 ::testing::ValuesIn(kCdefFilterFuncC), 931 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 932 BLOCK_8X8), 933 ::testing::Range(0, 16), ::testing::Values(8))); 934 INSTANTIATE_TEST_SUITE_P( 935 NEON, CDEFSpeedHighbdTest, 936 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon), 937 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 938 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 939 BLOCK_8X8), 940 ::testing::Range(0, 16), ::testing::Values(10))); 941 INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirSpeedTest, 942 ::testing::Values(make_tuple(&cdef_find_dir_neon, 943 &cdef_find_dir_c))); 944 INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualSpeedTest, 945 ::testing::Values(make_tuple(&cdef_find_dir_dual_neon, 946 &cdef_find_dir_dual_c))); 947 #endif 948 949 #if HAVE_RVV 950 INSTANTIATE_TEST_SUITE_P( 951 RVV, CDEFSpeedTest, 952 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncRvv), 953 ::testing::ValuesIn(kCdefFilterFuncC), 954 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 955 BLOCK_8X8), 956 ::testing::Range(0, 16), ::testing::Values(8))); 957 INSTANTIATE_TEST_SUITE_P( 958 RVV, CDEFSpeedHighbdTest, 959 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncRvv), 960 ::testing::ValuesIn(kCdefFilterHighbdFuncC), 961 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, 962 BLOCK_8X8), 963 ::testing::Range(0, 16), ::testing::Values(10))); 964 INSTANTIATE_TEST_SUITE_P(RVV, CDEFFindDirSpeedTest, 965 ::testing::Values(make_tuple(&cdef_find_dir_rvv, 966 &cdef_find_dir_c))); 967 #endif 968 969 } // namespace