sum_squares_test.cc (29565B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <cmath> 13 #include <cstdlib> 14 #include <string> 15 #include <tuple> 16 17 #include "gtest/gtest.h" 18 19 #include "config/aom_config.h" 20 #include "config/aom_dsp_rtcd.h" 21 22 #include "aom_ports/mem.h" 23 #include "av1/common/common_data.h" 24 #include "test/acm_random.h" 25 #include "test/register_state_check.h" 26 #include "test/util.h" 27 #include "test/function_equivalence_test.h" 28 29 using libaom_test::ACMRandom; 30 using libaom_test::FunctionEquivalenceTest; 31 using ::testing::Combine; 32 using ::testing::Range; 33 using ::testing::Values; 34 using ::testing::ValuesIn; 35 36 namespace { 37 const int kNumIterations = 10000; 38 39 static const int16_t kInt13Max = (1 << 12) - 1; 40 41 using SSI16Func = uint64_t (*)(const int16_t *src, int stride, int width, 42 int height); 43 using TestFuncs = libaom_test::FuncParam<SSI16Func>; 44 45 class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> { 46 public: 47 ~SumSquaresTest() override = default; 48 void SetUp() override { 49 params_ = this->GetParam(); 50 rnd_.Reset(ACMRandom::DeterministicSeed()); 51 src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2)); 52 ASSERT_NE(src_, nullptr); 53 } 54 55 void TearDown() override { aom_free(src_); } 56 void RunTest(bool is_random); 57 void RunSpeedTest(); 58 59 void GenRandomData(int width, int height, int stride) { 60 const int msb = 11; // Up to 12 bit input 61 const int limit = 1 << (msb + 1); 62 for (int ii = 0; ii < height; ii++) { 63 for (int jj = 0; jj < width; jj++) { 64 src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit); 65 } 66 } 67 } 68 69 void GenExtremeData(int width, int height, int stride) { 70 const int msb = 11; // Up to 12 bit input 71 const int limit = 1 << (msb + 1); 72 const int val = rnd_(2) ? limit - 1 : -(limit - 1); 73 for (int ii = 0; ii < height; ii++) { 74 for (int jj = 0; jj < width; jj++) { 75 src_[ii * stride + jj] = val; 76 } 77 } 78 } 79 80 protected: 81 TestFuncs params_; 82 int16_t *src_; 83 ACMRandom rnd_; 84 }; 85 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquaresTest); 86 87 void SumSquaresTest::RunTest(bool is_random) { 88 int failed = 0; 89 for (int k = 0; k < kNumIterations; k++) { 90 const int width = 4 * (rnd_(31) + 1); // Up to 128x128 91 const int height = 4 * (rnd_(31) + 1); // Up to 128x128 92 int stride = 4 << rnd_(7); // Up to 256 stride 93 while (stride < width) { // Make sure it's valid 94 stride = 4 << rnd_(7); 95 } 96 if (is_random) { 97 GenRandomData(width, height, stride); 98 } else { 99 GenExtremeData(width, height, stride); 100 } 101 const uint64_t res_ref = params_.ref_func(src_, stride, width, height); 102 uint64_t res_tst; 103 API_REGISTER_STATE_CHECK(res_tst = 104 params_.tst_func(src_, stride, width, height)); 105 106 if (!failed) { 107 failed = res_ref != res_tst; 108 EXPECT_EQ(res_ref, res_tst) 109 << "Error: Sum Squares Test [" << width << "x" << height 110 << "] C output does not match optimized output."; 111 } 112 } 113 } 114 115 void SumSquaresTest::RunSpeedTest() { 116 for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) { 117 const int width = block_size_wide[block]; // Up to 128x128 118 const int height = block_size_high[block]; // Up to 128x128 119 int stride = 4 << rnd_(7); // Up to 256 stride 120 while (stride < width) { // Make sure it's valid 121 stride = 4 << rnd_(7); 122 } 123 GenExtremeData(width, height, stride); 124 const int num_loops = 1000000000 / (width + height); 125 aom_usec_timer timer; 126 aom_usec_timer_start(&timer); 127 128 for (int i = 0; i < num_loops; ++i) 129 params_.ref_func(src_, stride, width, height); 130 131 aom_usec_timer_mark(&timer); 132 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 133 printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height, 134 1000.0 * elapsed_time / num_loops); 135 136 aom_usec_timer timer1; 137 aom_usec_timer_start(&timer1); 138 for (int i = 0; i < num_loops; ++i) 139 params_.tst_func(src_, stride, width, height); 140 aom_usec_timer_mark(&timer1); 141 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); 142 printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height, 143 1000.0 * elapsed_time1 / num_loops); 144 } 145 } 146 147 TEST_P(SumSquaresTest, OperationCheck) { 148 RunTest(true); // GenRandomData 149 } 150 151 TEST_P(SumSquaresTest, ExtremeValues) { 152 RunTest(false); // GenExtremeData 153 } 154 155 TEST_P(SumSquaresTest, DISABLED_Speed) { RunSpeedTest(); } 156 157 #if HAVE_SSE2 158 159 INSTANTIATE_TEST_SUITE_P( 160 SSE2, SumSquaresTest, 161 ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, 162 &aom_sum_squares_2d_i16_sse2))); 163 164 #endif // HAVE_SSE2 165 166 #if HAVE_NEON 167 168 INSTANTIATE_TEST_SUITE_P( 169 NEON, SumSquaresTest, 170 ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, 171 &aom_sum_squares_2d_i16_neon))); 172 173 #endif // HAVE_NEON 174 175 #if HAVE_SVE 176 INSTANTIATE_TEST_SUITE_P( 177 SVE, SumSquaresTest, 178 ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, 179 &aom_sum_squares_2d_i16_sve))); 180 181 #endif // HAVE_SVE 182 183 #if HAVE_AVX2 184 INSTANTIATE_TEST_SUITE_P( 185 AVX2, SumSquaresTest, 186 ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c, 187 &aom_sum_squares_2d_i16_avx2))); 188 #endif // HAVE_AVX2 189 190 ////////////////////////////////////////////////////////////////////////////// 191 // 1D version 192 ////////////////////////////////////////////////////////////////////////////// 193 194 using F1D = uint64_t (*)(const int16_t *src, uint32_t n); 195 using TestFuncs1D = libaom_test::FuncParam<F1D>; 196 197 class SumSquares1DTest : public FunctionEquivalenceTest<F1D> { 198 protected: 199 static const int kIterations = 1000; 200 static const int kMaxSize = 256; 201 }; 202 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquares1DTest); 203 204 TEST_P(SumSquares1DTest, RandomValues) { 205 DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); 206 207 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 208 for (int i = 0; i < kMaxSize * kMaxSize; ++i) 209 src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max; 210 211 // Block size is between 64 and 128 * 128 and is always a multiple of 64. 212 const int n = (rng_(255) + 1) * 64; 213 214 const uint64_t ref_res = params_.ref_func(src, n); 215 uint64_t tst_res; 216 API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n)); 217 218 ASSERT_EQ(ref_res, tst_res); 219 } 220 } 221 222 TEST_P(SumSquares1DTest, ExtremeValues) { 223 DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]); 224 225 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 226 if (rng_(2)) { 227 for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max; 228 } else { 229 for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max; 230 } 231 232 // Block size is between 64 and 128 * 128 and is always a multiple of 64. 233 const int n = (rng_(255) + 1) * 64; 234 235 const uint64_t ref_res = params_.ref_func(src, n); 236 uint64_t tst_res; 237 API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, n)); 238 239 ASSERT_EQ(ref_res, tst_res); 240 } 241 } 242 243 #if HAVE_SSE2 244 INSTANTIATE_TEST_SUITE_P(SSE2, SumSquares1DTest, 245 ::testing::Values(TestFuncs1D( 246 aom_sum_squares_i16_c, aom_sum_squares_i16_sse2))); 247 248 #endif // HAVE_SSE2 249 250 #if HAVE_NEON 251 INSTANTIATE_TEST_SUITE_P(NEON, SumSquares1DTest, 252 ::testing::Values(TestFuncs1D( 253 aom_sum_squares_i16_c, aom_sum_squares_i16_neon))); 254 255 #endif // HAVE_NEON 256 257 #if HAVE_SVE 258 INSTANTIATE_TEST_SUITE_P(SVE, SumSquares1DTest, 259 ::testing::Values(TestFuncs1D( 260 aom_sum_squares_i16_c, aom_sum_squares_i16_sve))); 261 262 #endif // HAVE_SVE 263 264 using SSEFunc = int64_t (*)(const uint8_t *a, int a_stride, const uint8_t *b, 265 int b_stride, int width, int height); 266 using TestSSEFuncs = libaom_test::FuncParam<SSEFunc>; 267 268 using SSETestParam = std::tuple<TestSSEFuncs, int>; 269 270 class SSETest : public ::testing::TestWithParam<SSETestParam> { 271 public: 272 ~SSETest() override = default; 273 void SetUp() override { 274 params_ = GET_PARAM(0); 275 width_ = GET_PARAM(1); 276 is_hbd_ = 277 #if CONFIG_AV1_HIGHBITDEPTH 278 params_.ref_func == aom_highbd_sse_c; 279 #else 280 false; 281 #endif 282 rnd_.Reset(ACMRandom::DeterministicSeed()); 283 src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); 284 ref_ = reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2)); 285 ASSERT_NE(src_, nullptr); 286 ASSERT_NE(ref_, nullptr); 287 } 288 289 void TearDown() override { 290 aom_free(src_); 291 aom_free(ref_); 292 } 293 void RunTest(bool is_random, int width, int height, int run_times); 294 295 void GenRandomData(int width, int height, int stride) { 296 uint16_t *src16 = reinterpret_cast<uint16_t *>(src_); 297 uint16_t *ref16 = reinterpret_cast<uint16_t *>(ref_); 298 const int msb = 11; // Up to 12 bit input 299 const int limit = 1 << (msb + 1); 300 for (int ii = 0; ii < height; ii++) { 301 for (int jj = 0; jj < width; jj++) { 302 if (!is_hbd_) { 303 src_[ii * stride + jj] = rnd_.Rand8(); 304 ref_[ii * stride + jj] = rnd_.Rand8(); 305 } else { 306 src16[ii * stride + jj] = rnd_(limit); 307 ref16[ii * stride + jj] = rnd_(limit); 308 } 309 } 310 } 311 } 312 313 void GenExtremeData(int width, int height, int stride, uint8_t *data, 314 int16_t val) { 315 uint16_t *data16 = reinterpret_cast<uint16_t *>(data); 316 for (int ii = 0; ii < height; ii++) { 317 for (int jj = 0; jj < width; jj++) { 318 if (!is_hbd_) { 319 data[ii * stride + jj] = static_cast<uint8_t>(val); 320 } else { 321 data16[ii * stride + jj] = val; 322 } 323 } 324 } 325 } 326 327 protected: 328 bool is_hbd_; 329 int width_; 330 TestSSEFuncs params_; 331 uint8_t *src_; 332 uint8_t *ref_; 333 ACMRandom rnd_; 334 }; 335 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSETest); 336 337 void SSETest::RunTest(bool is_random, int width, int height, int run_times) { 338 int failed = 0; 339 aom_usec_timer ref_timer, test_timer; 340 for (int k = 0; k < 3; k++) { 341 int stride = 4 << rnd_(7); // Up to 256 stride 342 while (stride < width) { // Make sure it's valid 343 stride = 4 << rnd_(7); 344 } 345 if (is_random) { 346 GenRandomData(width, height, stride); 347 } else { 348 const int msb = is_hbd_ ? 12 : 8; // Up to 12 bit input 349 const int limit = (1 << msb) - 1; 350 if (k == 0) { 351 GenExtremeData(width, height, stride, src_, 0); 352 GenExtremeData(width, height, stride, ref_, limit); 353 } else { 354 GenExtremeData(width, height, stride, src_, limit); 355 GenExtremeData(width, height, stride, ref_, 0); 356 } 357 } 358 int64_t res_ref, res_tst; 359 uint8_t *src = src_; 360 uint8_t *ref = ref_; 361 if (is_hbd_) { 362 src = CONVERT_TO_BYTEPTR(src_); 363 ref = CONVERT_TO_BYTEPTR(ref_); 364 } 365 res_ref = params_.ref_func(src, stride, ref, stride, width, height); 366 res_tst = params_.tst_func(src, stride, ref, stride, width, height); 367 if (run_times > 1) { 368 aom_usec_timer_start(&ref_timer); 369 for (int j = 0; j < run_times; j++) { 370 params_.ref_func(src, stride, ref, stride, width, height); 371 } 372 aom_usec_timer_mark(&ref_timer); 373 const int elapsed_time_c = 374 static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); 375 376 aom_usec_timer_start(&test_timer); 377 for (int j = 0; j < run_times; j++) { 378 params_.tst_func(src, stride, ref, stride, width, height); 379 } 380 aom_usec_timer_mark(&test_timer); 381 const int elapsed_time_simd = 382 static_cast<int>(aom_usec_timer_elapsed(&test_timer)); 383 384 printf( 385 "c_time=%d \t simd_time=%d \t " 386 "gain=%d\n", 387 elapsed_time_c, elapsed_time_simd, 388 (elapsed_time_c / elapsed_time_simd)); 389 } else { 390 if (!failed) { 391 failed = res_ref != res_tst; 392 EXPECT_EQ(res_ref, res_tst) 393 << "Error:" << (is_hbd_ ? "hbd " : " ") << k << " SSE Test [" 394 << width << "x" << height 395 << "] C output does not match optimized output."; 396 } 397 } 398 } 399 } 400 401 TEST_P(SSETest, OperationCheck) { 402 for (int height = 4; height <= 128; height += 4) { 403 RunTest(true, width_, height, 1); // GenRandomData 404 } 405 } 406 407 TEST_P(SSETest, ExtremeValues) { 408 for (int height = 4; height <= 128; height += 4) { 409 RunTest(false, width_, height, 1); 410 } 411 } 412 413 TEST_P(SSETest, DISABLED_Speed) { 414 for (int height = 4; height <= 128; height += 4) { 415 RunTest(true, width_, height, 100); 416 } 417 } 418 419 #if HAVE_NEON 420 TestSSEFuncs sse_neon[] = { 421 TestSSEFuncs(&aom_sse_c, &aom_sse_neon), 422 #if CONFIG_AV1_HIGHBITDEPTH 423 TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_neon) 424 #endif 425 }; 426 INSTANTIATE_TEST_SUITE_P(NEON, SSETest, 427 Combine(ValuesIn(sse_neon), Range(4, 129, 4))); 428 #endif // HAVE_NEON 429 430 #if HAVE_NEON_DOTPROD 431 TestSSEFuncs sse_neon_dotprod[] = { 432 TestSSEFuncs(&aom_sse_c, &aom_sse_neon_dotprod), 433 }; 434 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SSETest, 435 Combine(ValuesIn(sse_neon_dotprod), Range(4, 129, 4))); 436 #endif // HAVE_NEON_DOTPROD 437 438 #if HAVE_SSE4_1 439 TestSSEFuncs sse_sse4[] = { 440 TestSSEFuncs(&aom_sse_c, &aom_sse_sse4_1), 441 #if CONFIG_AV1_HIGHBITDEPTH 442 TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_sse4_1) 443 #endif 444 }; 445 INSTANTIATE_TEST_SUITE_P(SSE4_1, SSETest, 446 Combine(ValuesIn(sse_sse4), Range(4, 129, 4))); 447 #endif // HAVE_SSE4_1 448 449 #if HAVE_AVX2 450 451 TestSSEFuncs sse_avx2[] = { 452 TestSSEFuncs(&aom_sse_c, &aom_sse_avx2), 453 #if CONFIG_AV1_HIGHBITDEPTH 454 TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_avx2) 455 #endif 456 }; 457 INSTANTIATE_TEST_SUITE_P(AVX2, SSETest, 458 Combine(ValuesIn(sse_avx2), Range(4, 129, 4))); 459 #endif // HAVE_AVX2 460 461 #if HAVE_SVE 462 #if CONFIG_AV1_HIGHBITDEPTH 463 TestSSEFuncs sse_sve[] = { TestSSEFuncs(&aom_highbd_sse_c, 464 &aom_highbd_sse_sve) }; 465 INSTANTIATE_TEST_SUITE_P(SVE, SSETest, 466 Combine(ValuesIn(sse_sve), Range(4, 129, 4))); 467 #endif 468 #endif // HAVE_SVE 469 470 ////////////////////////////////////////////////////////////////////////////// 471 // get_blk sum squares test functions 472 ////////////////////////////////////////////////////////////////////////////// 473 474 using sse_sum_func = void (*)(const int16_t *data, int stride, int bw, int bh, 475 int *x_sum, int64_t *x2_sum); 476 using TestSSE_SumFuncs = libaom_test::FuncParam<sse_sum_func>; 477 478 using SSE_SumTestParam = std::tuple<TestSSE_SumFuncs, TX_SIZE>; 479 480 class SSE_Sum_Test : public ::testing::TestWithParam<SSE_SumTestParam> { 481 public: 482 ~SSE_Sum_Test() override = default; 483 void SetUp() override { 484 params_ = GET_PARAM(0); 485 rnd_.Reset(ACMRandom::DeterministicSeed()); 486 src_ = reinterpret_cast<int16_t *>(aom_memalign(32, 256 * 256 * 2)); 487 ASSERT_NE(src_, nullptr); 488 } 489 490 void TearDown() override { aom_free(src_); } 491 void RunTest(bool is_random, int tx_size, int run_times); 492 493 void GenRandomData(int width, int height, int stride) { 494 const int msb = 11; // Up to 12 bit input 495 const int limit = 1 << (msb + 1); 496 for (int ii = 0; ii < height; ii++) { 497 for (int jj = 0; jj < width; jj++) { 498 src_[ii * stride + jj] = rnd_(limit); 499 } 500 } 501 } 502 503 void GenExtremeData(int width, int height, int stride, int16_t *data, 504 int16_t val) { 505 for (int ii = 0; ii < height; ii++) { 506 for (int jj = 0; jj < width; jj++) { 507 data[ii * stride + jj] = val; 508 } 509 } 510 } 511 512 protected: 513 TestSSE_SumFuncs params_; 514 int16_t *src_; 515 ACMRandom rnd_; 516 }; 517 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSE_Sum_Test); 518 519 void SSE_Sum_Test::RunTest(bool is_random, int tx_size, int run_times) { 520 aom_usec_timer ref_timer, test_timer; 521 int width = tx_size_wide[tx_size]; 522 int height = tx_size_high[tx_size]; 523 for (int k = 0; k < 3; k++) { 524 int stride = 4 << rnd_(7); // Up to 256 stride 525 while (stride < width) { // Make sure it's valid 526 stride = 4 << rnd_(7); 527 } 528 if (is_random) { 529 GenRandomData(width, height, stride); 530 } else { 531 const int msb = 12; // Up to 12 bit input 532 const int limit = (1 << msb) - 1; 533 if (k == 0) { 534 GenExtremeData(width, height, stride, src_, limit); 535 } else { 536 GenExtremeData(width, height, stride, src_, -limit); 537 } 538 } 539 int sum_c = 0; 540 int64_t sse_intr = 0; 541 int sum_intr = 0; 542 int64_t sse_c = 0; 543 544 params_.ref_func(src_, stride, width, height, &sum_c, &sse_c); 545 params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr); 546 547 if (run_times > 1) { 548 aom_usec_timer_start(&ref_timer); 549 for (int j = 0; j < run_times; j++) { 550 params_.ref_func(src_, stride, width, height, &sum_c, &sse_c); 551 } 552 aom_usec_timer_mark(&ref_timer); 553 const int elapsed_time_c = 554 static_cast<int>(aom_usec_timer_elapsed(&ref_timer)); 555 556 aom_usec_timer_start(&test_timer); 557 for (int j = 0; j < run_times; j++) { 558 params_.tst_func(src_, stride, width, height, &sum_intr, &sse_intr); 559 } 560 aom_usec_timer_mark(&test_timer); 561 const int elapsed_time_simd = 562 static_cast<int>(aom_usec_timer_elapsed(&test_timer)); 563 564 printf( 565 "c_time=%d \t simd_time=%d \t " 566 "gain=%f\t width=%d\t height=%d \n", 567 elapsed_time_c, elapsed_time_simd, 568 (float)((float)elapsed_time_c / (float)elapsed_time_simd), width, 569 height); 570 571 } else { 572 EXPECT_EQ(sum_c, sum_intr) 573 << "Error:" << k << " SSE Sum Test [" << width << "x" << height 574 << "] C output does not match optimized output."; 575 EXPECT_EQ(sse_c, sse_intr) 576 << "Error:" << k << " SSE Sum Test [" << width << "x" << height 577 << "] C output does not match optimized output."; 578 } 579 } 580 } 581 582 TEST_P(SSE_Sum_Test, OperationCheck) { 583 RunTest(true, GET_PARAM(1), 1); // GenRandomData 584 } 585 586 TEST_P(SSE_Sum_Test, ExtremeValues) { RunTest(false, GET_PARAM(1), 1); } 587 588 TEST_P(SSE_Sum_Test, DISABLED_Speed) { RunTest(true, GET_PARAM(1), 10000); } 589 590 #if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON 591 const TX_SIZE kValidBlockSize[] = { TX_4X4, TX_8X8, TX_16X16, TX_32X32, 592 TX_64X64, TX_4X8, TX_8X4, TX_8X16, 593 TX_16X8, TX_16X32, TX_32X16, TX_64X32, 594 TX_32X64, TX_4X16, TX_16X4, TX_8X32, 595 TX_32X8, TX_16X64, TX_64X16 }; 596 #endif 597 598 #if HAVE_SSE2 599 TestSSE_SumFuncs sse_sum_sse2[] = { TestSSE_SumFuncs( 600 &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_sse2) }; 601 INSTANTIATE_TEST_SUITE_P(SSE2, SSE_Sum_Test, 602 Combine(ValuesIn(sse_sum_sse2), 603 ValuesIn(kValidBlockSize))); 604 #endif // HAVE_SSE2 605 606 #if HAVE_AVX2 607 TestSSE_SumFuncs sse_sum_avx2[] = { TestSSE_SumFuncs( 608 &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_avx2) }; 609 INSTANTIATE_TEST_SUITE_P(AVX2, SSE_Sum_Test, 610 Combine(ValuesIn(sse_sum_avx2), 611 ValuesIn(kValidBlockSize))); 612 #endif // HAVE_AVX2 613 614 #if HAVE_NEON 615 TestSSE_SumFuncs sse_sum_neon[] = { TestSSE_SumFuncs( 616 &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_neon) }; 617 INSTANTIATE_TEST_SUITE_P(NEON, SSE_Sum_Test, 618 Combine(ValuesIn(sse_sum_neon), 619 ValuesIn(kValidBlockSize))); 620 #endif // HAVE_NEON 621 622 #if HAVE_SVE 623 TestSSE_SumFuncs sse_sum_sve[] = { TestSSE_SumFuncs(&aom_get_blk_sse_sum_c, 624 &aom_get_blk_sse_sum_sve) }; 625 INSTANTIATE_TEST_SUITE_P(SVE, SSE_Sum_Test, 626 Combine(ValuesIn(sse_sum_sve), 627 ValuesIn(kValidBlockSize))); 628 #endif // HAVE_SVE 629 630 ////////////////////////////////////////////////////////////////////////////// 631 // 2D Variance test functions 632 ////////////////////////////////////////////////////////////////////////////// 633 634 using Var2DFunc = uint64_t (*)(uint8_t *src, int stride, int width, int height); 635 using TestFuncVar2D = libaom_test::FuncParam<Var2DFunc>; 636 637 const uint16_t test_block_size[2] = { 128, 256 }; 638 639 class Lowbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> { 640 public: 641 ~Lowbd2dVarTest() override = default; 642 void SetUp() override { 643 params_ = this->GetParam(); 644 rnd_.Reset(ACMRandom::DeterministicSeed()); 645 src_ = reinterpret_cast<uint8_t *>( 646 aom_memalign(16, 512 * 512 * sizeof(uint8_t))); 647 ASSERT_NE(src_, nullptr); 648 } 649 650 void TearDown() override { aom_free(src_); } 651 void RunTest(bool is_random); 652 void RunSpeedTest(); 653 654 void GenRandomData(int width, int height, int stride) { 655 const int msb = 7; // Up to 8 bit input 656 const int limit = 1 << (msb + 1); 657 for (int ii = 0; ii < height; ii++) { 658 for (int jj = 0; jj < width; jj++) { 659 src_[ii * stride + jj] = rnd_(limit); 660 } 661 } 662 } 663 664 void GenExtremeData(int width, int height, int stride) { 665 const int msb = 7; // Up to 8 bit input 666 const int limit = 1 << (msb + 1); 667 const int val = rnd_(2) ? limit - 1 : 0; 668 for (int ii = 0; ii < height; ii++) { 669 for (int jj = 0; jj < width; jj++) { 670 src_[ii * stride + jj] = val; 671 } 672 } 673 } 674 675 protected: 676 TestFuncVar2D params_; 677 uint8_t *src_; 678 ACMRandom rnd_; 679 }; 680 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Lowbd2dVarTest); 681 682 void Lowbd2dVarTest::RunTest(bool is_random) { 683 int failed = 0; 684 for (int k = 0; k < kNumIterations; k++) { 685 const int width = 4 * (rnd_(63) + 1); // Up to 256x256 686 const int height = 4 * (rnd_(63) + 1); // Up to 256x256 687 int stride = 4 << rnd_(8); // Up to 512 stride 688 while (stride < width) { // Make sure it's valid 689 stride = 4 << rnd_(8); 690 } 691 if (is_random) { 692 GenRandomData(width, height, stride); 693 } else { 694 GenExtremeData(width, height, stride); 695 } 696 697 const uint64_t res_ref = params_.ref_func(src_, stride, width, height); 698 uint64_t res_tst; 699 API_REGISTER_STATE_CHECK(res_tst = 700 params_.tst_func(src_, stride, width, height)); 701 702 if (!failed) { 703 failed = res_ref != res_tst; 704 EXPECT_EQ(res_ref, res_tst) 705 << "Error: Sum Squares Test [" << width << "x" << height 706 << "] C output does not match optimized output."; 707 } 708 } 709 } 710 711 void Lowbd2dVarTest::RunSpeedTest() { 712 for (int block = 0; block < 2; block++) { 713 const int width = test_block_size[block]; 714 const int height = test_block_size[block]; 715 int stride = 4 << rnd_(8); // Up to 512 stride 716 while (stride < width) { // Make sure it's valid 717 stride = 4 << rnd_(8); 718 } 719 GenExtremeData(width, height, stride); 720 const int num_loops = 1000000000 / (width + height); 721 aom_usec_timer timer; 722 aom_usec_timer_start(&timer); 723 724 for (int i = 0; i < num_loops; ++i) 725 params_.ref_func(src_, stride, width, height); 726 727 aom_usec_timer_mark(&timer); 728 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 729 730 aom_usec_timer timer1; 731 aom_usec_timer_start(&timer1); 732 for (int i = 0; i < num_loops; ++i) 733 params_.tst_func(src_, stride, width, height); 734 aom_usec_timer_mark(&timer1); 735 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); 736 printf("%3dx%-3d: Scaling = %.2f\n", width, height, 737 (double)elapsed_time / elapsed_time1); 738 } 739 } 740 741 TEST_P(Lowbd2dVarTest, OperationCheck) { 742 RunTest(true); // GenRandomData 743 } 744 745 TEST_P(Lowbd2dVarTest, ExtremeValues) { 746 RunTest(false); // GenExtremeData 747 } 748 749 TEST_P(Lowbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); } 750 751 #if HAVE_SSE2 752 753 INSTANTIATE_TEST_SUITE_P(SSE2, Lowbd2dVarTest, 754 ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, 755 &aom_var_2d_u8_sse2))); 756 757 #endif // HAVE_SSE2 758 759 #if HAVE_AVX2 760 761 INSTANTIATE_TEST_SUITE_P(AVX2, Lowbd2dVarTest, 762 ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, 763 &aom_var_2d_u8_avx2))); 764 765 #endif // HAVE_SSE2 766 767 #if HAVE_NEON 768 769 INSTANTIATE_TEST_SUITE_P(NEON, Lowbd2dVarTest, 770 ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c, 771 &aom_var_2d_u8_neon))); 772 773 #endif // HAVE_NEON 774 775 #if HAVE_NEON_DOTPROD 776 777 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, Lowbd2dVarTest, 778 ::testing::Values(TestFuncVar2D( 779 &aom_var_2d_u8_c, &aom_var_2d_u8_neon_dotprod))); 780 781 #endif // HAVE_NEON_DOTPROD 782 783 #if CONFIG_AV1_HIGHBITDEPTH 784 class Highbd2dVarTest : public ::testing::TestWithParam<TestFuncVar2D> { 785 public: 786 ~Highbd2dVarTest() override = default; 787 void SetUp() override { 788 params_ = this->GetParam(); 789 rnd_.Reset(ACMRandom::DeterministicSeed()); 790 src_ = reinterpret_cast<uint16_t *>( 791 aom_memalign(16, 512 * 512 * sizeof(uint16_t))); 792 ASSERT_NE(src_, nullptr); 793 } 794 795 void TearDown() override { aom_free(src_); } 796 void RunTest(bool is_random); 797 void RunSpeedTest(); 798 799 void GenRandomData(int width, int height, int stride) { 800 const int msb = 11; // Up to 12 bit input 801 const int limit = 1 << (msb + 1); 802 for (int ii = 0; ii < height; ii++) { 803 for (int jj = 0; jj < width; jj++) { 804 src_[ii * stride + jj] = rnd_(limit); 805 } 806 } 807 } 808 809 void GenExtremeData(int width, int height, int stride) { 810 const int msb = 11; // Up to 12 bit input 811 const int limit = 1 << (msb + 1); 812 const int val = rnd_(2) ? limit - 1 : 0; 813 for (int ii = 0; ii < height; ii++) { 814 for (int jj = 0; jj < width; jj++) { 815 src_[ii * stride + jj] = val; 816 } 817 } 818 } 819 820 protected: 821 TestFuncVar2D params_; 822 uint16_t *src_; 823 ACMRandom rnd_; 824 }; 825 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Highbd2dVarTest); 826 827 void Highbd2dVarTest::RunTest(bool is_random) { 828 int failed = 0; 829 for (int k = 0; k < kNumIterations; k++) { 830 const int width = 4 * (rnd_(63) + 1); // Up to 256x256 831 const int height = 4 * (rnd_(63) + 1); // Up to 256x256 832 int stride = 4 << rnd_(8); // Up to 512 stride 833 while (stride < width) { // Make sure it's valid 834 stride = 4 << rnd_(8); 835 } 836 if (is_random) { 837 GenRandomData(width, height, stride); 838 } else { 839 GenExtremeData(width, height, stride); 840 } 841 842 const uint64_t res_ref = 843 params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); 844 uint64_t res_tst; 845 API_REGISTER_STATE_CHECK( 846 res_tst = 847 params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height)); 848 849 if (!failed) { 850 failed = res_ref != res_tst; 851 EXPECT_EQ(res_ref, res_tst) 852 << "Error: Sum Squares Test [" << width << "x" << height 853 << "] C output does not match optimized output."; 854 } 855 } 856 } 857 858 void Highbd2dVarTest::RunSpeedTest() { 859 for (int block = 0; block < 2; block++) { 860 const int width = test_block_size[block]; 861 const int height = test_block_size[block]; 862 int stride = 4 << rnd_(8); // Up to 512 stride 863 while (stride < width) { // Make sure it's valid 864 stride = 4 << rnd_(8); 865 } 866 GenExtremeData(width, height, stride); 867 const int num_loops = 1000000000 / (width + height); 868 aom_usec_timer timer; 869 aom_usec_timer_start(&timer); 870 871 for (int i = 0; i < num_loops; ++i) 872 params_.ref_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); 873 874 aom_usec_timer_mark(&timer); 875 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 876 877 aom_usec_timer timer1; 878 aom_usec_timer_start(&timer1); 879 for (int i = 0; i < num_loops; ++i) 880 params_.tst_func(CONVERT_TO_BYTEPTR(src_), stride, width, height); 881 aom_usec_timer_mark(&timer1); 882 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); 883 printf("%3dx%-3d: Scaling = %.2f\n", width, height, 884 (double)elapsed_time / elapsed_time1); 885 } 886 } 887 888 TEST_P(Highbd2dVarTest, OperationCheck) { 889 RunTest(true); // GenRandomData 890 } 891 892 TEST_P(Highbd2dVarTest, ExtremeValues) { 893 RunTest(false); // GenExtremeData 894 } 895 896 TEST_P(Highbd2dVarTest, DISABLED_Speed) { RunSpeedTest(); } 897 898 #if HAVE_SSE2 899 900 INSTANTIATE_TEST_SUITE_P( 901 SSE2, Highbd2dVarTest, 902 ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_sse2))); 903 904 #endif // HAVE_SSE2 905 906 #if HAVE_AVX2 907 908 INSTANTIATE_TEST_SUITE_P( 909 AVX2, Highbd2dVarTest, 910 ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_avx2))); 911 912 #endif // HAVE_SSE2 913 914 #if HAVE_NEON 915 916 INSTANTIATE_TEST_SUITE_P( 917 NEON, Highbd2dVarTest, 918 ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_neon))); 919 920 #endif // HAVE_NEON 921 922 #if HAVE_SVE 923 924 INSTANTIATE_TEST_SUITE_P(SVE, Highbd2dVarTest, 925 ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, 926 &aom_var_2d_u16_sve))); 927 928 #endif // HAVE_SVE 929 #endif // CONFIG_AV1_HIGHBITDEPTH 930 } // namespace