comp_mask_pred_test.cc (30022B)
1 /* 2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <cstdlib> 13 #include <new> 14 #include <tuple> 15 16 #include "config/aom_config.h" 17 #include "config/aom_dsp_rtcd.h" 18 19 #include "aom/aom_codec.h" 20 #include "aom/aom_integer.h" 21 #include "aom_dsp/variance.h" 22 #include "aom_mem/aom_mem.h" 23 #include "aom_ports/aom_timer.h" 24 #include "aom_ports/mem.h" 25 #include "av1/common/reconinter.h" 26 #include "av1/encoder/reconinter_enc.h" 27 #include "gtest/gtest.h" 28 #include "test/acm_random.h" 29 #include "test/register_state_check.h" 30 #include "test/util.h" 31 32 namespace { 33 using comp_mask_pred_func = void (*)(uint8_t *comp_pred, const uint8_t *pred, 34 int width, int height, const uint8_t *ref, 35 int ref_stride, const uint8_t *mask, 36 int mask_stride, int invert_mask); 37 38 using comp_avg_pred_func = void (*)(uint8_t *comp_pred, const uint8_t *pred, 39 int width, int height, const uint8_t *ref, 40 int ref_stride); 41 42 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON 43 const BLOCK_SIZE kCompMaskPredParams[] = { 44 BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_16X8, BLOCK_16X16, 45 BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32 46 }; 47 #endif 48 49 class AV1CompMaskPredBase : public ::testing::Test { 50 public: 51 ~AV1CompMaskPredBase() override; 52 void SetUp() override; 53 54 void TearDown() override; 55 56 protected: 57 bool CheckResult(int width, int height) { 58 for (int y = 0; y < height; ++y) { 59 for (int x = 0; x < width; ++x) { 60 const int idx = y * width + x; 61 if (comp_pred1_[idx] != comp_pred2_[idx]) { 62 printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x); 63 printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); 64 return false; 65 } 66 } 67 } 68 return true; 69 } 70 71 libaom_test::ACMRandom rnd_; 72 uint8_t *comp_pred1_; 73 uint8_t *comp_pred2_; 74 uint8_t *pred_; 75 uint8_t *ref_buffer_; 76 uint8_t *ref_; 77 }; 78 79 AV1CompMaskPredBase::~AV1CompMaskPredBase() = default; 80 81 void AV1CompMaskPredBase::SetUp() { 82 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); 83 av1_init_wedge_masks(); 84 comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); 85 ASSERT_NE(comp_pred1_, nullptr); 86 comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); 87 ASSERT_NE(comp_pred2_, nullptr); 88 pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); 89 ASSERT_NE(pred_, nullptr); 90 // The biggest block size is MAX_SB_SQUARE(128*128), however for the 91 // convolution we need to access 3 bytes before and 4 bytes after (for an 92 // 8-tap filter), in both directions, so we need to allocate 93 // (128 + 7) * (128 + 7) = MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49 94 ref_buffer_ = 95 (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49); 96 ASSERT_NE(ref_buffer_, nullptr); 97 // Start of the actual block where the convolution will be computed 98 ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3); 99 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 100 pred_[i] = rnd_.Rand8(); 101 } 102 for (int i = 0; i < MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49; ++i) { 103 ref_buffer_[i] = rnd_.Rand8(); 104 } 105 } 106 107 void AV1CompMaskPredBase::TearDown() { 108 aom_free(comp_pred1_); 109 aom_free(comp_pred2_); 110 aom_free(pred_); 111 aom_free(ref_buffer_); 112 } 113 114 using CompMaskPredParam = std::tuple<comp_mask_pred_func, BLOCK_SIZE>; 115 116 class AV1CompMaskPredTest 117 : public AV1CompMaskPredBase, 118 public ::testing::WithParamInterface<CompMaskPredParam> { 119 protected: 120 void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv); 121 void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize); 122 }; 123 124 void AV1CompMaskPredTest::RunCheckOutput(comp_mask_pred_func test_impl, 125 BLOCK_SIZE bsize, int inv) { 126 const int w = block_size_wide[bsize]; 127 const int h = block_size_high[bsize]; 128 const int wedge_types = get_wedge_types_lookup(bsize); 129 for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { 130 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize); 131 132 aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 133 inv); 134 test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv); 135 136 ASSERT_EQ(CheckResult(w, h), true) 137 << " wedge " << wedge_index << " inv " << inv; 138 } 139 } 140 141 void AV1CompMaskPredTest::RunSpeedTest(comp_mask_pred_func test_impl, 142 BLOCK_SIZE bsize) { 143 const int w = block_size_wide[bsize]; 144 const int h = block_size_high[bsize]; 145 const int wedge_types = get_wedge_types_lookup(bsize); 146 int wedge_index = wedge_types / 2; 147 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize); 148 const int num_loops = 1000000000 / (w + h); 149 150 comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl }; 151 double elapsed_time[2] = { 0 }; 152 for (int i = 0; i < 2; ++i) { 153 aom_usec_timer timer; 154 aom_usec_timer_start(&timer); 155 comp_mask_pred_func func = funcs[i]; 156 for (int j = 0; j < num_loops; ++j) { 157 func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0); 158 } 159 aom_usec_timer_mark(&timer); 160 double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); 161 elapsed_time[i] = 1000.0 * time / num_loops; 162 } 163 printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], 164 elapsed_time[1]); 165 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); 166 } 167 168 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompMaskPredTest); 169 170 TEST_P(AV1CompMaskPredTest, CheckOutput) { 171 // inv = 0, 1 172 RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0); 173 RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1); 174 } 175 176 TEST_P(AV1CompMaskPredTest, DISABLED_Speed) { 177 RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); 178 } 179 180 #if HAVE_SSSE3 181 INSTANTIATE_TEST_SUITE_P( 182 SSSE3, AV1CompMaskPredTest, 183 ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3), 184 ::testing::ValuesIn(kCompMaskPredParams))); 185 #endif 186 187 #if HAVE_AVX2 188 INSTANTIATE_TEST_SUITE_P( 189 AVX2, AV1CompMaskPredTest, 190 ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2), 191 ::testing::ValuesIn(kCompMaskPredParams))); 192 #endif 193 194 #if HAVE_NEON 195 INSTANTIATE_TEST_SUITE_P( 196 NEON, AV1CompMaskPredTest, 197 ::testing::Combine(::testing::Values(&aom_comp_mask_pred_neon), 198 ::testing::ValuesIn(kCompMaskPredParams))); 199 #endif 200 201 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON 202 const BLOCK_SIZE kValidBlockSize[] = { 203 BLOCK_4X4, BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_16X8, 204 BLOCK_16X16, BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32, 205 BLOCK_32X64, BLOCK_64X32, BLOCK_64X64, BLOCK_64X128, BLOCK_128X64, 206 BLOCK_128X128, BLOCK_16X64, BLOCK_64X16 207 }; 208 #endif 209 210 using upsampled_pred_func = void (*)(MACROBLOCKD *xd, 211 const AV1_COMMON *const cm, int mi_row, 212 int mi_col, const MV *const mv, 213 uint8_t *comp_pred, int width, int height, 214 int subpel_x_q3, int subpel_y_q3, 215 const uint8_t *ref, int ref_stride, 216 int subpel_search); 217 218 using UpsampledPredParam = std::tuple<upsampled_pred_func, BLOCK_SIZE>; 219 220 class AV1UpsampledPredTest 221 : public AV1CompMaskPredBase, 222 public ::testing::WithParamInterface<UpsampledPredParam> { 223 protected: 224 void RunCheckOutput(upsampled_pred_func test_impl, BLOCK_SIZE bsize); 225 void RunSpeedTest(upsampled_pred_func test_impl, BLOCK_SIZE bsize, 226 int havSub); 227 }; 228 229 void AV1UpsampledPredTest::RunCheckOutput(upsampled_pred_func test_impl, 230 BLOCK_SIZE bsize) { 231 const int w = block_size_wide[bsize]; 232 const int h = block_size_high[bsize]; 233 for (int subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS; 234 ++subpel_search) { 235 // loop through subx and suby 236 for (int sub = 0; sub < 8 * 8; ++sub) { 237 int subx = sub & 0x7; 238 int suby = (sub >> 3); 239 240 aom_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h, 241 subx, suby, ref_, MAX_SB_SIZE, subpel_search); 242 243 test_impl(nullptr, nullptr, 0, 0, nullptr, comp_pred2_, w, h, subx, suby, 244 ref_, MAX_SB_SIZE, subpel_search); 245 ASSERT_EQ(CheckResult(w, h), true) 246 << "sub (" << subx << "," << suby << ")"; 247 } 248 } 249 } 250 251 void AV1UpsampledPredTest::RunSpeedTest(upsampled_pred_func test_impl, 252 BLOCK_SIZE bsize, int havSub) { 253 const int w = block_size_wide[bsize]; 254 const int h = block_size_high[bsize]; 255 const int subx = havSub ? 3 : 0; 256 const int suby = havSub ? 4 : 0; 257 258 const int num_loops = 1000000000 / (w + h); 259 upsampled_pred_func funcs[2] = { aom_upsampled_pred_c, test_impl }; 260 double elapsed_time[2] = { 0 }; 261 int subpel_search = USE_8_TAPS; // set to USE_4_TAPS to test 4-tap filter. 262 for (int i = 0; i < 2; ++i) { 263 aom_usec_timer timer; 264 aom_usec_timer_start(&timer); 265 upsampled_pred_func func = funcs[i]; 266 for (int j = 0; j < num_loops; ++j) { 267 func(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h, subx, suby, ref_, 268 MAX_SB_SIZE, subpel_search); 269 } 270 aom_usec_timer_mark(&timer); 271 double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); 272 elapsed_time[i] = 1000.0 * time / num_loops; 273 } 274 printf("UpsampledPred[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, 275 elapsed_time[0], elapsed_time[1]); 276 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); 277 } 278 279 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1UpsampledPredTest); 280 281 TEST_P(AV1UpsampledPredTest, CheckOutput) { 282 RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); 283 } 284 285 TEST_P(AV1UpsampledPredTest, DISABLED_Speed) { 286 RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1); 287 } 288 289 #if HAVE_SSE2 290 INSTANTIATE_TEST_SUITE_P( 291 SSE2, AV1UpsampledPredTest, 292 ::testing::Combine(::testing::Values(&aom_upsampled_pred_sse2), 293 ::testing::ValuesIn(kValidBlockSize))); 294 #endif 295 296 #if HAVE_NEON 297 INSTANTIATE_TEST_SUITE_P( 298 NEON, AV1UpsampledPredTest, 299 ::testing::Combine(::testing::Values(&aom_upsampled_pred_neon), 300 ::testing::ValuesIn(kValidBlockSize))); 301 #endif 302 303 using CompAvgPredParam = std::tuple<comp_avg_pred_func, BLOCK_SIZE>; 304 305 class AV1CompAvgPredTest : public ::testing::TestWithParam<CompAvgPredParam> { 306 public: 307 ~AV1CompAvgPredTest() override; 308 void SetUp() override; 309 310 void TearDown() override; 311 312 protected: 313 void RunCheckOutput(comp_avg_pred_func test_impl, BLOCK_SIZE bsize); 314 void RunSpeedTest(comp_avg_pred_func test_impl, BLOCK_SIZE bsize); 315 bool CheckResult(int width, int height) { 316 for (int y = 0; y < height; ++y) { 317 for (int x = 0; x < width; ++x) { 318 const int idx = y * width + x; 319 if (comp_pred1_[idx] != comp_pred2_[idx]) { 320 printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y); 321 printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); 322 return false; 323 } 324 } 325 } 326 return true; 327 } 328 329 libaom_test::ACMRandom rnd_; 330 uint8_t *comp_pred1_; 331 uint8_t *comp_pred2_; 332 uint8_t *pred_; 333 uint8_t *ref_; 334 }; 335 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompAvgPredTest); 336 337 AV1CompAvgPredTest::~AV1CompAvgPredTest() = default; 338 339 void AV1CompAvgPredTest::SetUp() { 340 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); 341 342 comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); 343 ASSERT_NE(comp_pred1_, nullptr); 344 comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); 345 ASSERT_NE(comp_pred2_, nullptr); 346 pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); 347 ASSERT_NE(pred_, nullptr); 348 ref_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE); 349 ASSERT_NE(ref_, nullptr); 350 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 351 pred_[i] = rnd_.Rand8(); 352 } 353 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 354 ref_[i] = rnd_.Rand8(); 355 } 356 } 357 358 void AV1CompAvgPredTest::TearDown() { 359 aom_free(comp_pred1_); 360 aom_free(comp_pred2_); 361 aom_free(pred_); 362 aom_free(ref_); 363 } 364 365 void AV1CompAvgPredTest::RunCheckOutput(comp_avg_pred_func test_impl, 366 BLOCK_SIZE bsize) { 367 const int w = block_size_wide[bsize]; 368 const int h = block_size_high[bsize]; 369 aom_comp_avg_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE); 370 test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE); 371 372 ASSERT_EQ(CheckResult(w, h), true); 373 } 374 375 void AV1CompAvgPredTest::RunSpeedTest(comp_avg_pred_func test_impl, 376 BLOCK_SIZE bsize) { 377 const int w = block_size_wide[bsize]; 378 const int h = block_size_high[bsize]; 379 const int num_loops = 1000000000 / (w + h); 380 381 comp_avg_pred_func functions[2] = { aom_comp_avg_pred_c, test_impl }; 382 double elapsed_time[2] = { 0.0 }; 383 for (int i = 0; i < 2; ++i) { 384 aom_usec_timer timer; 385 aom_usec_timer_start(&timer); 386 comp_avg_pred_func func = functions[i]; 387 for (int j = 0; j < num_loops; ++j) { 388 func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE); 389 } 390 aom_usec_timer_mark(&timer); 391 const double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); 392 elapsed_time[i] = 1000.0 * time; 393 } 394 printf("CompAvgPred %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], 395 elapsed_time[1]); 396 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); 397 } 398 399 TEST_P(AV1CompAvgPredTest, CheckOutput) { 400 RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); 401 } 402 403 TEST_P(AV1CompAvgPredTest, DISABLED_Speed) { 404 RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); 405 } 406 407 #if HAVE_AVX2 408 INSTANTIATE_TEST_SUITE_P( 409 AVX2, AV1CompAvgPredTest, 410 ::testing::Combine(::testing::Values(&aom_comp_avg_pred_avx2), 411 ::testing::ValuesIn(kValidBlockSize))); 412 #endif 413 414 #if HAVE_NEON 415 INSTANTIATE_TEST_SUITE_P( 416 NEON, AV1CompAvgPredTest, 417 ::testing::Combine(::testing::Values(&aom_comp_avg_pred_neon), 418 ::testing::ValuesIn(kValidBlockSize))); 419 #endif 420 421 #if CONFIG_AV1_HIGHBITDEPTH 422 class AV1HighbdCompMaskPredTestBase : public ::testing::Test { 423 public: 424 ~AV1HighbdCompMaskPredTestBase() override; 425 void SetUp() override; 426 427 void TearDown() override; 428 429 protected: 430 bool CheckResult(int width, int height) { 431 for (int y = 0; y < height; ++y) { 432 for (int x = 0; x < width; ++x) { 433 const int idx = y * width + x; 434 if (comp_pred1_[idx] != comp_pred2_[idx]) { 435 printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x); 436 printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); 437 return false; 438 } 439 } 440 } 441 return true; 442 } 443 444 libaom_test::ACMRandom rnd_; 445 uint16_t *comp_pred1_; 446 uint16_t *comp_pred2_; 447 uint16_t *pred_; 448 uint16_t *ref_buffer_; 449 uint16_t *ref_; 450 }; 451 452 AV1HighbdCompMaskPredTestBase::~AV1HighbdCompMaskPredTestBase() = default; 453 454 void AV1HighbdCompMaskPredTestBase::SetUp() { 455 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); 456 av1_init_wedge_masks(); 457 458 comp_pred1_ = 459 (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_)); 460 ASSERT_NE(comp_pred1_, nullptr); 461 comp_pred2_ = 462 (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_)); 463 ASSERT_NE(comp_pred2_, nullptr); 464 pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_)); 465 ASSERT_NE(pred_, nullptr); 466 // The biggest block size is MAX_SB_SQUARE(128*128), however for the 467 // convolution we need to access 3 elements before and 4 elements after (for 468 // an 8-tap filter), in both directions, so we need to allocate (128 + 7) * 469 // (128 + 7) = (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) * 470 // sizeof(*ref_buffer_) 471 ref_buffer_ = (uint16_t *)aom_memalign( 472 16, (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) * sizeof(*ref_buffer_)); 473 ASSERT_NE(ref_buffer_, nullptr); 474 // Start of the actual block where the convolution will be computed 475 ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3); 476 } 477 478 void AV1HighbdCompMaskPredTestBase::TearDown() { 479 aom_free(comp_pred1_); 480 aom_free(comp_pred2_); 481 aom_free(pred_); 482 aom_free(ref_buffer_); 483 } 484 485 using highbd_comp_mask_pred_func = void (*)(uint8_t *comp_pred8, 486 const uint8_t *pred8, int width, 487 int height, const uint8_t *ref8, 488 int ref_stride, const uint8_t *mask, 489 int mask_stride, int invert_mask); 490 491 using HighbdCompMaskPredParam = 492 std::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>; 493 494 class AV1HighbdCompMaskPredTest 495 : public AV1HighbdCompMaskPredTestBase, 496 public ::testing::WithParamInterface<HighbdCompMaskPredParam> { 497 public: 498 ~AV1HighbdCompMaskPredTest() override; 499 500 protected: 501 void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv); 502 void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize); 503 }; 504 505 AV1HighbdCompMaskPredTest::~AV1HighbdCompMaskPredTest() = default; 506 507 void AV1HighbdCompMaskPredTest::RunCheckOutput( 508 highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) { 509 int bd_ = GET_PARAM(2); 510 const int w = block_size_wide[bsize]; 511 const int h = block_size_high[bsize]; 512 const int wedge_types = get_wedge_types_lookup(bsize); 513 514 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 515 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 516 } 517 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { 518 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 519 } 520 521 for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { 522 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize); 523 524 aom_highbd_comp_mask_pred_c( 525 CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h, 526 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv); 527 528 test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h, 529 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv); 530 531 ASSERT_EQ(CheckResult(w, h), true) 532 << " wedge " << wedge_index << " inv " << inv; 533 } 534 } 535 536 void AV1HighbdCompMaskPredTest::RunSpeedTest( 537 highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) { 538 int bd_ = GET_PARAM(2); 539 540 const int w = block_size_wide[bsize]; 541 const int h = block_size_high[bsize]; 542 const int wedge_types = get_wedge_types_lookup(bsize); 543 int wedge_index = wedge_types / 2; 544 545 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 546 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 547 } 548 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { 549 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 550 } 551 552 const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize); 553 const int num_loops = 1000000000 / (w + h); 554 555 highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c, 556 test_impl }; 557 double elapsed_time[2] = { 0 }; 558 for (int i = 0; i < 2; ++i) { 559 aom_usec_timer timer; 560 aom_usec_timer_start(&timer); 561 highbd_comp_mask_pred_func func = funcs[i]; 562 for (int j = 0; j < num_loops; ++j) { 563 func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h, 564 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0); 565 } 566 aom_usec_timer_mark(&timer); 567 double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); 568 elapsed_time[i] = 1000.0 * time / num_loops; 569 } 570 printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], 571 elapsed_time[1]); 572 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); 573 } 574 575 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompMaskPredTest); 576 577 TEST_P(AV1HighbdCompMaskPredTest, CheckOutput) { 578 // inv = 0, 1 579 RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0); 580 RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1); 581 } 582 583 TEST_P(AV1HighbdCompMaskPredTest, DISABLED_Speed) { 584 RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); 585 } 586 587 #if HAVE_NEON 588 INSTANTIATE_TEST_SUITE_P( 589 NEON, AV1HighbdCompMaskPredTest, 590 ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_neon), 591 ::testing::ValuesIn(kCompMaskPredParams), 592 ::testing::Range(8, 13, 2))); 593 #endif 594 595 #if HAVE_AVX2 596 INSTANTIATE_TEST_SUITE_P( 597 AVX2, AV1HighbdCompMaskPredTest, 598 ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2), 599 ::testing::ValuesIn(kCompMaskPredParams), 600 ::testing::Range(8, 13, 2))); 601 #endif 602 603 #if HAVE_SSE2 604 INSTANTIATE_TEST_SUITE_P( 605 SSE2, AV1HighbdCompMaskPredTest, 606 ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2), 607 ::testing::ValuesIn(kCompMaskPredParams), 608 ::testing::Range(8, 13, 2))); 609 #endif 610 611 using highbd_upsampled_pred_func = 612 void (*)(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, 613 int mi_col, const MV *const mv, uint8_t *comp_pred8, int width, 614 int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, 615 int ref_stride, int bd, int subpel_search); 616 617 using HighbdUpsampledPredParam = 618 std::tuple<highbd_upsampled_pred_func, BLOCK_SIZE, int>; 619 620 class AV1HighbdUpsampledPredTest 621 : public AV1HighbdCompMaskPredTestBase, 622 public ::testing::WithParamInterface<HighbdUpsampledPredParam> { 623 public: 624 ~AV1HighbdUpsampledPredTest() override; 625 626 protected: 627 void RunCheckOutput(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize); 628 void RunSpeedTest(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize, 629 int havSub); 630 }; 631 632 AV1HighbdUpsampledPredTest::~AV1HighbdUpsampledPredTest() = default; 633 634 void AV1HighbdUpsampledPredTest::RunCheckOutput( 635 highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize) { 636 int bd_ = GET_PARAM(2); 637 const int w = block_size_wide[bsize]; 638 const int h = block_size_high[bsize]; 639 640 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 641 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 642 } 643 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { 644 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 645 } 646 647 for (int subpel_search = 1; subpel_search <= 2; ++subpel_search) { 648 // loop through subx and suby 649 for (int sub = 0; sub < 8 * 8; ++sub) { 650 int subx = sub & 0x7; 651 int suby = (sub >> 3); 652 653 aom_highbd_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr, 654 CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx, 655 suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, 656 bd_, subpel_search); 657 658 test_impl(nullptr, nullptr, 0, 0, nullptr, 659 CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx, suby, 660 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search); 661 662 ASSERT_EQ(CheckResult(w, h), true) 663 << "sub (" << subx << "," << suby << ")"; 664 } 665 } 666 } 667 668 void AV1HighbdUpsampledPredTest::RunSpeedTest( 669 highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize, int havSub) { 670 int bd_ = GET_PARAM(2); 671 const int w = block_size_wide[bsize]; 672 const int h = block_size_high[bsize]; 673 const int subx = havSub ? 3 : 0; 674 const int suby = havSub ? 4 : 0; 675 676 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 677 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 678 } 679 for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) { 680 ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 681 } 682 683 const int num_loops = 1000000000 / (w + h); 684 highbd_upsampled_pred_func funcs[2] = { &aom_highbd_upsampled_pred_c, 685 test_impl }; 686 double elapsed_time[2] = { 0 }; 687 for (int i = 0; i < 2; ++i) { 688 aom_usec_timer timer; 689 aom_usec_timer_start(&timer); 690 highbd_upsampled_pred_func func = funcs[i]; 691 int subpel_search = 2; // set to 1 to test 4-tap filter. 692 for (int j = 0; j < num_loops; ++j) { 693 func(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(comp_pred1_), w, 694 h, subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, 695 subpel_search); 696 } 697 aom_usec_timer_mark(&timer); 698 double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); 699 elapsed_time[i] = 1000.0 * time / num_loops; 700 } 701 printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0], 702 elapsed_time[1]); 703 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); 704 } 705 706 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdUpsampledPredTest); 707 708 TEST_P(AV1HighbdUpsampledPredTest, CheckOutput) { 709 RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); 710 } 711 712 TEST_P(AV1HighbdUpsampledPredTest, DISABLED_Speed) { 713 RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1); 714 } 715 716 #if HAVE_SSE2 717 INSTANTIATE_TEST_SUITE_P( 718 SSE2, AV1HighbdUpsampledPredTest, 719 ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_sse2), 720 ::testing::ValuesIn(kValidBlockSize), 721 ::testing::Range(8, 13, 2))); 722 #endif 723 724 #if HAVE_NEON 725 INSTANTIATE_TEST_SUITE_P( 726 NEON, AV1HighbdUpsampledPredTest, 727 ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_neon), 728 ::testing::ValuesIn(kValidBlockSize), 729 ::testing::Range(8, 13, 2))); 730 #endif 731 732 using highbd_comp_avg_pred_func = void (*)(uint8_t *comp_pred, 733 const uint8_t *pred, int width, 734 int height, const uint8_t *ref, 735 int ref_stride); 736 737 using HighbdCompAvgPredParam = 738 std::tuple<highbd_comp_avg_pred_func, BLOCK_SIZE, int>; 739 740 class AV1HighbdCompAvgPredTest 741 : public ::testing::TestWithParam<HighbdCompAvgPredParam> { 742 public: 743 ~AV1HighbdCompAvgPredTest() override; 744 void SetUp() override; 745 746 protected: 747 void RunCheckOutput(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize); 748 void RunSpeedTest(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize); 749 bool CheckResult(int width, int height) const { 750 for (int y = 0; y < height; ++y) { 751 for (int x = 0; x < width; ++x) { 752 const int idx = y * width + x; 753 if (comp_pred1_[idx] != comp_pred2_[idx]) { 754 printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y); 755 printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]); 756 return false; 757 } 758 } 759 } 760 return true; 761 } 762 763 libaom_test::ACMRandom rnd_; 764 uint16_t *comp_pred1_; 765 uint16_t *comp_pred2_; 766 uint16_t *pred_; 767 uint16_t *ref_; 768 }; 769 770 AV1HighbdCompAvgPredTest::~AV1HighbdCompAvgPredTest() { 771 aom_free(comp_pred1_); 772 aom_free(comp_pred2_); 773 aom_free(pred_); 774 aom_free(ref_); 775 } 776 777 void AV1HighbdCompAvgPredTest::SetUp() { 778 int bd_ = GET_PARAM(2); 779 rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed()); 780 781 comp_pred1_ = 782 (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_)); 783 ASSERT_NE(comp_pred1_, nullptr); 784 comp_pred2_ = 785 (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_)); 786 ASSERT_NE(comp_pred2_, nullptr); 787 pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_)); 788 ASSERT_NE(pred_, nullptr); 789 ref_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*ref_)); 790 ASSERT_NE(ref_, nullptr); 791 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 792 pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 793 } 794 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 795 ref_[i] = rnd_.Rand16() & ((1 << bd_) - 1); 796 } 797 } 798 799 void AV1HighbdCompAvgPredTest::RunCheckOutput( 800 highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize) { 801 const int w = block_size_wide[bsize]; 802 const int h = block_size_high[bsize]; 803 aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(comp_pred1_), 804 CONVERT_TO_BYTEPTR(pred_), w, h, 805 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE); 806 test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h, 807 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE); 808 809 ASSERT_EQ(CheckResult(w, h), true); 810 } 811 812 void AV1HighbdCompAvgPredTest::RunSpeedTest(highbd_comp_avg_pred_func test_impl, 813 BLOCK_SIZE bsize) { 814 const int w = block_size_wide[bsize]; 815 const int h = block_size_high[bsize]; 816 const int num_loops = 1000000000 / (w + h); 817 818 highbd_comp_avg_pred_func functions[2] = { aom_highbd_comp_avg_pred_c, 819 test_impl }; 820 double elapsed_time[2] = { 0.0 }; 821 for (int i = 0; i < 2; ++i) { 822 aom_usec_timer timer; 823 aom_usec_timer_start(&timer); 824 highbd_comp_avg_pred_func func = functions[i]; 825 for (int j = 0; j < num_loops; ++j) { 826 func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h, 827 CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE); 828 } 829 aom_usec_timer_mark(&timer); 830 const double time = static_cast<double>(aom_usec_timer_elapsed(&timer)); 831 elapsed_time[i] = 1000.0 * time; 832 } 833 printf("HighbdCompAvg %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0], 834 elapsed_time[1]); 835 printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]); 836 } 837 838 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompAvgPredTest); 839 840 TEST_P(AV1HighbdCompAvgPredTest, CheckOutput) { 841 RunCheckOutput(GET_PARAM(0), GET_PARAM(1)); 842 } 843 844 TEST_P(AV1HighbdCompAvgPredTest, DISABLED_Speed) { 845 RunSpeedTest(GET_PARAM(0), GET_PARAM(1)); 846 } 847 848 #if HAVE_NEON 849 INSTANTIATE_TEST_SUITE_P( 850 NEON, AV1HighbdCompAvgPredTest, 851 ::testing::Combine(::testing::Values(&aom_highbd_comp_avg_pred_neon), 852 ::testing::ValuesIn(kValidBlockSize), 853 ::testing::Range(8, 13, 2))); 854 #endif 855 856 #endif // CONFIG_AV1_HIGHBITDEPTH 857 } // namespace