blend_a64_mask_test.cc (21936B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <math.h> 13 #include <stdlib.h> 14 #include <string.h> 15 16 #include "gtest/gtest.h" 17 #include "test/register_state_check.h" 18 #include "test/function_equivalence_test.h" 19 20 #include "config/aom_config.h" 21 #include "config/aom_dsp_rtcd.h" 22 #include "config/av1_rtcd.h" 23 24 #include "aom/aom_integer.h" 25 26 #include "av1/common/enums.h" 27 28 #include "aom_dsp/blend.h" 29 30 using libaom_test::FunctionEquivalenceTest; 31 32 namespace { 33 34 template <typename BlendA64Func, typename SrcPixel, typename DstPixel> 35 class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> { 36 protected: 37 static const int kIterations = 10000; 38 static const int kMaxWidth = MAX_SB_SIZE * 5; // * 5 to cover longer strides 39 static const int kMaxHeight = MAX_SB_SIZE; 40 static const int kBufSize = kMaxWidth * kMaxHeight; 41 static const int kMaxMaskWidth = 2 * MAX_SB_SIZE; 42 static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth; 43 44 ~BlendA64MaskTest() override = default; 45 46 virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1, 47 int run_times) = 0; 48 49 template <typename Pixel> 50 void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) { 51 if (run_times > 1) { 52 *src0 = src0_; 53 *src1 = src1_; 54 return; 55 } 56 switch (this->rng_(3)) { 57 case 0: // Separate sources 58 *src0 = src0_; 59 *src1 = src1_; 60 break; 61 case 1: // src0 == dst 62 *src0 = dst_tst_; 63 src0_stride_ = dst_stride_; 64 src0_offset_ = dst_offset_; 65 *src1 = src1_; 66 break; 67 case 2: // src1 == dst 68 *src0 = src0_; 69 *src1 = dst_tst_; 70 src1_stride_ = dst_stride_; 71 src1_offset_ = dst_offset_; 72 break; 73 default: FAIL(); 74 } 75 } 76 77 void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/, 78 int /*run_times*/) { 79 *src0 = src0_; 80 *src1 = src1_; 81 } 82 83 uint8_t Rand1() { return this->rng_.Rand8() & 1; } 84 85 void RunOneTest(int block_size, int subx, int suby, int run_times) { 86 w_ = block_size_wide[block_size]; 87 h_ = block_size_high[block_size]; 88 run_times = run_times > 1 ? run_times / w_ : 1; 89 ASSERT_GT(run_times, 0); 90 subx_ = subx; 91 suby_ = suby; 92 93 dst_offset_ = this->rng_(33); 94 dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; 95 96 src0_offset_ = this->rng_(33); 97 src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; 98 99 src1_offset_ = this->rng_(33); 100 src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_; 101 102 mask_stride_ = 103 this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1); 104 105 SrcPixel *p_src0; 106 SrcPixel *p_src1; 107 108 p_src0 = src0_; 109 p_src1 = src1_; 110 111 GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times); 112 113 Execute(p_src0, p_src1, run_times); 114 115 for (int r = 0; r < h_; ++r) { 116 for (int c = 0; c < w_; ++c) { 117 ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c], 118 dst_tst_[dst_offset_ + r * dst_stride_ + c]) 119 << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_ 120 << " r: " << r << " c: " << c; 121 } 122 } 123 } 124 125 void RunTest(int block_size, int run_times) { 126 for (subx_ = 0; subx_ <= 1; subx_++) { 127 for (suby_ = 0; suby_ <= 1; suby_++) { 128 RunOneTest(block_size, subx_, suby_, run_times); 129 } 130 } 131 } 132 133 DstPixel dst_ref_[kBufSize]; 134 DstPixel dst_tst_[kBufSize]; 135 uint32_t dst_stride_; 136 uint32_t dst_offset_; 137 138 SrcPixel src0_[kBufSize]; 139 uint32_t src0_stride_; 140 uint32_t src0_offset_; 141 142 SrcPixel src1_[kBufSize]; 143 uint32_t src1_stride_; 144 uint32_t src1_offset_; 145 146 uint8_t mask_[kMaxMaskSize]; 147 size_t mask_stride_; 148 149 int w_; 150 int h_; 151 152 int suby_; 153 int subx_; 154 }; 155 156 ////////////////////////////////////////////////////////////////////////////// 157 // 8 bit version 158 ////////////////////////////////////////////////////////////////////////////// 159 160 using F8B = void (*)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, 161 uint32_t src0_stride, const uint8_t *src1, 162 uint32_t src1_stride, const uint8_t *mask, 163 uint32_t mask_stride, int w, int h, int subx, int suby); 164 using TestFuncs = libaom_test::FuncParam<F8B>; 165 166 class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> { 167 protected: 168 void Execute(const uint8_t *p_src0, const uint8_t *p_src1, 169 int run_times) override { 170 aom_usec_timer timer; 171 aom_usec_timer_start(&timer); 172 for (int i = 0; i < run_times; ++i) { 173 params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, 174 p_src0 + src0_offset_, src0_stride_, 175 p_src1 + src1_offset_, src1_stride_, mask_, 176 kMaxMaskWidth, w_, h_, subx_, suby_); 177 } 178 aom_usec_timer_mark(&timer); 179 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 180 aom_usec_timer_start(&timer); 181 for (int i = 0; i < run_times; ++i) { 182 params_.tst_func(dst_tst_ + dst_offset_, dst_stride_, 183 p_src0 + src0_offset_, src0_stride_, 184 p_src1 + src1_offset_, src1_stride_, mask_, 185 kMaxMaskWidth, w_, h_, subx_, suby_); 186 } 187 aom_usec_timer_mark(&timer); 188 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 189 if (run_times > 1) { 190 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, 191 time1, time2); 192 printf("(%3.2f)\n", time1 / time2); 193 } 194 } 195 }; 196 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B); 197 198 TEST_P(BlendA64MaskTest8B, RandomValues) { 199 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) { 200 for (int i = 0; i < kBufSize; ++i) { 201 dst_ref_[i] = rng_.Rand8(); 202 dst_tst_[i] = rng_.Rand8(); 203 204 src0_[i] = rng_.Rand8(); 205 src1_[i] = rng_.Rand8(); 206 } 207 208 for (int i = 0; i < kMaxMaskSize; ++i) 209 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); 210 211 RunTest(bsize, 1); 212 } 213 } 214 215 TEST_P(BlendA64MaskTest8B, ExtremeValues) { 216 for (int i = 0; i < kBufSize; ++i) { 217 dst_ref_[i] = rng_(2) + 254; 218 dst_tst_[i] = rng_(2) + 254; 219 src0_[i] = rng_(2) + 254; 220 src1_[i] = rng_(2) + 254; 221 } 222 223 for (int i = 0; i < kMaxMaskSize; ++i) 224 mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; 225 226 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) 227 RunTest(bsize, 1); 228 } 229 230 TEST_P(BlendA64MaskTest8B, DISABLED_Speed) { 231 const int kRunTimes = 10000000; 232 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { 233 for (int i = 0; i < kBufSize; ++i) { 234 dst_ref_[i] = rng_.Rand8(); 235 dst_tst_[i] = rng_.Rand8(); 236 237 src0_[i] = rng_.Rand8(); 238 src1_[i] = rng_.Rand8(); 239 } 240 241 for (int i = 0; i < kMaxMaskSize; ++i) 242 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); 243 244 RunTest(bsize, kRunTimes); 245 } 246 } 247 #if HAVE_SSE4_1 248 INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTest8B, 249 ::testing::Values(TestFuncs( 250 aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1))); 251 #endif // HAVE_SSE4_1 252 253 #if HAVE_AVX2 254 INSTANTIATE_TEST_SUITE_P(AVX2, BlendA64MaskTest8B, 255 ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1, 256 aom_blend_a64_mask_avx2))); 257 #endif // HAVE_AVX2 258 259 #if HAVE_NEON 260 INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTest8B, 261 ::testing::Values(TestFuncs(aom_blend_a64_mask_c, 262 aom_blend_a64_mask_neon))); 263 #endif // HAVE_NEON 264 265 ////////////////////////////////////////////////////////////////////////////// 266 // 8 bit _d16 version 267 ////////////////////////////////////////////////////////////////////////////// 268 269 using F8B_D16 = void (*)(uint8_t *dst, uint32_t dst_stride, 270 const uint16_t *src0, uint32_t src0_stride, 271 const uint16_t *src1, uint32_t src1_stride, 272 const uint8_t *mask, uint32_t mask_stride, int w, 273 int h, int subx, int suby, 274 ConvolveParams *conv_params); 275 using TestFuncs_d16 = libaom_test::FuncParam<F8B_D16>; 276 277 class BlendA64MaskTest8B_d16 278 : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> { 279 protected: 280 // max number of bits used by the source 281 static const int kSrcMaxBitsMask = 0x3fff; 282 283 void Execute(const uint16_t *p_src0, const uint16_t *p_src1, 284 int run_times) override { 285 ConvolveParams conv_params; 286 conv_params.round_0 = ROUND0_BITS; 287 conv_params.round_1 = COMPOUND_ROUND1_BITS; 288 aom_usec_timer timer; 289 aom_usec_timer_start(&timer); 290 for (int i = 0; i < run_times; ++i) { 291 params_.ref_func(dst_ref_ + dst_offset_, dst_stride_, 292 p_src0 + src0_offset_, src0_stride_, 293 p_src1 + src1_offset_, src1_stride_, mask_, 294 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params); 295 } 296 aom_usec_timer_mark(&timer); 297 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 298 aom_usec_timer_start(&timer); 299 for (int i = 0; i < run_times; ++i) { 300 params_.tst_func(dst_tst_ + dst_offset_, dst_stride_, 301 p_src0 + src0_offset_, src0_stride_, 302 p_src1 + src1_offset_, src1_stride_, mask_, 303 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params); 304 } 305 aom_usec_timer_mark(&timer); 306 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 307 if (run_times > 1) { 308 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, 309 time1, time2); 310 printf("(%3.2f)\n", time1 / time2); 311 } 312 } 313 }; 314 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16); 315 316 TEST_P(BlendA64MaskTest8B_d16, RandomValues) { 317 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) { 318 for (int i = 0; i < kBufSize; ++i) { 319 dst_ref_[i] = rng_.Rand8(); 320 dst_tst_[i] = rng_.Rand8(); 321 322 src0_[i] = rng_.Rand16() & kSrcMaxBitsMask; 323 src1_[i] = rng_.Rand16() & kSrcMaxBitsMask; 324 } 325 326 for (int i = 0; i < kMaxMaskSize; ++i) 327 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); 328 329 RunTest(bsize, 1); 330 } 331 } 332 333 TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) { 334 for (int i = 0; i < kBufSize; ++i) { 335 dst_ref_[i] = 255; 336 dst_tst_[i] = 255; 337 338 src0_[i] = kSrcMaxBitsMask; 339 src1_[i] = kSrcMaxBitsMask; 340 } 341 342 for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA - 1; 343 344 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) 345 RunTest(bsize, 1); 346 } 347 348 TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) { 349 const int kRunTimes = 10000000; 350 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { 351 for (int i = 0; i < kBufSize; ++i) { 352 dst_ref_[i] = rng_.Rand8(); 353 dst_tst_[i] = rng_.Rand8(); 354 355 src0_[i] = rng_.Rand16() & kSrcMaxBitsMask; 356 src1_[i] = rng_.Rand16() & kSrcMaxBitsMask; 357 } 358 359 for (int i = 0; i < kMaxMaskSize; ++i) 360 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); 361 362 RunTest(bsize, kRunTimes); 363 } 364 } 365 366 #if HAVE_SSE4_1 367 INSTANTIATE_TEST_SUITE_P( 368 SSE4_1, BlendA64MaskTest8B_d16, 369 ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c, 370 aom_lowbd_blend_a64_d16_mask_sse4_1))); 371 #endif // HAVE_SSE4_1 372 373 #if HAVE_AVX2 374 INSTANTIATE_TEST_SUITE_P( 375 AVX2, BlendA64MaskTest8B_d16, 376 ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c, 377 aom_lowbd_blend_a64_d16_mask_avx2))); 378 #endif // HAVE_AVX2 379 380 #if HAVE_NEON 381 INSTANTIATE_TEST_SUITE_P( 382 NEON, BlendA64MaskTest8B_d16, 383 ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c, 384 aom_lowbd_blend_a64_d16_mask_neon))); 385 #endif // HAVE_NEON 386 387 ////////////////////////////////////////////////////////////////////////////// 388 // High bit-depth version 389 ////////////////////////////////////////////////////////////////////////////// 390 #if CONFIG_AV1_HIGHBITDEPTH 391 using FHBD = void (*)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, 392 uint32_t src0_stride, const uint8_t *src1, 393 uint32_t src1_stride, const uint8_t *mask, 394 uint32_t mask_stride, int w, int h, int subx, int suby, 395 int bd); 396 using TestFuncsHBD = libaom_test::FuncParam<FHBD>; 397 398 class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> { 399 protected: 400 void Execute(const uint16_t *p_src0, const uint16_t *p_src1, 401 int run_times) override { 402 aom_usec_timer timer; 403 aom_usec_timer_start(&timer); 404 for (int i = 0; i < run_times; ++i) { 405 params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_, 406 CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, 407 CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, 408 mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_); 409 } 410 aom_usec_timer_mark(&timer); 411 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 412 aom_usec_timer_start(&timer); 413 for (int i = 0; i < run_times; ++i) { 414 params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_, 415 CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_, 416 CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_, 417 mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_); 418 } 419 aom_usec_timer_mark(&timer); 420 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 421 if (run_times > 1) { 422 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, 423 time1, time2); 424 printf("(%3.2f)\n", time1 / time2); 425 } 426 } 427 428 int bit_depth_; 429 }; 430 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD); 431 432 TEST_P(BlendA64MaskTestHBD, RandomValues) { 433 for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure(); 434 bit_depth_ += 2) { 435 const int hi = 1 << bit_depth_; 436 437 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { 438 for (int i = 0; i < kBufSize; ++i) { 439 dst_ref_[i] = rng_(hi); 440 dst_tst_[i] = rng_(hi); 441 src0_[i] = rng_(hi); 442 src1_[i] = rng_(hi); 443 } 444 445 for (int i = 0; i < kMaxMaskSize; ++i) 446 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); 447 448 RunTest(bsize, 1); 449 } 450 } 451 } 452 453 TEST_P(BlendA64MaskTestHBD, ExtremeValues) { 454 for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure(); 455 bit_depth_ += 2) { 456 const int hi = 1 << bit_depth_; 457 const int lo = hi - 2; 458 459 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); 460 ++bsize) { 461 for (int i = 0; i < kBufSize; ++i) { 462 dst_ref_[i] = rng_(hi - lo) + lo; 463 dst_tst_[i] = rng_(hi - lo) + lo; 464 src0_[i] = rng_(hi - lo) + lo; 465 src1_[i] = rng_(hi - lo) + lo; 466 } 467 468 for (int i = 0; i < kMaxMaskSize; ++i) 469 mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1; 470 471 RunTest(bsize, 1); 472 } 473 } 474 } 475 476 #if HAVE_SSE4_1 477 INSTANTIATE_TEST_SUITE_P( 478 SSE4_1, BlendA64MaskTestHBD, 479 ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c, 480 aom_highbd_blend_a64_mask_sse4_1))); 481 #endif // HAVE_SSE4_1 482 483 #if HAVE_NEON 484 INSTANTIATE_TEST_SUITE_P( 485 NEON, BlendA64MaskTestHBD, 486 ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c, 487 aom_highbd_blend_a64_mask_neon))); 488 #endif // HAVE_NEON 489 490 ////////////////////////////////////////////////////////////////////////////// 491 // HBD _d16 version 492 ////////////////////////////////////////////////////////////////////////////// 493 494 using FHBD_D16 = void (*)(uint8_t *dst, uint32_t dst_stride, 495 const CONV_BUF_TYPE *src0, uint32_t src0_stride, 496 const CONV_BUF_TYPE *src1, uint32_t src1_stride, 497 const uint8_t *mask, uint32_t mask_stride, int w, 498 int h, int subx, int suby, 499 ConvolveParams *conv_params, const int bd); 500 using TestFuncsHBD_d16 = libaom_test::FuncParam<FHBD_D16>; 501 502 class BlendA64MaskTestHBD_d16 503 : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> { 504 protected: 505 // max number of bits used by the source 506 static const int kSrcMaxBitsMask = (1 << 14) - 1; 507 static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1; 508 509 void Execute(const uint16_t *p_src0, const uint16_t *p_src1, 510 int run_times) override { 511 ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test."; 512 ConvolveParams conv_params; 513 conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS; 514 conv_params.round_1 = COMPOUND_ROUND1_BITS; 515 aom_usec_timer timer; 516 aom_usec_timer_start(&timer); 517 for (int i = 0; i < run_times; ++i) { 518 params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_, 519 p_src0 + src0_offset_, src0_stride_, 520 p_src1 + src1_offset_, src1_stride_, mask_, 521 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params, 522 bit_depth_); 523 } 524 if (params_.tst_func) { 525 aom_usec_timer_mark(&timer); 526 const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 527 aom_usec_timer_start(&timer); 528 for (int i = 0; i < run_times; ++i) { 529 params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), 530 dst_stride_, p_src0 + src0_offset_, src0_stride_, 531 p_src1 + src1_offset_, src1_stride_, mask_, 532 kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params, 533 bit_depth_); 534 } 535 aom_usec_timer_mark(&timer); 536 const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer)); 537 if (run_times > 1) { 538 printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_, 539 time1, time2); 540 printf("(%3.2f)\n", time1 / time2); 541 } 542 } 543 } 544 545 int bit_depth_; 546 int src_max_bits_mask_; 547 }; 548 549 TEST_P(BlendA64MaskTestHBD_d16, RandomValues) { 550 if (params_.tst_func == nullptr) return; 551 for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure(); 552 bit_depth_ += 2) { 553 src_max_bits_mask_ = 554 (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD; 555 556 for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); 557 ++bsize) { 558 for (int i = 0; i < kBufSize; ++i) { 559 dst_ref_[i] = rng_.Rand8(); 560 dst_tst_[i] = rng_.Rand8(); 561 562 src0_[i] = rng_.Rand16() & src_max_bits_mask_; 563 src1_[i] = rng_.Rand16() & src_max_bits_mask_; 564 } 565 566 for (int i = 0; i < kMaxMaskSize; ++i) 567 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); 568 569 RunTest(bsize, 1); 570 } 571 } 572 } 573 574 TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) { 575 for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) { 576 src_max_bits_mask_ = 577 (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD; 578 579 for (int i = 0; i < kBufSize; ++i) { 580 dst_ref_[i] = 0; 581 dst_tst_[i] = (1 << bit_depth_) - 1; 582 583 src0_[i] = src_max_bits_mask_; 584 src1_[i] = src_max_bits_mask_; 585 } 586 587 for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA; 588 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { 589 RunTest(bsize, 1); 590 } 591 } 592 } 593 594 TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) { 595 const int kRunTimes = 10000000; 596 for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { 597 for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) { 598 for (int i = 0; i < kBufSize; ++i) { 599 dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_); 600 dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_); 601 602 src0_[i] = rng_.Rand16(); 603 src1_[i] = rng_.Rand16(); 604 } 605 606 for (int i = 0; i < kMaxMaskSize; ++i) 607 mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1); 608 609 RunTest(bsize, kRunTimes); 610 } 611 } 612 } 613 614 INSTANTIATE_TEST_SUITE_P( 615 C, BlendA64MaskTestHBD_d16, 616 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c, 617 aom_highbd_blend_a64_d16_mask_c))); 618 619 #if HAVE_SSE4_1 620 INSTANTIATE_TEST_SUITE_P( 621 SSE4_1, BlendA64MaskTestHBD_d16, 622 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c, 623 aom_highbd_blend_a64_d16_mask_sse4_1))); 624 #endif // HAVE_SSE4_1 625 626 #if HAVE_AVX2 627 INSTANTIATE_TEST_SUITE_P( 628 AVX2, BlendA64MaskTestHBD_d16, 629 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c, 630 aom_highbd_blend_a64_d16_mask_avx2))); 631 #endif // HAVE_AVX2 632 633 #if HAVE_NEON 634 INSTANTIATE_TEST_SUITE_P( 635 NEON, BlendA64MaskTestHBD_d16, 636 ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c, 637 aom_highbd_blend_a64_d16_mask_neon))); 638 #endif // HAVE_NEON 639 640 // TODO(slavarnway): Enable the following in the avx2 commit. (56501) 641 #if 0 642 #if HAVE_AVX2 643 INSTANTIATE_TEST_SUITE_P( 644 SSE4_1, BlendA64MaskTestHBD, 645 ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c, 646 aom_highbd_blend_a64_mask_avx2))); 647 #endif // HAVE_AVX2 648 #endif 649 #endif // CONFIG_AV1_HIGHBITDEPTH 650 } // namespace