tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

blend_a64_mask_test.cc (21936B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <math.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 
     16 #include "gtest/gtest.h"
     17 #include "test/register_state_check.h"
     18 #include "test/function_equivalence_test.h"
     19 
     20 #include "config/aom_config.h"
     21 #include "config/aom_dsp_rtcd.h"
     22 #include "config/av1_rtcd.h"
     23 
     24 #include "aom/aom_integer.h"
     25 
     26 #include "av1/common/enums.h"
     27 
     28 #include "aom_dsp/blend.h"
     29 
     30 using libaom_test::FunctionEquivalenceTest;
     31 
     32 namespace {
     33 
     34 template <typename BlendA64Func, typename SrcPixel, typename DstPixel>
     35 class BlendA64MaskTest : public FunctionEquivalenceTest<BlendA64Func> {
     36 protected:
     37  static const int kIterations = 10000;
     38  static const int kMaxWidth = MAX_SB_SIZE * 5;  // * 5 to cover longer strides
     39  static const int kMaxHeight = MAX_SB_SIZE;
     40  static const int kBufSize = kMaxWidth * kMaxHeight;
     41  static const int kMaxMaskWidth = 2 * MAX_SB_SIZE;
     42  static const int kMaxMaskSize = kMaxMaskWidth * kMaxMaskWidth;
     43 
     44  ~BlendA64MaskTest() override = default;
     45 
     46  virtual void Execute(const SrcPixel *p_src0, const SrcPixel *p_src1,
     47                       int run_times) = 0;
     48 
     49  template <typename Pixel>
     50  void GetSources(Pixel **src0, Pixel **src1, Pixel * /*dst*/, int run_times) {
     51    if (run_times > 1) {
     52      *src0 = src0_;
     53      *src1 = src1_;
     54      return;
     55    }
     56    switch (this->rng_(3)) {
     57      case 0:  // Separate sources
     58        *src0 = src0_;
     59        *src1 = src1_;
     60        break;
     61      case 1:  // src0 == dst
     62        *src0 = dst_tst_;
     63        src0_stride_ = dst_stride_;
     64        src0_offset_ = dst_offset_;
     65        *src1 = src1_;
     66        break;
     67      case 2:  // src1 == dst
     68        *src0 = src0_;
     69        *src1 = dst_tst_;
     70        src1_stride_ = dst_stride_;
     71        src1_offset_ = dst_offset_;
     72        break;
     73      default: FAIL();
     74    }
     75  }
     76 
     77  void GetSources(uint16_t **src0, uint16_t **src1, uint8_t * /*dst*/,
     78                  int /*run_times*/) {
     79    *src0 = src0_;
     80    *src1 = src1_;
     81  }
     82 
     83  uint8_t Rand1() { return this->rng_.Rand8() & 1; }
     84 
     85  void RunOneTest(int block_size, int subx, int suby, int run_times) {
     86    w_ = block_size_wide[block_size];
     87    h_ = block_size_high[block_size];
     88    run_times = run_times > 1 ? run_times / w_ : 1;
     89    ASSERT_GT(run_times, 0);
     90    subx_ = subx;
     91    suby_ = suby;
     92 
     93    dst_offset_ = this->rng_(33);
     94    dst_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
     95 
     96    src0_offset_ = this->rng_(33);
     97    src0_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
     98 
     99    src1_offset_ = this->rng_(33);
    100    src1_stride_ = this->rng_(kMaxWidth + 1 - w_) + w_;
    101 
    102    mask_stride_ =
    103        this->rng_(kMaxWidth + 1 - w_ * (subx_ ? 2 : 1)) + w_ * (subx_ ? 2 : 1);
    104 
    105    SrcPixel *p_src0;
    106    SrcPixel *p_src1;
    107 
    108    p_src0 = src0_;
    109    p_src1 = src1_;
    110 
    111    GetSources(&p_src0, &p_src1, &dst_ref_[0], run_times);
    112 
    113    Execute(p_src0, p_src1, run_times);
    114 
    115    for (int r = 0; r < h_; ++r) {
    116      for (int c = 0; c < w_; ++c) {
    117        ASSERT_EQ(dst_ref_[dst_offset_ + r * dst_stride_ + c],
    118                  dst_tst_[dst_offset_ + r * dst_stride_ + c])
    119            << w_ << "x" << h_ << " subx " << subx_ << " suby " << suby_
    120            << " r: " << r << " c: " << c;
    121      }
    122    }
    123  }
    124 
    125  void RunTest(int block_size, int run_times) {
    126    for (subx_ = 0; subx_ <= 1; subx_++) {
    127      for (suby_ = 0; suby_ <= 1; suby_++) {
    128        RunOneTest(block_size, subx_, suby_, run_times);
    129      }
    130    }
    131  }
    132 
    133  DstPixel dst_ref_[kBufSize];
    134  DstPixel dst_tst_[kBufSize];
    135  uint32_t dst_stride_;
    136  uint32_t dst_offset_;
    137 
    138  SrcPixel src0_[kBufSize];
    139  uint32_t src0_stride_;
    140  uint32_t src0_offset_;
    141 
    142  SrcPixel src1_[kBufSize];
    143  uint32_t src1_stride_;
    144  uint32_t src1_offset_;
    145 
    146  uint8_t mask_[kMaxMaskSize];
    147  size_t mask_stride_;
    148 
    149  int w_;
    150  int h_;
    151 
    152  int suby_;
    153  int subx_;
    154 };
    155 
    156 //////////////////////////////////////////////////////////////////////////////
    157 // 8 bit version
    158 //////////////////////////////////////////////////////////////////////////////
    159 
    160 using F8B = void (*)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
    161                     uint32_t src0_stride, const uint8_t *src1,
    162                     uint32_t src1_stride, const uint8_t *mask,
    163                     uint32_t mask_stride, int w, int h, int subx, int suby);
    164 using TestFuncs = libaom_test::FuncParam<F8B>;
    165 
    166 class BlendA64MaskTest8B : public BlendA64MaskTest<F8B, uint8_t, uint8_t> {
    167 protected:
    168  void Execute(const uint8_t *p_src0, const uint8_t *p_src1,
    169               int run_times) override {
    170    aom_usec_timer timer;
    171    aom_usec_timer_start(&timer);
    172    for (int i = 0; i < run_times; ++i) {
    173      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
    174                       p_src0 + src0_offset_, src0_stride_,
    175                       p_src1 + src1_offset_, src1_stride_, mask_,
    176                       kMaxMaskWidth, w_, h_, subx_, suby_);
    177    }
    178    aom_usec_timer_mark(&timer);
    179    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    180    aom_usec_timer_start(&timer);
    181    for (int i = 0; i < run_times; ++i) {
    182      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
    183                       p_src0 + src0_offset_, src0_stride_,
    184                       p_src1 + src1_offset_, src1_stride_, mask_,
    185                       kMaxMaskWidth, w_, h_, subx_, suby_);
    186    }
    187    aom_usec_timer_mark(&timer);
    188    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    189    if (run_times > 1) {
    190      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
    191             time1, time2);
    192      printf("(%3.2f)\n", time1 / time2);
    193    }
    194  }
    195 };
    196 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B);
    197 
    198 TEST_P(BlendA64MaskTest8B, RandomValues) {
    199  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
    200    for (int i = 0; i < kBufSize; ++i) {
    201      dst_ref_[i] = rng_.Rand8();
    202      dst_tst_[i] = rng_.Rand8();
    203 
    204      src0_[i] = rng_.Rand8();
    205      src1_[i] = rng_.Rand8();
    206    }
    207 
    208    for (int i = 0; i < kMaxMaskSize; ++i)
    209      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    210 
    211    RunTest(bsize, 1);
    212  }
    213 }
    214 
    215 TEST_P(BlendA64MaskTest8B, ExtremeValues) {
    216  for (int i = 0; i < kBufSize; ++i) {
    217    dst_ref_[i] = rng_(2) + 254;
    218    dst_tst_[i] = rng_(2) + 254;
    219    src0_[i] = rng_(2) + 254;
    220    src1_[i] = rng_(2) + 254;
    221  }
    222 
    223  for (int i = 0; i < kMaxMaskSize; ++i)
    224    mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
    225 
    226  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
    227    RunTest(bsize, 1);
    228 }
    229 
    230 TEST_P(BlendA64MaskTest8B, DISABLED_Speed) {
    231  const int kRunTimes = 10000000;
    232  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    233    for (int i = 0; i < kBufSize; ++i) {
    234      dst_ref_[i] = rng_.Rand8();
    235      dst_tst_[i] = rng_.Rand8();
    236 
    237      src0_[i] = rng_.Rand8();
    238      src1_[i] = rng_.Rand8();
    239    }
    240 
    241    for (int i = 0; i < kMaxMaskSize; ++i)
    242      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    243 
    244    RunTest(bsize, kRunTimes);
    245  }
    246 }
    247 #if HAVE_SSE4_1
    248 INSTANTIATE_TEST_SUITE_P(SSE4_1, BlendA64MaskTest8B,
    249                         ::testing::Values(TestFuncs(
    250                             aom_blend_a64_mask_c, aom_blend_a64_mask_sse4_1)));
    251 #endif  // HAVE_SSE4_1
    252 
    253 #if HAVE_AVX2
    254 INSTANTIATE_TEST_SUITE_P(AVX2, BlendA64MaskTest8B,
    255                         ::testing::Values(TestFuncs(aom_blend_a64_mask_sse4_1,
    256                                                     aom_blend_a64_mask_avx2)));
    257 #endif  // HAVE_AVX2
    258 
    259 #if HAVE_NEON
    260 INSTANTIATE_TEST_SUITE_P(NEON, BlendA64MaskTest8B,
    261                         ::testing::Values(TestFuncs(aom_blend_a64_mask_c,
    262                                                     aom_blend_a64_mask_neon)));
    263 #endif  // HAVE_NEON
    264 
    265 //////////////////////////////////////////////////////////////////////////////
    266 // 8 bit _d16 version
    267 //////////////////////////////////////////////////////////////////////////////
    268 
    269 using F8B_D16 = void (*)(uint8_t *dst, uint32_t dst_stride,
    270                         const uint16_t *src0, uint32_t src0_stride,
    271                         const uint16_t *src1, uint32_t src1_stride,
    272                         const uint8_t *mask, uint32_t mask_stride, int w,
    273                         int h, int subx, int suby,
    274                         ConvolveParams *conv_params);
    275 using TestFuncs_d16 = libaom_test::FuncParam<F8B_D16>;
    276 
    277 class BlendA64MaskTest8B_d16
    278    : public BlendA64MaskTest<F8B_D16, uint16_t, uint8_t> {
    279 protected:
    280  // max number of bits used by the source
    281  static const int kSrcMaxBitsMask = 0x3fff;
    282 
    283  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
    284               int run_times) override {
    285    ConvolveParams conv_params;
    286    conv_params.round_0 = ROUND0_BITS;
    287    conv_params.round_1 = COMPOUND_ROUND1_BITS;
    288    aom_usec_timer timer;
    289    aom_usec_timer_start(&timer);
    290    for (int i = 0; i < run_times; ++i) {
    291      params_.ref_func(dst_ref_ + dst_offset_, dst_stride_,
    292                       p_src0 + src0_offset_, src0_stride_,
    293                       p_src1 + src1_offset_, src1_stride_, mask_,
    294                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
    295    }
    296    aom_usec_timer_mark(&timer);
    297    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    298    aom_usec_timer_start(&timer);
    299    for (int i = 0; i < run_times; ++i) {
    300      params_.tst_func(dst_tst_ + dst_offset_, dst_stride_,
    301                       p_src0 + src0_offset_, src0_stride_,
    302                       p_src1 + src1_offset_, src1_stride_, mask_,
    303                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params);
    304    }
    305    aom_usec_timer_mark(&timer);
    306    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    307    if (run_times > 1) {
    308      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
    309             time1, time2);
    310      printf("(%3.2f)\n", time1 / time2);
    311    }
    312  }
    313 };
    314 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTest8B_d16);
    315 
    316 TEST_P(BlendA64MaskTest8B_d16, RandomValues) {
    317  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize) {
    318    for (int i = 0; i < kBufSize; ++i) {
    319      dst_ref_[i] = rng_.Rand8();
    320      dst_tst_[i] = rng_.Rand8();
    321 
    322      src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
    323      src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
    324    }
    325 
    326    for (int i = 0; i < kMaxMaskSize; ++i)
    327      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    328 
    329    RunTest(bsize, 1);
    330  }
    331 }
    332 
    333 TEST_P(BlendA64MaskTest8B_d16, ExtremeValues) {
    334  for (int i = 0; i < kBufSize; ++i) {
    335    dst_ref_[i] = 255;
    336    dst_tst_[i] = 255;
    337 
    338    src0_[i] = kSrcMaxBitsMask;
    339    src1_[i] = kSrcMaxBitsMask;
    340  }
    341 
    342  for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA - 1;
    343 
    344  for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure(); ++bsize)
    345    RunTest(bsize, 1);
    346 }
    347 
    348 TEST_P(BlendA64MaskTest8B_d16, DISABLED_Speed) {
    349  const int kRunTimes = 10000000;
    350  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    351    for (int i = 0; i < kBufSize; ++i) {
    352      dst_ref_[i] = rng_.Rand8();
    353      dst_tst_[i] = rng_.Rand8();
    354 
    355      src0_[i] = rng_.Rand16() & kSrcMaxBitsMask;
    356      src1_[i] = rng_.Rand16() & kSrcMaxBitsMask;
    357    }
    358 
    359    for (int i = 0; i < kMaxMaskSize; ++i)
    360      mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    361 
    362    RunTest(bsize, kRunTimes);
    363  }
    364 }
    365 
    366 #if HAVE_SSE4_1
    367 INSTANTIATE_TEST_SUITE_P(
    368    SSE4_1, BlendA64MaskTest8B_d16,
    369    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
    370                                    aom_lowbd_blend_a64_d16_mask_sse4_1)));
    371 #endif  // HAVE_SSE4_1
    372 
    373 #if HAVE_AVX2
    374 INSTANTIATE_TEST_SUITE_P(
    375    AVX2, BlendA64MaskTest8B_d16,
    376    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
    377                                    aom_lowbd_blend_a64_d16_mask_avx2)));
    378 #endif  // HAVE_AVX2
    379 
    380 #if HAVE_NEON
    381 INSTANTIATE_TEST_SUITE_P(
    382    NEON, BlendA64MaskTest8B_d16,
    383    ::testing::Values(TestFuncs_d16(aom_lowbd_blend_a64_d16_mask_c,
    384                                    aom_lowbd_blend_a64_d16_mask_neon)));
    385 #endif  // HAVE_NEON
    386 
    387 //////////////////////////////////////////////////////////////////////////////
    388 // High bit-depth version
    389 //////////////////////////////////////////////////////////////////////////////
    390 #if CONFIG_AV1_HIGHBITDEPTH
    391 using FHBD = void (*)(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0,
    392                      uint32_t src0_stride, const uint8_t *src1,
    393                      uint32_t src1_stride, const uint8_t *mask,
    394                      uint32_t mask_stride, int w, int h, int subx, int suby,
    395                      int bd);
    396 using TestFuncsHBD = libaom_test::FuncParam<FHBD>;
    397 
    398 class BlendA64MaskTestHBD : public BlendA64MaskTest<FHBD, uint16_t, uint16_t> {
    399 protected:
    400  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
    401               int run_times) override {
    402    aom_usec_timer timer;
    403    aom_usec_timer_start(&timer);
    404    for (int i = 0; i < run_times; ++i) {
    405      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
    406                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
    407                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
    408                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
    409    }
    410    aom_usec_timer_mark(&timer);
    411    const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    412    aom_usec_timer_start(&timer);
    413    for (int i = 0; i < run_times; ++i) {
    414      params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_), dst_stride_,
    415                       CONVERT_TO_BYTEPTR(p_src0 + src0_offset_), src0_stride_,
    416                       CONVERT_TO_BYTEPTR(p_src1 + src1_offset_), src1_stride_,
    417                       mask_, kMaxMaskWidth, w_, h_, subx_, suby_, bit_depth_);
    418    }
    419    aom_usec_timer_mark(&timer);
    420    const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    421    if (run_times > 1) {
    422      printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
    423             time1, time2);
    424      printf("(%3.2f)\n", time1 / time2);
    425    }
    426  }
    427 
    428  int bit_depth_;
    429 };
    430 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(BlendA64MaskTestHBD);
    431 
    432 TEST_P(BlendA64MaskTestHBD, RandomValues) {
    433  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
    434       bit_depth_ += 2) {
    435    const int hi = 1 << bit_depth_;
    436 
    437    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    438      for (int i = 0; i < kBufSize; ++i) {
    439        dst_ref_[i] = rng_(hi);
    440        dst_tst_[i] = rng_(hi);
    441        src0_[i] = rng_(hi);
    442        src1_[i] = rng_(hi);
    443      }
    444 
    445      for (int i = 0; i < kMaxMaskSize; ++i)
    446        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    447 
    448      RunTest(bsize, 1);
    449    }
    450  }
    451 }
    452 
    453 TEST_P(BlendA64MaskTestHBD, ExtremeValues) {
    454  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
    455       bit_depth_ += 2) {
    456    const int hi = 1 << bit_depth_;
    457    const int lo = hi - 2;
    458 
    459    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
    460         ++bsize) {
    461      for (int i = 0; i < kBufSize; ++i) {
    462        dst_ref_[i] = rng_(hi - lo) + lo;
    463        dst_tst_[i] = rng_(hi - lo) + lo;
    464        src0_[i] = rng_(hi - lo) + lo;
    465        src1_[i] = rng_(hi - lo) + lo;
    466      }
    467 
    468      for (int i = 0; i < kMaxMaskSize; ++i)
    469        mask_[i] = rng_(2) + AOM_BLEND_A64_MAX_ALPHA - 1;
    470 
    471      RunTest(bsize, 1);
    472    }
    473  }
    474 }
    475 
    476 #if HAVE_SSE4_1
    477 INSTANTIATE_TEST_SUITE_P(
    478    SSE4_1, BlendA64MaskTestHBD,
    479    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
    480                                   aom_highbd_blend_a64_mask_sse4_1)));
    481 #endif  // HAVE_SSE4_1
    482 
    483 #if HAVE_NEON
    484 INSTANTIATE_TEST_SUITE_P(
    485    NEON, BlendA64MaskTestHBD,
    486    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
    487                                   aom_highbd_blend_a64_mask_neon)));
    488 #endif  // HAVE_NEON
    489 
    490 //////////////////////////////////////////////////////////////////////////////
    491 // HBD _d16 version
    492 //////////////////////////////////////////////////////////////////////////////
    493 
    494 using FHBD_D16 = void (*)(uint8_t *dst, uint32_t dst_stride,
    495                          const CONV_BUF_TYPE *src0, uint32_t src0_stride,
    496                          const CONV_BUF_TYPE *src1, uint32_t src1_stride,
    497                          const uint8_t *mask, uint32_t mask_stride, int w,
    498                          int h, int subx, int suby,
    499                          ConvolveParams *conv_params, const int bd);
    500 using TestFuncsHBD_d16 = libaom_test::FuncParam<FHBD_D16>;
    501 
    502 class BlendA64MaskTestHBD_d16
    503    : public BlendA64MaskTest<FHBD_D16, uint16_t, uint16_t> {
    504 protected:
    505  // max number of bits used by the source
    506  static const int kSrcMaxBitsMask = (1 << 14) - 1;
    507  static const int kSrcMaxBitsMaskHBD = (1 << 16) - 1;
    508 
    509  void Execute(const uint16_t *p_src0, const uint16_t *p_src1,
    510               int run_times) override {
    511    ASSERT_GT(run_times, 0) << "Cannot run 0 iterations of the test.";
    512    ConvolveParams conv_params;
    513    conv_params.round_0 = (bit_depth_ == 12) ? ROUND0_BITS + 2 : ROUND0_BITS;
    514    conv_params.round_1 = COMPOUND_ROUND1_BITS;
    515    aom_usec_timer timer;
    516    aom_usec_timer_start(&timer);
    517    for (int i = 0; i < run_times; ++i) {
    518      params_.ref_func(CONVERT_TO_BYTEPTR(dst_ref_ + dst_offset_), dst_stride_,
    519                       p_src0 + src0_offset_, src0_stride_,
    520                       p_src1 + src1_offset_, src1_stride_, mask_,
    521                       kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
    522                       bit_depth_);
    523    }
    524    if (params_.tst_func) {
    525      aom_usec_timer_mark(&timer);
    526      const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    527      aom_usec_timer_start(&timer);
    528      for (int i = 0; i < run_times; ++i) {
    529        params_.tst_func(CONVERT_TO_BYTEPTR(dst_tst_ + dst_offset_),
    530                         dst_stride_, p_src0 + src0_offset_, src0_stride_,
    531                         p_src1 + src1_offset_, src1_stride_, mask_,
    532                         kMaxMaskWidth, w_, h_, subx_, suby_, &conv_params,
    533                         bit_depth_);
    534      }
    535      aom_usec_timer_mark(&timer);
    536      const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
    537      if (run_times > 1) {
    538        printf("%3dx%-3d subx %d suby %d :%7.2f/%7.2fns", w_, h_, subx_, suby_,
    539               time1, time2);
    540        printf("(%3.2f)\n", time1 / time2);
    541      }
    542    }
    543  }
    544 
    545  int bit_depth_;
    546  int src_max_bits_mask_;
    547 };
    548 
    549 TEST_P(BlendA64MaskTestHBD_d16, RandomValues) {
    550  if (params_.tst_func == nullptr) return;
    551  for (bit_depth_ = 8; bit_depth_ <= 12 && !HasFatalFailure();
    552       bit_depth_ += 2) {
    553    src_max_bits_mask_ =
    554        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
    555 
    556    for (int bsize = 0; bsize < BLOCK_SIZES_ALL && !HasFatalFailure();
    557         ++bsize) {
    558      for (int i = 0; i < kBufSize; ++i) {
    559        dst_ref_[i] = rng_.Rand8();
    560        dst_tst_[i] = rng_.Rand8();
    561 
    562        src0_[i] = rng_.Rand16() & src_max_bits_mask_;
    563        src1_[i] = rng_.Rand16() & src_max_bits_mask_;
    564      }
    565 
    566      for (int i = 0; i < kMaxMaskSize; ++i)
    567        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    568 
    569      RunTest(bsize, 1);
    570    }
    571  }
    572 }
    573 
    574 TEST_P(BlendA64MaskTestHBD_d16, ExtremeValues) {
    575  for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
    576    src_max_bits_mask_ =
    577        (bit_depth_ == 8) ? kSrcMaxBitsMask : kSrcMaxBitsMaskHBD;
    578 
    579    for (int i = 0; i < kBufSize; ++i) {
    580      dst_ref_[i] = 0;
    581      dst_tst_[i] = (1 << bit_depth_) - 1;
    582 
    583      src0_[i] = src_max_bits_mask_;
    584      src1_[i] = src_max_bits_mask_;
    585    }
    586 
    587    for (int i = 0; i < kMaxMaskSize; ++i) mask_[i] = AOM_BLEND_A64_MAX_ALPHA;
    588    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    589      RunTest(bsize, 1);
    590    }
    591  }
    592 }
    593 
    594 TEST_P(BlendA64MaskTestHBD_d16, DISABLED_Speed) {
    595  const int kRunTimes = 10000000;
    596  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    597    for (bit_depth_ = 8; bit_depth_ <= 12; bit_depth_ += 2) {
    598      for (int i = 0; i < kBufSize; ++i) {
    599        dst_ref_[i] = rng_.Rand12() % (1 << bit_depth_);
    600        dst_tst_[i] = rng_.Rand12() % (1 << bit_depth_);
    601 
    602        src0_[i] = rng_.Rand16();
    603        src1_[i] = rng_.Rand16();
    604      }
    605 
    606      for (int i = 0; i < kMaxMaskSize; ++i)
    607        mask_[i] = rng_(AOM_BLEND_A64_MAX_ALPHA + 1);
    608 
    609      RunTest(bsize, kRunTimes);
    610    }
    611  }
    612 }
    613 
    614 INSTANTIATE_TEST_SUITE_P(
    615    C, BlendA64MaskTestHBD_d16,
    616    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
    617                                       aom_highbd_blend_a64_d16_mask_c)));
    618 
    619 #if HAVE_SSE4_1
    620 INSTANTIATE_TEST_SUITE_P(
    621    SSE4_1, BlendA64MaskTestHBD_d16,
    622    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
    623                                       aom_highbd_blend_a64_d16_mask_sse4_1)));
    624 #endif  // HAVE_SSE4_1
    625 
    626 #if HAVE_AVX2
    627 INSTANTIATE_TEST_SUITE_P(
    628    AVX2, BlendA64MaskTestHBD_d16,
    629    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
    630                                       aom_highbd_blend_a64_d16_mask_avx2)));
    631 #endif  // HAVE_AVX2
    632 
    633 #if HAVE_NEON
    634 INSTANTIATE_TEST_SUITE_P(
    635    NEON, BlendA64MaskTestHBD_d16,
    636    ::testing::Values(TestFuncsHBD_d16(aom_highbd_blend_a64_d16_mask_c,
    637                                       aom_highbd_blend_a64_d16_mask_neon)));
    638 #endif  // HAVE_NEON
    639 
    640 // TODO(slavarnway): Enable the following in the avx2 commit. (56501)
    641 #if 0
    642 #if HAVE_AVX2
    643 INSTANTIATE_TEST_SUITE_P(
    644    SSE4_1, BlendA64MaskTestHBD,
    645    ::testing::Values(TestFuncsHBD(aom_highbd_blend_a64_mask_c,
    646                                   aom_highbd_blend_a64_mask_avx2)));
    647 #endif  // HAVE_AVX2
    648 #endif
    649 #endif  // CONFIG_AV1_HIGHBITDEPTH
    650 }  // namespace