tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

comp_mask_pred_test.cc (30022B)


      1 /*
      2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <cstdlib>
     13 #include <new>
     14 #include <tuple>
     15 
     16 #include "config/aom_config.h"
     17 #include "config/aom_dsp_rtcd.h"
     18 
     19 #include "aom/aom_codec.h"
     20 #include "aom/aom_integer.h"
     21 #include "aom_dsp/variance.h"
     22 #include "aom_mem/aom_mem.h"
     23 #include "aom_ports/aom_timer.h"
     24 #include "aom_ports/mem.h"
     25 #include "av1/common/reconinter.h"
     26 #include "av1/encoder/reconinter_enc.h"
     27 #include "gtest/gtest.h"
     28 #include "test/acm_random.h"
     29 #include "test/register_state_check.h"
     30 #include "test/util.h"
     31 
     32 namespace {
     33 using comp_mask_pred_func = void (*)(uint8_t *comp_pred, const uint8_t *pred,
     34                                     int width, int height, const uint8_t *ref,
     35                                     int ref_stride, const uint8_t *mask,
     36                                     int mask_stride, int invert_mask);
     37 
     38 using comp_avg_pred_func = void (*)(uint8_t *comp_pred, const uint8_t *pred,
     39                                    int width, int height, const uint8_t *ref,
     40                                    int ref_stride);
     41 
     42 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON
     43 const BLOCK_SIZE kCompMaskPredParams[] = {
     44  BLOCK_8X8,   BLOCK_8X16, BLOCK_8X32,  BLOCK_16X8, BLOCK_16X16,
     45  BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32
     46 };
     47 #endif
     48 
     49 class AV1CompMaskPredBase : public ::testing::Test {
     50 public:
     51  ~AV1CompMaskPredBase() override;
     52  void SetUp() override;
     53 
     54  void TearDown() override;
     55 
     56 protected:
     57  bool CheckResult(int width, int height) {
     58    for (int y = 0; y < height; ++y) {
     59      for (int x = 0; x < width; ++x) {
     60        const int idx = y * width + x;
     61        if (comp_pred1_[idx] != comp_pred2_[idx]) {
     62          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
     63          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
     64          return false;
     65        }
     66      }
     67    }
     68    return true;
     69  }
     70 
     71  libaom_test::ACMRandom rnd_;
     72  uint8_t *comp_pred1_;
     73  uint8_t *comp_pred2_;
     74  uint8_t *pred_;
     75  uint8_t *ref_buffer_;
     76  uint8_t *ref_;
     77 };
     78 
     79 AV1CompMaskPredBase::~AV1CompMaskPredBase() = default;
     80 
     81 void AV1CompMaskPredBase::SetUp() {
     82  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
     83  av1_init_wedge_masks();
     84  comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
     85  ASSERT_NE(comp_pred1_, nullptr);
     86  comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
     87  ASSERT_NE(comp_pred2_, nullptr);
     88  pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
     89  ASSERT_NE(pred_, nullptr);
     90  // The biggest block size is MAX_SB_SQUARE(128*128), however for the
     91  // convolution we need to access 3 bytes before and 4 bytes after (for an
     92  // 8-tap filter), in both directions, so we need to allocate
     93  // (128 + 7) * (128 + 7) = MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49
     94  ref_buffer_ =
     95      (uint8_t *)aom_memalign(16, MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49);
     96  ASSERT_NE(ref_buffer_, nullptr);
     97  // Start of the actual block where the convolution will be computed
     98  ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3);
     99  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    100    pred_[i] = rnd_.Rand8();
    101  }
    102  for (int i = 0; i < MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49; ++i) {
    103    ref_buffer_[i] = rnd_.Rand8();
    104  }
    105 }
    106 
    107 void AV1CompMaskPredBase::TearDown() {
    108  aom_free(comp_pred1_);
    109  aom_free(comp_pred2_);
    110  aom_free(pred_);
    111  aom_free(ref_buffer_);
    112 }
    113 
    114 using CompMaskPredParam = std::tuple<comp_mask_pred_func, BLOCK_SIZE>;
    115 
    116 class AV1CompMaskPredTest
    117    : public AV1CompMaskPredBase,
    118      public ::testing::WithParamInterface<CompMaskPredParam> {
    119 protected:
    120  void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
    121  void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
    122 };
    123 
    124 void AV1CompMaskPredTest::RunCheckOutput(comp_mask_pred_func test_impl,
    125                                         BLOCK_SIZE bsize, int inv) {
    126  const int w = block_size_wide[bsize];
    127  const int h = block_size_high[bsize];
    128  const int wedge_types = get_wedge_types_lookup(bsize);
    129  for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    130    const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    131 
    132    aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
    133                         inv);
    134    test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
    135 
    136    ASSERT_EQ(CheckResult(w, h), true)
    137        << " wedge " << wedge_index << " inv " << inv;
    138  }
    139 }
    140 
    141 void AV1CompMaskPredTest::RunSpeedTest(comp_mask_pred_func test_impl,
    142                                       BLOCK_SIZE bsize) {
    143  const int w = block_size_wide[bsize];
    144  const int h = block_size_high[bsize];
    145  const int wedge_types = get_wedge_types_lookup(bsize);
    146  int wedge_index = wedge_types / 2;
    147  const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    148  const int num_loops = 1000000000 / (w + h);
    149 
    150  comp_mask_pred_func funcs[2] = { aom_comp_mask_pred_c, test_impl };
    151  double elapsed_time[2] = { 0 };
    152  for (int i = 0; i < 2; ++i) {
    153    aom_usec_timer timer;
    154    aom_usec_timer_start(&timer);
    155    comp_mask_pred_func func = funcs[i];
    156    for (int j = 0; j < num_loops; ++j) {
    157      func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, 0);
    158    }
    159    aom_usec_timer_mark(&timer);
    160    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    161    elapsed_time[i] = 1000.0 * time / num_loops;
    162  }
    163  printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
    164         elapsed_time[1]);
    165  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    166 }
    167 
    168 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompMaskPredTest);
    169 
    170 TEST_P(AV1CompMaskPredTest, CheckOutput) {
    171  // inv = 0, 1
    172  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
    173  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
    174 }
    175 
    176 TEST_P(AV1CompMaskPredTest, DISABLED_Speed) {
    177  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
    178 }
    179 
    180 #if HAVE_SSSE3
    181 INSTANTIATE_TEST_SUITE_P(
    182    SSSE3, AV1CompMaskPredTest,
    183    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
    184                       ::testing::ValuesIn(kCompMaskPredParams)));
    185 #endif
    186 
    187 #if HAVE_AVX2
    188 INSTANTIATE_TEST_SUITE_P(
    189    AVX2, AV1CompMaskPredTest,
    190    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_avx2),
    191                       ::testing::ValuesIn(kCompMaskPredParams)));
    192 #endif
    193 
    194 #if HAVE_NEON
    195 INSTANTIATE_TEST_SUITE_P(
    196    NEON, AV1CompMaskPredTest,
    197    ::testing::Combine(::testing::Values(&aom_comp_mask_pred_neon),
    198                       ::testing::ValuesIn(kCompMaskPredParams)));
    199 #endif
    200 
    201 #if HAVE_SSSE3 || HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON
    202 const BLOCK_SIZE kValidBlockSize[] = {
    203  BLOCK_4X4,     BLOCK_8X8,   BLOCK_8X16,  BLOCK_8X32,   BLOCK_16X8,
    204  BLOCK_16X16,   BLOCK_16X32, BLOCK_32X8,  BLOCK_32X16,  BLOCK_32X32,
    205  BLOCK_32X64,   BLOCK_64X32, BLOCK_64X64, BLOCK_64X128, BLOCK_128X64,
    206  BLOCK_128X128, BLOCK_16X64, BLOCK_64X16
    207 };
    208 #endif
    209 
    210 using upsampled_pred_func = void (*)(MACROBLOCKD *xd,
    211                                     const AV1_COMMON *const cm, int mi_row,
    212                                     int mi_col, const MV *const mv,
    213                                     uint8_t *comp_pred, int width, int height,
    214                                     int subpel_x_q3, int subpel_y_q3,
    215                                     const uint8_t *ref, int ref_stride,
    216                                     int subpel_search);
    217 
    218 using UpsampledPredParam = std::tuple<upsampled_pred_func, BLOCK_SIZE>;
    219 
    220 class AV1UpsampledPredTest
    221    : public AV1CompMaskPredBase,
    222      public ::testing::WithParamInterface<UpsampledPredParam> {
    223 protected:
    224  void RunCheckOutput(upsampled_pred_func test_impl, BLOCK_SIZE bsize);
    225  void RunSpeedTest(upsampled_pred_func test_impl, BLOCK_SIZE bsize,
    226                    int havSub);
    227 };
    228 
    229 void AV1UpsampledPredTest::RunCheckOutput(upsampled_pred_func test_impl,
    230                                          BLOCK_SIZE bsize) {
    231  const int w = block_size_wide[bsize];
    232  const int h = block_size_high[bsize];
    233  for (int subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
    234       ++subpel_search) {
    235    // loop through subx and suby
    236    for (int sub = 0; sub < 8 * 8; ++sub) {
    237      int subx = sub & 0x7;
    238      int suby = (sub >> 3);
    239 
    240      aom_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h,
    241                           subx, suby, ref_, MAX_SB_SIZE, subpel_search);
    242 
    243      test_impl(nullptr, nullptr, 0, 0, nullptr, comp_pred2_, w, h, subx, suby,
    244                ref_, MAX_SB_SIZE, subpel_search);
    245      ASSERT_EQ(CheckResult(w, h), true)
    246          << "sub (" << subx << "," << suby << ")";
    247    }
    248  }
    249 }
    250 
    251 void AV1UpsampledPredTest::RunSpeedTest(upsampled_pred_func test_impl,
    252                                        BLOCK_SIZE bsize, int havSub) {
    253  const int w = block_size_wide[bsize];
    254  const int h = block_size_high[bsize];
    255  const int subx = havSub ? 3 : 0;
    256  const int suby = havSub ? 4 : 0;
    257 
    258  const int num_loops = 1000000000 / (w + h);
    259  upsampled_pred_func funcs[2] = { aom_upsampled_pred_c, test_impl };
    260  double elapsed_time[2] = { 0 };
    261  int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
    262  for (int i = 0; i < 2; ++i) {
    263    aom_usec_timer timer;
    264    aom_usec_timer_start(&timer);
    265    upsampled_pred_func func = funcs[i];
    266    for (int j = 0; j < num_loops; ++j) {
    267      func(nullptr, nullptr, 0, 0, nullptr, comp_pred1_, w, h, subx, suby, ref_,
    268           MAX_SB_SIZE, subpel_search);
    269    }
    270    aom_usec_timer_mark(&timer);
    271    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    272    elapsed_time[i] = 1000.0 * time / num_loops;
    273  }
    274  printf("UpsampledPred[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h,
    275         elapsed_time[0], elapsed_time[1]);
    276  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    277 }
    278 
    279 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1UpsampledPredTest);
    280 
    281 TEST_P(AV1UpsampledPredTest, CheckOutput) {
    282  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
    283 }
    284 
    285 TEST_P(AV1UpsampledPredTest, DISABLED_Speed) {
    286  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
    287 }
    288 
    289 #if HAVE_SSE2
    290 INSTANTIATE_TEST_SUITE_P(
    291    SSE2, AV1UpsampledPredTest,
    292    ::testing::Combine(::testing::Values(&aom_upsampled_pred_sse2),
    293                       ::testing::ValuesIn(kValidBlockSize)));
    294 #endif
    295 
    296 #if HAVE_NEON
    297 INSTANTIATE_TEST_SUITE_P(
    298    NEON, AV1UpsampledPredTest,
    299    ::testing::Combine(::testing::Values(&aom_upsampled_pred_neon),
    300                       ::testing::ValuesIn(kValidBlockSize)));
    301 #endif
    302 
    303 using CompAvgPredParam = std::tuple<comp_avg_pred_func, BLOCK_SIZE>;
    304 
    305 class AV1CompAvgPredTest : public ::testing::TestWithParam<CompAvgPredParam> {
    306 public:
    307  ~AV1CompAvgPredTest() override;
    308  void SetUp() override;
    309 
    310  void TearDown() override;
    311 
    312 protected:
    313  void RunCheckOutput(comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
    314  void RunSpeedTest(comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
    315  bool CheckResult(int width, int height) {
    316    for (int y = 0; y < height; ++y) {
    317      for (int x = 0; x < width; ++x) {
    318        const int idx = y * width + x;
    319        if (comp_pred1_[idx] != comp_pred2_[idx]) {
    320          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y);
    321          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
    322          return false;
    323        }
    324      }
    325    }
    326    return true;
    327  }
    328 
    329  libaom_test::ACMRandom rnd_;
    330  uint8_t *comp_pred1_;
    331  uint8_t *comp_pred2_;
    332  uint8_t *pred_;
    333  uint8_t *ref_;
    334 };
    335 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1CompAvgPredTest);
    336 
    337 AV1CompAvgPredTest::~AV1CompAvgPredTest() = default;
    338 
    339 void AV1CompAvgPredTest::SetUp() {
    340  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
    341 
    342  comp_pred1_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
    343  ASSERT_NE(comp_pred1_, nullptr);
    344  comp_pred2_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
    345  ASSERT_NE(comp_pred2_, nullptr);
    346  pred_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
    347  ASSERT_NE(pred_, nullptr);
    348  ref_ = (uint8_t *)aom_memalign(16, MAX_SB_SQUARE);
    349  ASSERT_NE(ref_, nullptr);
    350  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    351    pred_[i] = rnd_.Rand8();
    352  }
    353  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    354    ref_[i] = rnd_.Rand8();
    355  }
    356 }
    357 
    358 void AV1CompAvgPredTest::TearDown() {
    359  aom_free(comp_pred1_);
    360  aom_free(comp_pred2_);
    361  aom_free(pred_);
    362  aom_free(ref_);
    363 }
    364 
    365 void AV1CompAvgPredTest::RunCheckOutput(comp_avg_pred_func test_impl,
    366                                        BLOCK_SIZE bsize) {
    367  const int w = block_size_wide[bsize];
    368  const int h = block_size_high[bsize];
    369  aom_comp_avg_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE);
    370  test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE);
    371 
    372  ASSERT_EQ(CheckResult(w, h), true);
    373 }
    374 
    375 void AV1CompAvgPredTest::RunSpeedTest(comp_avg_pred_func test_impl,
    376                                      BLOCK_SIZE bsize) {
    377  const int w = block_size_wide[bsize];
    378  const int h = block_size_high[bsize];
    379  const int num_loops = 1000000000 / (w + h);
    380 
    381  comp_avg_pred_func functions[2] = { aom_comp_avg_pred_c, test_impl };
    382  double elapsed_time[2] = { 0.0 };
    383  for (int i = 0; i < 2; ++i) {
    384    aom_usec_timer timer;
    385    aom_usec_timer_start(&timer);
    386    comp_avg_pred_func func = functions[i];
    387    for (int j = 0; j < num_loops; ++j) {
    388      func(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE);
    389    }
    390    aom_usec_timer_mark(&timer);
    391    const double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    392    elapsed_time[i] = 1000.0 * time;
    393  }
    394  printf("CompAvgPred %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
    395         elapsed_time[1]);
    396  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    397 }
    398 
    399 TEST_P(AV1CompAvgPredTest, CheckOutput) {
    400  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
    401 }
    402 
    403 TEST_P(AV1CompAvgPredTest, DISABLED_Speed) {
    404  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
    405 }
    406 
    407 #if HAVE_AVX2
    408 INSTANTIATE_TEST_SUITE_P(
    409    AVX2, AV1CompAvgPredTest,
    410    ::testing::Combine(::testing::Values(&aom_comp_avg_pred_avx2),
    411                       ::testing::ValuesIn(kValidBlockSize)));
    412 #endif
    413 
    414 #if HAVE_NEON
    415 INSTANTIATE_TEST_SUITE_P(
    416    NEON, AV1CompAvgPredTest,
    417    ::testing::Combine(::testing::Values(&aom_comp_avg_pred_neon),
    418                       ::testing::ValuesIn(kValidBlockSize)));
    419 #endif
    420 
    421 #if CONFIG_AV1_HIGHBITDEPTH
    422 class AV1HighbdCompMaskPredTestBase : public ::testing::Test {
    423 public:
    424  ~AV1HighbdCompMaskPredTestBase() override;
    425  void SetUp() override;
    426 
    427  void TearDown() override;
    428 
    429 protected:
    430  bool CheckResult(int width, int height) {
    431    for (int y = 0; y < height; ++y) {
    432      for (int x = 0; x < width; ++x) {
    433        const int idx = y * width + x;
    434        if (comp_pred1_[idx] != comp_pred2_[idx]) {
    435          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, y, x);
    436          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
    437          return false;
    438        }
    439      }
    440    }
    441    return true;
    442  }
    443 
    444  libaom_test::ACMRandom rnd_;
    445  uint16_t *comp_pred1_;
    446  uint16_t *comp_pred2_;
    447  uint16_t *pred_;
    448  uint16_t *ref_buffer_;
    449  uint16_t *ref_;
    450 };
    451 
    452 AV1HighbdCompMaskPredTestBase::~AV1HighbdCompMaskPredTestBase() = default;
    453 
    454 void AV1HighbdCompMaskPredTestBase::SetUp() {
    455  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
    456  av1_init_wedge_masks();
    457 
    458  comp_pred1_ =
    459      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
    460  ASSERT_NE(comp_pred1_, nullptr);
    461  comp_pred2_ =
    462      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
    463  ASSERT_NE(comp_pred2_, nullptr);
    464  pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
    465  ASSERT_NE(pred_, nullptr);
    466  // The biggest block size is MAX_SB_SQUARE(128*128), however for the
    467  // convolution we need to access 3 elements before and 4 elements after (for
    468  // an 8-tap filter), in both directions, so we need to allocate (128 + 7) *
    469  // (128 + 7) = (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) *
    470  // sizeof(*ref_buffer_)
    471  ref_buffer_ = (uint16_t *)aom_memalign(
    472      16, (MAX_SB_SQUARE + (14 * MAX_SB_SIZE) + 49) * sizeof(*ref_buffer_));
    473  ASSERT_NE(ref_buffer_, nullptr);
    474  // Start of the actual block where the convolution will be computed
    475  ref_ = ref_buffer_ + (3 * MAX_SB_SIZE + 3);
    476 }
    477 
    478 void AV1HighbdCompMaskPredTestBase::TearDown() {
    479  aom_free(comp_pred1_);
    480  aom_free(comp_pred2_);
    481  aom_free(pred_);
    482  aom_free(ref_buffer_);
    483 }
    484 
    485 using highbd_comp_mask_pred_func = void (*)(uint8_t *comp_pred8,
    486                                            const uint8_t *pred8, int width,
    487                                            int height, const uint8_t *ref8,
    488                                            int ref_stride, const uint8_t *mask,
    489                                            int mask_stride, int invert_mask);
    490 
    491 using HighbdCompMaskPredParam =
    492    std::tuple<highbd_comp_mask_pred_func, BLOCK_SIZE, int>;
    493 
    494 class AV1HighbdCompMaskPredTest
    495    : public AV1HighbdCompMaskPredTestBase,
    496      public ::testing::WithParamInterface<HighbdCompMaskPredParam> {
    497 public:
    498  ~AV1HighbdCompMaskPredTest() override;
    499 
    500 protected:
    501  void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
    502  void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
    503 };
    504 
    505 AV1HighbdCompMaskPredTest::~AV1HighbdCompMaskPredTest() = default;
    506 
    507 void AV1HighbdCompMaskPredTest::RunCheckOutput(
    508    highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv) {
    509  int bd_ = GET_PARAM(2);
    510  const int w = block_size_wide[bsize];
    511  const int h = block_size_high[bsize];
    512  const int wedge_types = get_wedge_types_lookup(bsize);
    513 
    514  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    515    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    516  }
    517  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    518    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    519  }
    520 
    521  for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    522    const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    523 
    524    aom_highbd_comp_mask_pred_c(
    525        CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
    526        CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
    527 
    528    test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
    529              CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, inv);
    530 
    531    ASSERT_EQ(CheckResult(w, h), true)
    532        << " wedge " << wedge_index << " inv " << inv;
    533  }
    534 }
    535 
    536 void AV1HighbdCompMaskPredTest::RunSpeedTest(
    537    highbd_comp_mask_pred_func test_impl, BLOCK_SIZE bsize) {
    538  int bd_ = GET_PARAM(2);
    539 
    540  const int w = block_size_wide[bsize];
    541  const int h = block_size_high[bsize];
    542  const int wedge_types = get_wedge_types_lookup(bsize);
    543  int wedge_index = wedge_types / 2;
    544 
    545  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    546    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    547  }
    548  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    549    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    550  }
    551 
    552  const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
    553  const int num_loops = 1000000000 / (w + h);
    554 
    555  highbd_comp_mask_pred_func funcs[2] = { aom_highbd_comp_mask_pred_c,
    556                                          test_impl };
    557  double elapsed_time[2] = { 0 };
    558  for (int i = 0; i < 2; ++i) {
    559    aom_usec_timer timer;
    560    aom_usec_timer_start(&timer);
    561    highbd_comp_mask_pred_func func = funcs[i];
    562    for (int j = 0; j < num_loops; ++j) {
    563      func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
    564           CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, mask, w, 0);
    565    }
    566    aom_usec_timer_mark(&timer);
    567    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    568    elapsed_time[i] = 1000.0 * time / num_loops;
    569  }
    570  printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
    571         elapsed_time[1]);
    572  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    573 }
    574 
    575 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompMaskPredTest);
    576 
    577 TEST_P(AV1HighbdCompMaskPredTest, CheckOutput) {
    578  // inv = 0, 1
    579  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
    580  RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
    581 }
    582 
    583 TEST_P(AV1HighbdCompMaskPredTest, DISABLED_Speed) {
    584  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
    585 }
    586 
    587 #if HAVE_NEON
    588 INSTANTIATE_TEST_SUITE_P(
    589    NEON, AV1HighbdCompMaskPredTest,
    590    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_neon),
    591                       ::testing::ValuesIn(kCompMaskPredParams),
    592                       ::testing::Range(8, 13, 2)));
    593 #endif
    594 
    595 #if HAVE_AVX2
    596 INSTANTIATE_TEST_SUITE_P(
    597    AVX2, AV1HighbdCompMaskPredTest,
    598    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_avx2),
    599                       ::testing::ValuesIn(kCompMaskPredParams),
    600                       ::testing::Range(8, 13, 2)));
    601 #endif
    602 
    603 #if HAVE_SSE2
    604 INSTANTIATE_TEST_SUITE_P(
    605    SSE2, AV1HighbdCompMaskPredTest,
    606    ::testing::Combine(::testing::Values(&aom_highbd_comp_mask_pred_sse2),
    607                       ::testing::ValuesIn(kCompMaskPredParams),
    608                       ::testing::Range(8, 13, 2)));
    609 #endif
    610 
    611 using highbd_upsampled_pred_func =
    612    void (*)(MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row,
    613             int mi_col, const MV *const mv, uint8_t *comp_pred8, int width,
    614             int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
    615             int ref_stride, int bd, int subpel_search);
    616 
    617 using HighbdUpsampledPredParam =
    618    std::tuple<highbd_upsampled_pred_func, BLOCK_SIZE, int>;
    619 
    620 class AV1HighbdUpsampledPredTest
    621    : public AV1HighbdCompMaskPredTestBase,
    622      public ::testing::WithParamInterface<HighbdUpsampledPredParam> {
    623 public:
    624  ~AV1HighbdUpsampledPredTest() override;
    625 
    626 protected:
    627  void RunCheckOutput(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize);
    628  void RunSpeedTest(highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize,
    629                    int havSub);
    630 };
    631 
    632 AV1HighbdUpsampledPredTest::~AV1HighbdUpsampledPredTest() = default;
    633 
    634 void AV1HighbdUpsampledPredTest::RunCheckOutput(
    635    highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize) {
    636  int bd_ = GET_PARAM(2);
    637  const int w = block_size_wide[bsize];
    638  const int h = block_size_high[bsize];
    639 
    640  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    641    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    642  }
    643  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    644    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    645  }
    646 
    647  for (int subpel_search = 1; subpel_search <= 2; ++subpel_search) {
    648    // loop through subx and suby
    649    for (int sub = 0; sub < 8 * 8; ++sub) {
    650      int subx = sub & 0x7;
    651      int suby = (sub >> 3);
    652 
    653      aom_highbd_upsampled_pred_c(nullptr, nullptr, 0, 0, nullptr,
    654                                  CONVERT_TO_BYTEPTR(comp_pred1_), w, h, subx,
    655                                  suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE,
    656                                  bd_, subpel_search);
    657 
    658      test_impl(nullptr, nullptr, 0, 0, nullptr,
    659                CONVERT_TO_BYTEPTR(comp_pred2_), w, h, subx, suby,
    660                CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_, subpel_search);
    661 
    662      ASSERT_EQ(CheckResult(w, h), true)
    663          << "sub (" << subx << "," << suby << ")";
    664    }
    665  }
    666 }
    667 
    668 void AV1HighbdUpsampledPredTest::RunSpeedTest(
    669    highbd_upsampled_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
    670  int bd_ = GET_PARAM(2);
    671  const int w = block_size_wide[bsize];
    672  const int h = block_size_high[bsize];
    673  const int subx = havSub ? 3 : 0;
    674  const int suby = havSub ? 4 : 0;
    675 
    676  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    677    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    678  }
    679  for (int i = 0; i < MAX_SB_SQUARE + (8 * MAX_SB_SIZE); ++i) {
    680    ref_buffer_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    681  }
    682 
    683  const int num_loops = 1000000000 / (w + h);
    684  highbd_upsampled_pred_func funcs[2] = { &aom_highbd_upsampled_pred_c,
    685                                          test_impl };
    686  double elapsed_time[2] = { 0 };
    687  for (int i = 0; i < 2; ++i) {
    688    aom_usec_timer timer;
    689    aom_usec_timer_start(&timer);
    690    highbd_upsampled_pred_func func = funcs[i];
    691    int subpel_search = 2;  // set to 1 to test 4-tap filter.
    692    for (int j = 0; j < num_loops; ++j) {
    693      func(nullptr, nullptr, 0, 0, nullptr, CONVERT_TO_BYTEPTR(comp_pred1_), w,
    694           h, subx, suby, CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE, bd_,
    695           subpel_search);
    696    }
    697    aom_usec_timer_mark(&timer);
    698    double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    699    elapsed_time[i] = 1000.0 * time / num_loops;
    700  }
    701  printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
    702         elapsed_time[1]);
    703  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    704 }
    705 
    706 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdUpsampledPredTest);
    707 
    708 TEST_P(AV1HighbdUpsampledPredTest, CheckOutput) {
    709  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
    710 }
    711 
    712 TEST_P(AV1HighbdUpsampledPredTest, DISABLED_Speed) {
    713  RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
    714 }
    715 
    716 #if HAVE_SSE2
    717 INSTANTIATE_TEST_SUITE_P(
    718    SSE2, AV1HighbdUpsampledPredTest,
    719    ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_sse2),
    720                       ::testing::ValuesIn(kValidBlockSize),
    721                       ::testing::Range(8, 13, 2)));
    722 #endif
    723 
    724 #if HAVE_NEON
    725 INSTANTIATE_TEST_SUITE_P(
    726    NEON, AV1HighbdUpsampledPredTest,
    727    ::testing::Combine(::testing::Values(&aom_highbd_upsampled_pred_neon),
    728                       ::testing::ValuesIn(kValidBlockSize),
    729                       ::testing::Range(8, 13, 2)));
    730 #endif
    731 
    732 using highbd_comp_avg_pred_func = void (*)(uint8_t *comp_pred,
    733                                           const uint8_t *pred, int width,
    734                                           int height, const uint8_t *ref,
    735                                           int ref_stride);
    736 
    737 using HighbdCompAvgPredParam =
    738    std::tuple<highbd_comp_avg_pred_func, BLOCK_SIZE, int>;
    739 
    740 class AV1HighbdCompAvgPredTest
    741    : public ::testing::TestWithParam<HighbdCompAvgPredParam> {
    742 public:
    743  ~AV1HighbdCompAvgPredTest() override;
    744  void SetUp() override;
    745 
    746 protected:
    747  void RunCheckOutput(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
    748  void RunSpeedTest(highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize);
    749  bool CheckResult(int width, int height) const {
    750    for (int y = 0; y < height; ++y) {
    751      for (int x = 0; x < width; ++x) {
    752        const int idx = y * width + x;
    753        if (comp_pred1_[idx] != comp_pred2_[idx]) {
    754          printf("%dx%d mismatch @%d(%d,%d) ", width, height, idx, x, y);
    755          printf("%d != %d ", comp_pred1_[idx], comp_pred2_[idx]);
    756          return false;
    757        }
    758      }
    759    }
    760    return true;
    761  }
    762 
    763  libaom_test::ACMRandom rnd_;
    764  uint16_t *comp_pred1_;
    765  uint16_t *comp_pred2_;
    766  uint16_t *pred_;
    767  uint16_t *ref_;
    768 };
    769 
    770 AV1HighbdCompAvgPredTest::~AV1HighbdCompAvgPredTest() {
    771  aom_free(comp_pred1_);
    772  aom_free(comp_pred2_);
    773  aom_free(pred_);
    774  aom_free(ref_);
    775 }
    776 
    777 void AV1HighbdCompAvgPredTest::SetUp() {
    778  int bd_ = GET_PARAM(2);
    779  rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
    780 
    781  comp_pred1_ =
    782      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred1_));
    783  ASSERT_NE(comp_pred1_, nullptr);
    784  comp_pred2_ =
    785      (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*comp_pred2_));
    786  ASSERT_NE(comp_pred2_, nullptr);
    787  pred_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*pred_));
    788  ASSERT_NE(pred_, nullptr);
    789  ref_ = (uint16_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*ref_));
    790  ASSERT_NE(ref_, nullptr);
    791  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    792    pred_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    793  }
    794  for (int i = 0; i < MAX_SB_SQUARE; ++i) {
    795    ref_[i] = rnd_.Rand16() & ((1 << bd_) - 1);
    796  }
    797 }
    798 
    799 void AV1HighbdCompAvgPredTest::RunCheckOutput(
    800    highbd_comp_avg_pred_func test_impl, BLOCK_SIZE bsize) {
    801  const int w = block_size_wide[bsize];
    802  const int h = block_size_high[bsize];
    803  aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(comp_pred1_),
    804                             CONVERT_TO_BYTEPTR(pred_), w, h,
    805                             CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
    806  test_impl(CONVERT_TO_BYTEPTR(comp_pred2_), CONVERT_TO_BYTEPTR(pred_), w, h,
    807            CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
    808 
    809  ASSERT_EQ(CheckResult(w, h), true);
    810 }
    811 
    812 void AV1HighbdCompAvgPredTest::RunSpeedTest(highbd_comp_avg_pred_func test_impl,
    813                                            BLOCK_SIZE bsize) {
    814  const int w = block_size_wide[bsize];
    815  const int h = block_size_high[bsize];
    816  const int num_loops = 1000000000 / (w + h);
    817 
    818  highbd_comp_avg_pred_func functions[2] = { aom_highbd_comp_avg_pred_c,
    819                                             test_impl };
    820  double elapsed_time[2] = { 0.0 };
    821  for (int i = 0; i < 2; ++i) {
    822    aom_usec_timer timer;
    823    aom_usec_timer_start(&timer);
    824    highbd_comp_avg_pred_func func = functions[i];
    825    for (int j = 0; j < num_loops; ++j) {
    826      func(CONVERT_TO_BYTEPTR(comp_pred1_), CONVERT_TO_BYTEPTR(pred_), w, h,
    827           CONVERT_TO_BYTEPTR(ref_), MAX_SB_SIZE);
    828    }
    829    aom_usec_timer_mark(&timer);
    830    const double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
    831    elapsed_time[i] = 1000.0 * time;
    832  }
    833  printf("HighbdCompAvg %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
    834         elapsed_time[1]);
    835  printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
    836 }
    837 
    838 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdCompAvgPredTest);
    839 
    840 TEST_P(AV1HighbdCompAvgPredTest, CheckOutput) {
    841  RunCheckOutput(GET_PARAM(0), GET_PARAM(1));
    842 }
    843 
    844 TEST_P(AV1HighbdCompAvgPredTest, DISABLED_Speed) {
    845  RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
    846 }
    847 
    848 #if HAVE_NEON
    849 INSTANTIATE_TEST_SUITE_P(
    850    NEON, AV1HighbdCompAvgPredTest,
    851    ::testing::Combine(::testing::Values(&aom_highbd_comp_avg_pred_neon),
    852                       ::testing::ValuesIn(kValidBlockSize),
    853                       ::testing::Range(8, 13, 2)));
    854 #endif
    855 
    856 #endif  // CONFIG_AV1_HIGHBITDEPTH
    857 }  // namespace