tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

cfl_test.cc (21681B)


      1 /*
      2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <tuple>
     13 
     14 #include "gtest/gtest.h"
     15 
     16 #include "config/av1_rtcd.h"
     17 
     18 #include "aom_ports/aom_timer.h"
     19 #include "test/util.h"
     20 #include "test/acm_random.h"
     21 
     22 using std::make_tuple;
     23 
     24 using libaom_test::ACMRandom;
     25 
     26 #define NUM_ITERATIONS (100)
     27 #define NUM_ITERATIONS_SPEED (INT16_MAX)
     28 
     29 #define ALL_CFL_TX_SIZES(function)                           \
     30  make_tuple(static_cast<TX_SIZE>(TX_4X4), &function),       \
     31      make_tuple(static_cast<TX_SIZE>(TX_4X8), &function),   \
     32      make_tuple(static_cast<TX_SIZE>(TX_4X16), &function),  \
     33      make_tuple(static_cast<TX_SIZE>(TX_8X4), &function),   \
     34      make_tuple(static_cast<TX_SIZE>(TX_8X8), &function),   \
     35      make_tuple(static_cast<TX_SIZE>(TX_8X16), &function),  \
     36      make_tuple(static_cast<TX_SIZE>(TX_8X32), &function),  \
     37      make_tuple(static_cast<TX_SIZE>(TX_16X4), &function),  \
     38      make_tuple(static_cast<TX_SIZE>(TX_16X8), &function),  \
     39      make_tuple(static_cast<TX_SIZE>(TX_16X16), &function), \
     40      make_tuple(static_cast<TX_SIZE>(TX_16X32), &function), \
     41      make_tuple(static_cast<TX_SIZE>(TX_32X8), &function),  \
     42      make_tuple(static_cast<TX_SIZE>(TX_32X16), &function), \
     43      make_tuple(static_cast<TX_SIZE>(TX_32X32), &function)
     44 
     45 #define ALL_CFL_TX_SIZES_SUBSAMPLE(fun420, fun422, fun444)                   \
     46  make_tuple(static_cast<TX_SIZE>(TX_4X4), &fun420, &fun422, &fun444),       \
     47      make_tuple(static_cast<TX_SIZE>(TX_4X8), &fun420, &fun422, &fun444),   \
     48      make_tuple(static_cast<TX_SIZE>(TX_4X16), &fun420, &fun422, &fun444),  \
     49      make_tuple(static_cast<TX_SIZE>(TX_8X4), &fun420, &fun422, &fun444),   \
     50      make_tuple(static_cast<TX_SIZE>(TX_8X8), &fun420, &fun422, &fun444),   \
     51      make_tuple(static_cast<TX_SIZE>(TX_8X16), &fun420, &fun422, &fun444),  \
     52      make_tuple(static_cast<TX_SIZE>(TX_8X32), &fun420, &fun422, &fun444),  \
     53      make_tuple(static_cast<TX_SIZE>(TX_16X4), &fun420, &fun422, &fun444),  \
     54      make_tuple(static_cast<TX_SIZE>(TX_16X8), &fun420, &fun422, &fun444),  \
     55      make_tuple(static_cast<TX_SIZE>(TX_16X16), &fun420, &fun422, &fun444), \
     56      make_tuple(static_cast<TX_SIZE>(TX_16X32), &fun420, &fun422, &fun444), \
     57      make_tuple(static_cast<TX_SIZE>(TX_32X8), &fun420, &fun422, &fun444),  \
     58      make_tuple(static_cast<TX_SIZE>(TX_32X16), &fun420, &fun422, &fun444), \
     59      make_tuple(static_cast<TX_SIZE>(TX_32X32), &fun420, &fun422, &fun444)
     60 
     61 namespace {
     62 
     63 template <typename A>
     64 static void assert_eq(const A *a, const A *b, int width, int height) {
     65  for (int j = 0; j < height; j++) {
     66    for (int i = 0; i < width; i++) {
     67      ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
     68    }
     69  }
     70 }
     71 
     72 static void assertFaster(int ref_elapsed_time, int elapsed_time) {
     73  EXPECT_GT(ref_elapsed_time, elapsed_time)
     74      << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
     75      << "C time: " << ref_elapsed_time << " us" << std::endl
     76      << "SIMD time: " << elapsed_time << " us" << std::endl;
     77 }
     78 
     79 static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
     80                       int height) {
     81  std::cout.precision(2);
     82  std::cout << "[          ] " << width << "x" << height
     83            << ": C time = " << ref_elapsed_time
     84            << " us, SIMD time = " << elapsed_time << " us"
     85            << " (~" << ref_elapsed_time / (double)elapsed_time << "x) "
     86            << std::endl;
     87 }
     88 
     89 class CFLTest {
     90 public:
     91  virtual ~CFLTest() = default;
     92  void init(TX_SIZE tx) {
     93    tx_size = tx;
     94    width = tx_size_wide[tx_size];
     95    height = tx_size_high[tx_size];
     96    rnd.Reset(ACMRandom::DeterministicSeed());
     97  }
     98 
     99 protected:
    100  TX_SIZE tx_size;
    101  int width;
    102  int height;
    103  ACMRandom rnd;
    104 };
    105 
    106 template <typename I>
    107 class CFLTestWithData : public CFLTest {
    108 public:
    109  ~CFLTestWithData() override = default;
    110 
    111 protected:
    112  I data[CFL_BUF_SQUARE];
    113  I data_ref[CFL_BUF_SQUARE];
    114  void randData(I (ACMRandom::*random)()) {
    115    for (int j = 0; j < this->height; j++) {
    116      for (int i = 0; i < this->width; i++) {
    117        const I d = (this->rnd.*random)();
    118        data[j * CFL_BUF_LINE + i] = d;
    119        data_ref[j * CFL_BUF_LINE + i] = d;
    120      }
    121    }
    122  }
    123 };
    124 
    125 template <typename I>
    126 class CFLTestWithAlignedData : public CFLTest {
    127 public:
    128  ~CFLTestWithAlignedData() override {
    129    aom_free(chroma_pels_ref);
    130    aom_free(sub_luma_pels_ref);
    131    aom_free(chroma_pels);
    132    aom_free(sub_luma_pels);
    133  }
    134 
    135 protected:
    136  void init() {
    137    chroma_pels_ref =
    138        reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
    139    ASSERT_NE(chroma_pels_ref, nullptr);
    140    chroma_pels =
    141        reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
    142    ASSERT_NE(chroma_pels, nullptr);
    143    sub_luma_pels_ref = reinterpret_cast<int16_t *>(
    144        aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
    145    ASSERT_NE(sub_luma_pels_ref, nullptr);
    146    sub_luma_pels = reinterpret_cast<int16_t *>(
    147        aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
    148    ASSERT_NE(sub_luma_pels, nullptr);
    149    memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
    150    memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
    151    memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
    152    memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
    153  }
    154 
    155  I *chroma_pels_ref;
    156  I *chroma_pels;
    157  int16_t *sub_luma_pels_ref;
    158  int16_t *sub_luma_pels;
    159  int alpha_q3;
    160  I dc;
    161  void randData(int bd) {
    162    alpha_q3 = this->rnd(33) - 16;
    163    dc = this->rnd(1 << bd);
    164    for (int j = 0; j < this->height; j++) {
    165      for (int i = 0; i < this->width; i++) {
    166        chroma_pels[j * CFL_BUF_LINE + i] = dc;
    167        chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
    168        sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
    169            sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd(1 << (bd + 3));
    170      }
    171    }
    172  }
    173 };
    174 
    175 using sub_avg_fn = cfl_subtract_average_fn (*)(TX_SIZE tx_size);
    176 using sub_avg_param = std::tuple<TX_SIZE, sub_avg_fn>;
    177 class CFLSubAvgTest : public ::testing::TestWithParam<sub_avg_param>,
    178                      public CFLTestWithData<uint16_t> {
    179 public:
    180  void SetUp() override {
    181    CFLTest::init(std::get<0>(this->GetParam()));
    182    sub_avg = std::get<1>(this->GetParam())(tx_size);
    183    sub_avg_ref = cfl_get_subtract_average_fn_c(tx_size);
    184  }
    185  ~CFLSubAvgTest() override = default;
    186 
    187 protected:
    188  cfl_subtract_average_fn sub_avg;
    189  cfl_subtract_average_fn sub_avg_ref;
    190 };
    191 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubAvgTest);
    192 
    193 TEST_P(CFLSubAvgTest, SubAvgTest) {
    194  int16_t dst[CFL_BUF_SQUARE];
    195  int16_t dst_ref[CFL_BUF_SQUARE];
    196  for (int it = 0; it < NUM_ITERATIONS; it++) {
    197    randData(&ACMRandom::Rand15);
    198    sub_avg(data, dst);
    199    sub_avg_ref(data_ref, dst_ref);
    200    assert_eq<int16_t>(dst, dst_ref, width, height);
    201  }
    202 }
    203 
    204 TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
    205  int16_t dst[CFL_BUF_SQUARE];
    206  int16_t dst_ref[CFL_BUF_SQUARE];
    207  aom_usec_timer ref_timer;
    208  aom_usec_timer timer;
    209  randData(&ACMRandom::Rand15);
    210  aom_usec_timer_start(&ref_timer);
    211  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    212    sub_avg_ref(data_ref, dst_ref);
    213  }
    214  aom_usec_timer_mark(&ref_timer);
    215  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
    216  aom_usec_timer_start(&timer);
    217  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    218    sub_avg(data, dst);
    219  }
    220  aom_usec_timer_mark(&timer);
    221  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
    222  printSpeed(ref_elapsed_time, elapsed_time, width, height);
    223  assertFaster(ref_elapsed_time, elapsed_time);
    224 }
    225 
    226 template <typename S, typename T, typename I>
    227 class CFLSubsampleTest : public ::testing::TestWithParam<S>,
    228                         public CFLTestWithData<I> {
    229 public:
    230  void SetUp() override {
    231    CFLTest::init(std::get<0>(this->GetParam()));
    232    fun_420 = std::get<1>(this->GetParam())(this->tx_size);
    233    fun_422 = std::get<2>(this->GetParam())(this->tx_size);
    234    fun_444 = std::get<3>(this->GetParam())(this->tx_size);
    235  }
    236 
    237 protected:
    238  T fun_420;
    239  T fun_422;
    240  T fun_444;
    241  T fun_420_ref;
    242  T fun_422_ref;
    243  T fun_444_ref;
    244 
    245  void subsampleTest(T fun, T fun_ref, int sub_width, int sub_height,
    246                     I (ACMRandom::*random)()) {
    247    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
    248    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
    249 
    250    for (int it = 0; it < NUM_ITERATIONS; it++) {
    251      CFLTestWithData<I>::randData(random);
    252      fun(this->data, CFL_BUF_LINE, sub_luma_pels);
    253      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
    254      assert_eq<uint16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width,
    255                          sub_height);
    256    }
    257  }
    258 
    259  void subsampleSpeedTest(T fun, T fun_ref, I (ACMRandom::*random)()) {
    260    uint16_t sub_luma_pels[CFL_BUF_SQUARE];
    261    uint16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
    262    aom_usec_timer ref_timer;
    263    aom_usec_timer timer;
    264 
    265    CFLTestWithData<I>::randData(random);
    266    aom_usec_timer_start(&ref_timer);
    267    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    268      fun_ref(this->data_ref, CFL_BUF_LINE, sub_luma_pels_ref);
    269    }
    270    aom_usec_timer_mark(&ref_timer);
    271    int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
    272    aom_usec_timer_start(&timer);
    273    for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    274      fun(this->data, CFL_BUF_LINE, sub_luma_pels);
    275    }
    276    aom_usec_timer_mark(&timer);
    277    int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
    278    printSpeed(ref_elapsed_time, elapsed_time, this->width, this->height);
    279    assertFaster(ref_elapsed_time, elapsed_time);
    280  }
    281 };
    282 
    283 using get_subsample_lbd_fn = cfl_subsample_lbd_fn (*)(TX_SIZE tx_size);
    284 using subsample_lbd_param =
    285    std::tuple<TX_SIZE, get_subsample_lbd_fn, get_subsample_lbd_fn,
    286               get_subsample_lbd_fn>;
    287 class CFLSubsampleLBDTest
    288    : public CFLSubsampleTest<subsample_lbd_param, cfl_subsample_lbd_fn,
    289                              uint8_t> {
    290 public:
    291  ~CFLSubsampleLBDTest() override = default;
    292  void SetUp() override {
    293    CFLSubsampleTest::SetUp();
    294    fun_420_ref = cfl_get_luma_subsampling_420_lbd_c(tx_size);
    295    fun_422_ref = cfl_get_luma_subsampling_422_lbd_c(tx_size);
    296    fun_444_ref = cfl_get_luma_subsampling_444_lbd_c(tx_size);
    297  }
    298 };
    299 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleLBDTest);
    300 
    301 TEST_P(CFLSubsampleLBDTest, SubsampleLBD420Test) {
    302  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
    303                &ACMRandom::Rand8);
    304 }
    305 
    306 TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD420SpeedTest) {
    307  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand8);
    308 }
    309 
    310 TEST_P(CFLSubsampleLBDTest, SubsampleLBD422Test) {
    311  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand8);
    312 }
    313 
    314 TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD422SpeedTest) {
    315  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand8);
    316 }
    317 
    318 TEST_P(CFLSubsampleLBDTest, SubsampleLBD444Test) {
    319  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand8);
    320 }
    321 
    322 TEST_P(CFLSubsampleLBDTest, DISABLED_SubsampleLBD444SpeedTest) {
    323  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand8);
    324 }
    325 
    326 #if CONFIG_AV1_HIGHBITDEPTH
    327 using get_subsample_hbd_fn = cfl_subsample_hbd_fn (*)(TX_SIZE tx_size);
    328 using subsample_hbd_param =
    329    std::tuple<TX_SIZE, get_subsample_hbd_fn, get_subsample_hbd_fn,
    330               get_subsample_hbd_fn>;
    331 class CFLSubsampleHBDTest
    332    : public CFLSubsampleTest<subsample_hbd_param, cfl_subsample_hbd_fn,
    333                              uint16_t> {
    334 public:
    335  ~CFLSubsampleHBDTest() override = default;
    336  void SetUp() override {
    337    CFLSubsampleTest::SetUp();
    338    fun_420_ref = cfl_get_luma_subsampling_420_hbd_c(tx_size);
    339    fun_422_ref = cfl_get_luma_subsampling_422_hbd_c(tx_size);
    340    fun_444_ref = cfl_get_luma_subsampling_444_hbd_c(tx_size);
    341  }
    342 };
    343 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLSubsampleHBDTest);
    344 
    345 TEST_P(CFLSubsampleHBDTest, SubsampleHBD420Test) {
    346  subsampleTest(fun_420, fun_420_ref, width >> 1, height >> 1,
    347                &ACMRandom::Rand12);
    348 }
    349 
    350 TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD420SpeedTest) {
    351  subsampleSpeedTest(fun_420, fun_420_ref, &ACMRandom::Rand12);
    352 }
    353 
    354 TEST_P(CFLSubsampleHBDTest, SubsampleHBD422Test) {
    355  subsampleTest(fun_422, fun_422_ref, width >> 1, height, &ACMRandom::Rand12);
    356 }
    357 
    358 TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD422SpeedTest) {
    359  subsampleSpeedTest(fun_422, fun_422_ref, &ACMRandom::Rand12);
    360 }
    361 
    362 TEST_P(CFLSubsampleHBDTest, SubsampleHBD444Test) {
    363  subsampleTest(fun_444, fun_444_ref, width, height, &ACMRandom::Rand12);
    364 }
    365 
    366 TEST_P(CFLSubsampleHBDTest, DISABLED_SubsampleHBD444SpeedTest) {
    367  subsampleSpeedTest(fun_444, fun_444_ref, &ACMRandom::Rand12);
    368 }
    369 #endif  // CONFIG_AV1_HIGHBITDEPTH
    370 
    371 using get_predict_fn = cfl_predict_lbd_fn (*)(TX_SIZE tx_size);
    372 using predict_param = std::tuple<TX_SIZE, get_predict_fn>;
    373 class CFLPredictTest : public ::testing::TestWithParam<predict_param>,
    374                       public CFLTestWithAlignedData<uint8_t> {
    375 public:
    376  void SetUp() override {
    377    CFLTest::init(std::get<0>(this->GetParam()));
    378    CFLTestWithAlignedData::init();
    379    predict = std::get<1>(this->GetParam())(tx_size);
    380    predict_ref = cfl_get_predict_lbd_fn_c(tx_size);
    381  }
    382  ~CFLPredictTest() override = default;
    383 
    384 protected:
    385  cfl_predict_lbd_fn predict;
    386  cfl_predict_lbd_fn predict_ref;
    387 };
    388 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictTest);
    389 
    390 TEST_P(CFLPredictTest, PredictTest) {
    391  for (int it = 0; it < NUM_ITERATIONS; it++) {
    392    randData(8);
    393    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
    394    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
    395    assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
    396  }
    397 }
    398 TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
    399  aom_usec_timer ref_timer;
    400  aom_usec_timer timer;
    401  randData(8);
    402  aom_usec_timer_start(&ref_timer);
    403  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    404    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
    405  }
    406  aom_usec_timer_mark(&ref_timer);
    407  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
    408 
    409  aom_usec_timer_start(&timer);
    410  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    411    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
    412  }
    413  aom_usec_timer_mark(&timer);
    414  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
    415  printSpeed(ref_elapsed_time, elapsed_time, width, height);
    416  assertFaster(ref_elapsed_time, elapsed_time);
    417 }
    418 
    419 #if CONFIG_AV1_HIGHBITDEPTH
    420 using get_predict_fn_hbd = cfl_predict_hbd_fn (*)(TX_SIZE tx_size);
    421 using predict_param_hbd = std::tuple<TX_SIZE, get_predict_fn_hbd>;
    422 class CFLPredictHBDTest : public ::testing::TestWithParam<predict_param_hbd>,
    423                          public CFLTestWithAlignedData<uint16_t> {
    424 public:
    425  void SetUp() override {
    426    CFLTest::init(std::get<0>(this->GetParam()));
    427    CFLTestWithAlignedData::init();
    428    predict = std::get<1>(this->GetParam())(tx_size);
    429    predict_ref = cfl_get_predict_hbd_fn_c(tx_size);
    430  }
    431  ~CFLPredictHBDTest() override = default;
    432 
    433 protected:
    434  cfl_predict_hbd_fn predict;
    435  cfl_predict_hbd_fn predict_ref;
    436 };
    437 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CFLPredictHBDTest);
    438 
    439 TEST_P(CFLPredictHBDTest, PredictHBDTest) {
    440  int bd = 12;
    441  for (int it = 0; it < NUM_ITERATIONS; it++) {
    442    randData(bd);
    443    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
    444    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
    445    assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
    446  }
    447 }
    448 TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
    449  aom_usec_timer ref_timer;
    450  aom_usec_timer timer;
    451  const int bd = 12;
    452  randData(bd);
    453  aom_usec_timer_start(&ref_timer);
    454  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    455    predict_ref(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3, bd);
    456  }
    457  aom_usec_timer_mark(&ref_timer);
    458  int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
    459 
    460  aom_usec_timer_start(&timer);
    461  for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
    462    predict(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
    463  }
    464  aom_usec_timer_mark(&timer);
    465  int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
    466  printSpeed(ref_elapsed_time, elapsed_time, width, height);
    467  assertFaster(ref_elapsed_time, elapsed_time);
    468 }
    469 #endif  // CONFIG_AV1_HIGHBITDEPTH
    470 
    471 #if HAVE_SSE2
    472 const sub_avg_param sub_avg_sizes_sse2[] = { ALL_CFL_TX_SIZES(
    473    cfl_get_subtract_average_fn_sse2) };
    474 
    475 INSTANTIATE_TEST_SUITE_P(SSE2, CFLSubAvgTest,
    476                         ::testing::ValuesIn(sub_avg_sizes_sse2));
    477 
    478 #endif
    479 
    480 #if HAVE_SSSE3
    481 const subsample_lbd_param subsample_lbd_sizes_ssse3[] = {
    482  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_ssse3,
    483                             cfl_get_luma_subsampling_422_lbd_ssse3,
    484                             cfl_get_luma_subsampling_444_lbd_ssse3)
    485 };
    486 
    487 const predict_param predict_sizes_ssse3[] = { ALL_CFL_TX_SIZES(
    488    cfl_get_predict_lbd_fn_ssse3) };
    489 
    490 INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleLBDTest,
    491                         ::testing::ValuesIn(subsample_lbd_sizes_ssse3));
    492 
    493 INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictTest,
    494                         ::testing::ValuesIn(predict_sizes_ssse3));
    495 
    496 #if CONFIG_AV1_HIGHBITDEPTH
    497 const subsample_hbd_param subsample_hbd_sizes_ssse3[] = {
    498  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_ssse3,
    499                             cfl_get_luma_subsampling_422_hbd_ssse3,
    500                             cfl_get_luma_subsampling_444_hbd_ssse3)
    501 };
    502 
    503 const predict_param_hbd predict_sizes_hbd_ssse3[] = { ALL_CFL_TX_SIZES(
    504    cfl_get_predict_hbd_fn_ssse3) };
    505 
    506 INSTANTIATE_TEST_SUITE_P(SSSE3, CFLSubsampleHBDTest,
    507                         ::testing::ValuesIn(subsample_hbd_sizes_ssse3));
    508 
    509 INSTANTIATE_TEST_SUITE_P(SSSE3, CFLPredictHBDTest,
    510                         ::testing::ValuesIn(predict_sizes_hbd_ssse3));
    511 #endif  // CONFIG_AV1_HIGHBITDEPTH
    512 #endif  // HAVE_SSSE3
    513 
    514 #if HAVE_AVX2
    515 const sub_avg_param sub_avg_sizes_avx2[] = { ALL_CFL_TX_SIZES(
    516    cfl_get_subtract_average_fn_avx2) };
    517 
    518 const subsample_lbd_param subsample_lbd_sizes_avx2[] = {
    519  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_avx2,
    520                             cfl_get_luma_subsampling_422_lbd_avx2,
    521                             cfl_get_luma_subsampling_444_lbd_avx2)
    522 };
    523 
    524 const predict_param predict_sizes_avx2[] = { ALL_CFL_TX_SIZES(
    525    cfl_get_predict_lbd_fn_avx2) };
    526 
    527 INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubAvgTest,
    528                         ::testing::ValuesIn(sub_avg_sizes_avx2));
    529 
    530 INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleLBDTest,
    531                         ::testing::ValuesIn(subsample_lbd_sizes_avx2));
    532 
    533 INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictTest,
    534                         ::testing::ValuesIn(predict_sizes_avx2));
    535 
    536 #if CONFIG_AV1_HIGHBITDEPTH
    537 const subsample_hbd_param subsample_hbd_sizes_avx2[] = {
    538  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_avx2,
    539                             cfl_get_luma_subsampling_422_hbd_avx2,
    540                             cfl_get_luma_subsampling_444_hbd_avx2)
    541 };
    542 
    543 const predict_param_hbd predict_sizes_hbd_avx2[] = { ALL_CFL_TX_SIZES(
    544    cfl_get_predict_hbd_fn_avx2) };
    545 
    546 INSTANTIATE_TEST_SUITE_P(AVX2, CFLSubsampleHBDTest,
    547                         ::testing::ValuesIn(subsample_hbd_sizes_avx2));
    548 
    549 INSTANTIATE_TEST_SUITE_P(AVX2, CFLPredictHBDTest,
    550                         ::testing::ValuesIn(predict_sizes_hbd_avx2));
    551 #endif  // CONFIG_AV1_HIGHBITDEPTH
    552 #endif  // HAVE_AVX2
    553 
    554 #if HAVE_NEON
    555 const sub_avg_param sub_avg_sizes_neon[] = { ALL_CFL_TX_SIZES(
    556    cfl_get_subtract_average_fn_neon) };
    557 
    558 const predict_param predict_sizes_neon[] = { ALL_CFL_TX_SIZES(
    559    cfl_get_predict_lbd_fn_neon) };
    560 
    561 const subsample_lbd_param subsample_lbd_sizes_neon[] = {
    562  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_lbd_neon,
    563                             cfl_get_luma_subsampling_422_lbd_neon,
    564                             cfl_get_luma_subsampling_444_lbd_neon)
    565 };
    566 
    567 INSTANTIATE_TEST_SUITE_P(NEON, CFLSubAvgTest,
    568                         ::testing::ValuesIn(sub_avg_sizes_neon));
    569 
    570 INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleLBDTest,
    571                         ::testing::ValuesIn(subsample_lbd_sizes_neon));
    572 
    573 INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictTest,
    574                         ::testing::ValuesIn(predict_sizes_neon));
    575 
    576 #if CONFIG_AV1_HIGHBITDEPTH
    577 const subsample_hbd_param subsample_hbd_sizes_neon[] = {
    578  ALL_CFL_TX_SIZES_SUBSAMPLE(cfl_get_luma_subsampling_420_hbd_neon,
    579                             cfl_get_luma_subsampling_422_hbd_neon,
    580                             cfl_get_luma_subsampling_444_hbd_neon)
    581 };
    582 
    583 const predict_param_hbd predict_sizes_hbd_neon[] = { ALL_CFL_TX_SIZES(
    584    cfl_get_predict_hbd_fn_neon) };
    585 
    586 INSTANTIATE_TEST_SUITE_P(NEON, CFLSubsampleHBDTest,
    587                         ::testing::ValuesIn(subsample_hbd_sizes_neon));
    588 
    589 INSTANTIATE_TEST_SUITE_P(NEON, CFLPredictHBDTest,
    590                         ::testing::ValuesIn(predict_sizes_hbd_neon));
    591 #endif  // CONFIG_AV1_HIGHBITDEPTH
    592 #endif  // HAVE_NEON
    593 
    594 #if HAVE_VSX
    595 const sub_avg_param sub_avg_sizes_vsx[] = { ALL_CFL_TX_SIZES(
    596    cfl_get_subtract_average_fn_vsx) };
    597 
    598 INSTANTIATE_TEST_SUITE_P(VSX, CFLSubAvgTest,
    599                         ::testing::ValuesIn(sub_avg_sizes_vsx));
    600 #endif
    601 }  // namespace