tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

avg_test.cc (38257B)


      1 /*
      2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <stdlib.h>
     13 #include <ostream>
     14 #include <string>
     15 #include <tuple>
     16 
     17 #include "gtest/gtest.h"
     18 
     19 #include "config/aom_config.h"
     20 #include "config/aom_dsp_rtcd.h"
     21 
     22 #include "aom_ports/aom_timer.h"
     23 #include "aom_ports/mem.h"
     24 #include "test/acm_random.h"
     25 #include "test/register_state_check.h"
     26 #include "test/util.h"
     27 
     28 namespace {
     29 
     30 using libaom_test::ACMRandom;
     31 
     32 template <typename Pixel>
     33 class AverageTestBase : public ::testing::Test {
     34 public:
     35  AverageTestBase(int width, int height, int bit_depth = 8)
     36      : width_(width), height_(height), source_data_(nullptr),
     37        source_stride_(0), bit_depth_(bit_depth) {}
     38 
     39  void TearDown() override {
     40    aom_free(source_data_);
     41    source_data_ = nullptr;
     42  }
     43 
     44 protected:
     45  // Handle blocks up to 4 blocks 64x64 with stride up to 128
     46  static const int kDataAlignment = 16;
     47  static const int kDataBlockWidth = 128;
     48  static const int kDataBlockHeight = 128;
     49  static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight;
     50 
     51  void SetUp() override {
     52    const testing::TestInfo *const test_info =
     53        testing::UnitTest::GetInstance()->current_test_info();
     54    // Skip the speed test for C code as the baseline uses the same function.
     55    if (std::string(test_info->test_suite_name()).find("C/") == 0 &&
     56        std::string(test_info->name()).find("DISABLED_Speed") !=
     57            std::string::npos) {
     58      GTEST_SKIP();
     59    }
     60 
     61    source_data_ = static_cast<Pixel *>(
     62        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
     63    ASSERT_NE(source_data_, nullptr);
     64    memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0]));
     65    source_stride_ = (width_ + 31) & ~31;
     66    bit_depth_ = 8;
     67    rnd_.Reset(ACMRandom::DeterministicSeed());
     68  }
     69 
     70  // Sum Pixels
     71  static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) {
     72    unsigned int average = 0;
     73    for (int h = 0; h < 8; ++h) {
     74      for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
     75    }
     76    return (average + 32) >> 6;
     77  }
     78 
     79  static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch,
     80                                       int x16_idx, int y16_idx, int *avg) {
     81    for (int k = 0; k < 4; k++) {
     82      int average = 0;
     83      int x8_idx = x16_idx + ((k & 1) << 3);
     84      int y8_idx = y16_idx + ((k >> 1) << 3);
     85      for (int h = 0; h < 8; ++h) {
     86        for (int w = 0; w < 8; ++w)
     87          average += source[(h + y8_idx) * pitch + w + x8_idx];
     88      }
     89      avg[k] = (average + 32) >> 6;
     90    }
     91  }
     92 
     93  static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) {
     94    unsigned int average = 0;
     95    for (int h = 0; h < 4; ++h) {
     96      for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
     97    }
     98    return (average + 8) >> 4;
     99  }
    100 
    101  void FillConstant(Pixel fill_constant) {
    102    for (int i = 0; i < width_ * height_; ++i) {
    103      source_data_[i] = fill_constant;
    104    }
    105  }
    106 
    107  void FillRandom() {
    108    for (int i = 0; i < width_ * height_; ++i) {
    109      source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1);
    110    }
    111  }
    112 
    113  int width_, height_;
    114  Pixel *source_data_;
    115  int source_stride_;
    116  int bit_depth_;
    117 
    118  ACMRandom rnd_;
    119 };
    120 using AverageFunction = unsigned int (*)(const uint8_t *s, int pitch);
    121 
    122 // Arguments: width, height, bit_depth, buffer start offset, block size, avg
    123 // function.
    124 using AvgFunc = std::tuple<int, int, int, int, int, AverageFunction>;
    125 
    126 template <typename Pixel>
    127 class AverageTest : public AverageTestBase<Pixel>,
    128                    public ::testing::WithParamInterface<AvgFunc> {
    129 public:
    130  AverageTest()
    131      : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
    132 
    133 protected:
    134  using AverageTestBase<Pixel>::source_data_;
    135  using AverageTestBase<Pixel>::source_stride_;
    136  using AverageTestBase<Pixel>::ReferenceAverage8x8;
    137  using AverageTestBase<Pixel>::ReferenceAverage4x4;
    138  using AverageTestBase<Pixel>::FillConstant;
    139  using AverageTestBase<Pixel>::FillRandom;
    140 
    141  void CheckAverages() {
    142    const int block_size = GET_PARAM(4);
    143    unsigned int expected = 0;
    144 
    145    // The reference frame, but not the source frame, may be unaligned for
    146    // certain types of searches.
    147    const Pixel *const src = source_data_ + GET_PARAM(3);
    148    if (block_size == 8) {
    149      expected = ReferenceAverage8x8(src, source_stride_);
    150    } else if (block_size == 4) {
    151      expected = ReferenceAverage4x4(src, source_stride_);
    152    }
    153 
    154    aom_usec_timer timer;
    155    unsigned int actual;
    156    if (sizeof(Pixel) == 2) {
    157 #if CONFIG_AV1_HIGHBITDEPTH
    158      AverageFunction avg_c =
    159          (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c;
    160      // To avoid differences in optimization with the local Reference*()
    161      // functions the C implementation is used as a baseline.
    162      aom_usec_timer_start(&timer);
    163      avg_c(CONVERT_TO_BYTEPTR(src), source_stride_);
    164      aom_usec_timer_mark(&timer);
    165      ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
    166 
    167      AverageFunction avg_opt = GET_PARAM(5);
    168      API_REGISTER_STATE_CHECK(
    169          aom_usec_timer_start(&timer);
    170          actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_);
    171          aom_usec_timer_mark(&timer));
    172 #endif  // CONFIG_AV1_HIGHBITDEPTH
    173    } else {
    174      ASSERT_EQ(sizeof(Pixel), 1u);
    175 
    176      AverageFunction avg_c = (block_size == 8) ? aom_avg_8x8_c : aom_avg_4x4_c;
    177      aom_usec_timer_start(&timer);
    178      avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_);
    179      aom_usec_timer_mark(&timer);
    180      ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
    181 
    182      AverageFunction avg_opt = GET_PARAM(5);
    183      API_REGISTER_STATE_CHECK(
    184          aom_usec_timer_start(&timer);
    185          actual =
    186              avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_);
    187          aom_usec_timer_mark(&timer));
    188    }
    189    opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
    190 
    191    EXPECT_EQ(expected, actual);
    192  }
    193 
    194  void TestConstantValue(Pixel value) {
    195    FillConstant(value);
    196    CheckAverages();
    197  }
    198 
    199  void TestRandom(int iterations = 1000) {
    200    for (int i = 0; i < iterations; i++) {
    201      FillRandom();
    202      CheckAverages();
    203    }
    204  }
    205 
    206  void PrintTimingStats() const {
    207    printf(
    208        "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
    209        GET_PARAM(4), static_cast<int>(ref_elapsed_time_),
    210        static_cast<int>(opt_elapsed_time_),
    211        (static_cast<float>(ref_elapsed_time_) /
    212         static_cast<float>(opt_elapsed_time_)));
    213  }
    214 
    215  int64_t ref_elapsed_time_ = 0;
    216  int64_t opt_elapsed_time_ = 0;
    217 };
    218 
    219 using AverageFunction_8x8_quad = void (*)(const uint8_t *s, int pitch,
    220                                          int x_idx, int y_idx, int *avg);
    221 
    222 // Arguments: width, height, bit_depth, buffer start offset, block size, avg
    223 // function.
    224 using AvgFunc_8x8_quad =
    225    std::tuple<int, int, int, int, int, AverageFunction_8x8_quad>;
    226 
    227 template <typename Pixel>
    228 class AverageTest_8x8_quad
    229    : public AverageTestBase<Pixel>,
    230      public ::testing::WithParamInterface<AvgFunc_8x8_quad> {
    231 public:
    232  AverageTest_8x8_quad()
    233      : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
    234 
    235 protected:
    236  using AverageTestBase<Pixel>::source_data_;
    237  using AverageTestBase<Pixel>::source_stride_;
    238  using AverageTestBase<Pixel>::ReferenceAverage8x8_quad;
    239  using AverageTestBase<Pixel>::FillConstant;
    240  using AverageTestBase<Pixel>::FillRandom;
    241 
    242  void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) {
    243    ASSERT_EQ(sizeof(Pixel), 1u);
    244    const int block_size = GET_PARAM(4);
    245    (void)block_size;
    246    int expected[4] = { 0 };
    247 
    248    // The reference frame, but not the source frame, may be unaligned for
    249    // certain types of searches.
    250    const Pixel *const src = source_data_ + GET_PARAM(3);
    251    ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected);
    252 
    253    aom_usec_timer timer;
    254    int expected_c[4] = { 0 };
    255    int actual[4] = { 0 };
    256    AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c;
    257    aom_usec_timer_start(&timer);
    258    for (int i = 0; i < iterations; i++) {
    259      avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
    260            y16_idx, expected_c);
    261    }
    262    aom_usec_timer_mark(&timer);
    263    ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
    264 
    265    AverageFunction_8x8_quad avg_opt = GET_PARAM(5);
    266    aom_usec_timer_start(&timer);
    267    for (int i = 0; i < iterations; i++) {
    268      avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
    269              y16_idx, actual);
    270    }
    271    aom_usec_timer_mark(&timer);
    272    opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
    273 
    274    for (int k = 0; k < 4; k++) {
    275      EXPECT_EQ(expected[k], actual[k]);
    276      EXPECT_EQ(expected_c[k], actual[k]);
    277    }
    278 
    279    // Print scaling information only when Speed test is called.
    280    if (iterations > 1) {
    281      printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
    282             static_cast<int>(ref_elapsed_time_),
    283             static_cast<int>(opt_elapsed_time_),
    284             (static_cast<float>(ref_elapsed_time_) /
    285              static_cast<float>(opt_elapsed_time_)));
    286    }
    287  }
    288 
    289  void CheckAverages() {
    290    for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2)
    291      for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2)
    292        CheckAveragesAt(1, x16_idx, y16_idx);
    293  }
    294 
    295  void TestConstantValue(Pixel value) {
    296    FillConstant(value);
    297    CheckAverages();
    298  }
    299 
    300  void TestRandom() {
    301    FillRandom();
    302    CheckAverages();
    303  }
    304 
    305  void TestSpeed() {
    306    FillRandom();
    307    CheckAveragesAt(1000000, 0, 0);
    308  }
    309 
    310  int64_t ref_elapsed_time_ = 0;
    311  int64_t opt_elapsed_time_ = 0;
    312 };
    313 
    // Single-block average tests on uint8_t samples.
    using AverageTest8bpp = AverageTest<uint8_t>;

    // Every sample is 0.
    TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); }

    // Every sample is 255.
    TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); }

    // Random samples, using the default iteration count of TestRandom().
    TEST_P(AverageTest8bpp, Random) { TestRandom(); }

    // Timing run over one million random blocks; disabled by default via the
    // DISABLED_ name prefix.
    TEST_P(AverageTest8bpp, DISABLED_Speed) {
      TestRandom(1000000);
      PrintTimingStats();
    }
    326 
    // Quad 8x8 average tests on uint8_t samples.
    using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>;

    // Every sample is 0.
    TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); }

    // Every sample is 255.
    TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); }

    // Random samples.
    TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); }

    // Timing run; disabled by default via the DISABLED_ name prefix.
    TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); }
    336 
    #if CONFIG_AV1_HIGHBITDEPTH
    // Single-block average tests on uint16_t samples.
    using AverageTestHbd = AverageTest<uint16_t>;

    // Every sample is 0.
    TEST_P(AverageTestHbd, MinValue) { TestConstantValue(0); }

    // Every sample is the largest 10-bit (1023) or 12-bit (4095) value.
    TEST_P(AverageTestHbd, MaxValue10bit) { TestConstantValue(1023); }
    TEST_P(AverageTestHbd, MaxValue12bit) { TestConstantValue(4095); }

    // Random samples, using the default iteration count of TestRandom().
    TEST_P(AverageTestHbd, Random) { TestRandom(); }

    // Timing run over one million random blocks; disabled by default via the
    // DISABLED_ name prefix.
    TEST_P(AverageTestHbd, DISABLED_Speed) {
      TestRandom(1000000);
      PrintTimingStats();
    }
    #endif  // CONFIG_AV1_HIGHBITDEPTH
    352 
    353 using IntProRowFunc = void (*)(int16_t *hbuf, uint8_t const *ref,
    354                               const int ref_stride, const int width,
    355                               const int height, int norm_factor);
    356 
    357 // Params: width, height, asm function, c function.
    358 using IntProRowParam = std::tuple<int, int, IntProRowFunc, IntProRowFunc>;
    359 
    360 class IntProRowTest : public AverageTestBase<uint8_t>,
    361                      public ::testing::WithParamInterface<IntProRowParam> {
    362 public:
    363  IntProRowTest()
    364      : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr),
    365        hbuf_c_(nullptr) {
    366    asm_func_ = GET_PARAM(2);
    367    c_func_ = GET_PARAM(3);
    368  }
    369 
    370  void set_norm_factor() {
    371    if (height_ == 128)
    372      norm_factor_ = 6;
    373    else if (height_ == 64)
    374      norm_factor_ = 5;
    375    else if (height_ == 32)
    376      norm_factor_ = 4;
    377    else if (height_ == 16)
    378      norm_factor_ = 3;
    379  }
    380 
    381 protected:
    382  void SetUp() override {
    383    source_data_ = static_cast<uint8_t *>(
    384        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
    385    ASSERT_NE(source_data_, nullptr);
    386 
    387    hbuf_asm_ = static_cast<int16_t *>(
    388        aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_));
    389    ASSERT_NE(hbuf_asm_, nullptr);
    390    hbuf_c_ = static_cast<int16_t *>(
    391        aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_));
    392    ASSERT_NE(hbuf_c_, nullptr);
    393  }
    394 
    395  void TearDown() override {
    396    aom_free(source_data_);
    397    source_data_ = nullptr;
    398    aom_free(hbuf_c_);
    399    hbuf_c_ = nullptr;
    400    aom_free(hbuf_asm_);
    401    hbuf_asm_ = nullptr;
    402  }
    403 
    404  void RunComparison() {
    405    set_norm_factor();
    406    API_REGISTER_STATE_CHECK(
    407        c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_));
    408    API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_,
    409                                       height_, norm_factor_));
    410    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
    411        << "Output mismatch\n";
    412  }
    413 
    414  void RunSpeedTest() {
    415    const int numIter = 5000000;
    416    set_norm_factor();
    417    printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
    418           numIter);
    419    aom_usec_timer c_timer_;
    420    aom_usec_timer_start(&c_timer_);
    421    for (int i = 0; i < numIter; i++) {
    422      c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_);
    423    }
    424    aom_usec_timer_mark(&c_timer_);
    425 
    426    aom_usec_timer asm_timer_;
    427    aom_usec_timer_start(&asm_timer_);
    428 
    429    for (int i = 0; i < numIter; i++) {
    430      asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
    431    }
    432    aom_usec_timer_mark(&asm_timer_);
    433 
    434    const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
    435    const int asm_sum_time =
    436        static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
    437 
    438    printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
    439           asm_sum_time,
    440           (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
    441 
    442    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
    443        << "Output mismatch\n";
    444  }
    445 
    446 private:
    447  IntProRowFunc asm_func_;
    448  IntProRowFunc c_func_;
    449  int16_t *hbuf_asm_;
    450  int16_t *hbuf_c_;
    451  int norm_factor_;
    452 };
    453 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest);
    454 
    455 using IntProColFunc = void (*)(int16_t *vbuf, uint8_t const *ref,
    456                               const int ref_stride, const int width,
    457                               const int height, int norm_factor);
    458 
    459 // Params: width, height, asm function, c function.
    460 using IntProColParam = std::tuple<int, int, IntProColFunc, IntProColFunc>;
    461 
    462 class IntProColTest : public AverageTestBase<uint8_t>,
    463                      public ::testing::WithParamInterface<IntProColParam> {
    464 public:
    465  IntProColTest()
    466      : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr),
    467        vbuf_c_(nullptr) {
    468    asm_func_ = GET_PARAM(2);
    469    c_func_ = GET_PARAM(3);
    470  }
    471 
    472 protected:
    473  void SetUp() override {
    474    source_data_ = static_cast<uint8_t *>(
    475        aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
    476    ASSERT_NE(source_data_, nullptr);
    477 
    478    vbuf_asm_ = static_cast<int16_t *>(
    479        aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_));
    480    ASSERT_NE(vbuf_asm_, nullptr);
    481    vbuf_c_ = static_cast<int16_t *>(
    482        aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_));
    483    ASSERT_NE(vbuf_c_, nullptr);
    484  }
    485 
    486  void TearDown() override {
    487    aom_free(source_data_);
    488    source_data_ = nullptr;
    489    aom_free(vbuf_c_);
    490    vbuf_c_ = nullptr;
    491    aom_free(vbuf_asm_);
    492    vbuf_asm_ = nullptr;
    493  }
    494 
    495  void RunComparison() {
    496    int norm_factor_ = 3 + (width_ >> 5);
    497    API_REGISTER_STATE_CHECK(
    498        c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_));
    499    API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_,
    500                                       height_, norm_factor_));
    501    EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
    502        << "Output mismatch\n";
    503  }
    504  void RunSpeedTest() {
    505    const int numIter = 5000000;
    506    printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
    507           numIter);
    508    int norm_factor_ = 3 + (width_ >> 5);
    509    aom_usec_timer c_timer_;
    510    aom_usec_timer_start(&c_timer_);
    511    for (int i = 0; i < numIter; i++) {
    512      c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_);
    513    }
    514    aom_usec_timer_mark(&c_timer_);
    515 
    516    aom_usec_timer asm_timer_;
    517    aom_usec_timer_start(&asm_timer_);
    518 
    519    for (int i = 0; i < numIter; i++) {
    520      asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
    521    }
    522    aom_usec_timer_mark(&asm_timer_);
    523 
    524    const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
    525    const int asm_sum_time =
    526        static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
    527 
    528    printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
    529           asm_sum_time,
    530           (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
    531 
    532    EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
    533        << "Output mismatch\n";
    534  }
    535 
    536 private:
    537  IntProColFunc asm_func_;
    538  IntProColFunc c_func_;
    539  int16_t *vbuf_asm_;
    540  int16_t *vbuf_c_;
    541 };
    542 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest);
    543 
    // C and optimized row projections must agree when every sample is 0.
    TEST_P(IntProRowTest, MinValue) {
      FillConstant(0);
      RunComparison();
    }

    // C and optimized row projections must agree when every sample is 255.
    TEST_P(IntProRowTest, MaxValue) {
      FillConstant(255);
      RunComparison();
    }

    // C and optimized row projections must agree on random samples.
    TEST_P(IntProRowTest, Random) {
      FillRandom();
      RunComparison();
    }

    // Timing run on random samples; disabled by default via the DISABLED_ name
    // prefix.
    TEST_P(IntProRowTest, DISABLED_Speed) {
      FillRandom();
      RunSpeedTest();
    }
    563 
    // C and optimized column projections must agree when every sample is 0.
    TEST_P(IntProColTest, MinValue) {
      FillConstant(0);
      RunComparison();
    }

    // C and optimized column projections must agree when every sample is 255.
    TEST_P(IntProColTest, MaxValue) {
      FillConstant(255);
      RunComparison();
    }

    // C and optimized column projections must agree on random samples.
    TEST_P(IntProColTest, Random) {
      FillRandom();
      RunComparison();
    }

    // Timing run on random samples; disabled by default via the DISABLED_ name
    // prefix.
    TEST_P(IntProColTest, DISABLED_Speed) {
      FillRandom();
      RunSpeedTest();
    }
    583 class VectorVarTestBase : public ::testing::Test {
    584 public:
    585  explicit VectorVarTestBase(int bwl) { m_bwl = bwl; }
    586  VectorVarTestBase() = default;
    587  ~VectorVarTestBase() override = default;
    588 
    589 protected:
    590  static const int kDataAlignment = 16;
    591 
    592  void SetUp() override {
    593    width = 4 << m_bwl;
    594 
    595    ref_vector = static_cast<int16_t *>(
    596        aom_memalign(kDataAlignment, width * sizeof(ref_vector[0])));
    597    ASSERT_NE(ref_vector, nullptr);
    598    src_vector = static_cast<int16_t *>(
    599        aom_memalign(kDataAlignment, width * sizeof(src_vector[0])));
    600    ASSERT_NE(src_vector, nullptr);
    601 
    602    rnd_.Reset(ACMRandom::DeterministicSeed());
    603  }
    604  void TearDown() override {
    605    aom_free(ref_vector);
    606    ref_vector = nullptr;
    607    aom_free(src_vector);
    608    src_vector = nullptr;
    609  }
    610 
    611  void FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) {
    612    for (int i = 0; i < width; ++i) {
    613      ref_vector[i] = fill_constant_ref;
    614      src_vector[i] = fill_constant_src;
    615    }
    616  }
    617 
    618  void FillRandom() {
    619    for (int i = 0; i < width; ++i) {
    620      ref_vector[i] =
    621          rnd_.Rand16() % max_range;  // acc. aom_vector_var_c brief.
    622      src_vector[i] = rnd_.Rand16() % max_range;
    623    }
    624  }
    625 
    626  int width;
    627  int m_bwl;
    628  int16_t *ref_vector;
    629  int16_t *src_vector;
    630  ACMRandom rnd_;
    631 
    632  static const int max_range = 510;
    633  static const int num_random_cmp = 50;
    634 };
    635 
    // Signature of the vector variance kernels under test.
    using VectorVarFunc = int (*)(const int16_t *ref, const int16_t *src,
                                  const int bwl);

    // Test parameters, in order: bwl, C function, SIMD function.
    using VecVarFunc = std::tuple<int, VectorVarFunc, VectorVarFunc>;

    // Parameterized fixture that evaluates the C and the SIMD kernel on the
    // vectors owned by VectorVarTestBase.
    class VectorVarTest : public VectorVarTestBase,
                          public ::testing::WithParamInterface<VecVarFunc> {
     public:
      VectorVarTest()
          : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)),
            simd_func(GET_PARAM(2)) {}

     protected:
      // Result of the C kernel on the current vectors.
      int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); }
      // Result of the SIMD kernel on the current vectors.
      int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); }

      VectorVarFunc c_func;
      VectorVarFunc simd_func;
    };
    GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest);
    656 
    // Each test below fills the two vectors and requires the C and SIMD kernels
    // to return the same value.

    // Reference all zero, source all max_range.
    TEST_P(VectorVarTest, MaxVar) {
      FillConstant(0, max_range);
      int c_var = calcVarC();
      int simd_var = calcVarSIMD();
      ASSERT_EQ(c_var, simd_var);
    }
    // Reference all max_range, source all zero.
    TEST_P(VectorVarTest, MaxVarRev) {
      FillConstant(max_range, 0);
      int c_var = calcVarC();
      int simd_var = calcVarSIMD();
      ASSERT_EQ(c_var, simd_var);
    }
    // Both vectors all zero.
    TEST_P(VectorVarTest, ZeroDiff) {
      FillConstant(0, 0);
      int c_var = calcVarC();
      int simd_var = calcVarSIMD();
      ASSERT_EQ(c_var, simd_var);
    }
    // Both vectors all max_range.
    TEST_P(VectorVarTest, ZeroDiff2) {
      FillConstant(max_range, max_range);
      int c_var = calcVarC();
      int simd_var = calcVarSIMD();
      ASSERT_EQ(c_var, simd_var);
    }
    // Two different constants (30 and 90).
    TEST_P(VectorVarTest, Constant) {
      FillConstant(30, 90);
      int c_var = calcVarC();
      int simd_var = calcVarSIMD();
      ASSERT_EQ(c_var, simd_var);
    }
    // num_random_cmp rounds of random vectors; stops at the first mismatch.
    TEST_P(VectorVarTest, Random) {
      for (size_t i = 0; i < num_random_cmp; i++) {
        FillRandom();
        int c_var = calcVarC();
        int simd_var = calcVarSIMD();
        ASSERT_EQ(c_var, simd_var);
      }
    }
    695 TEST_P(VectorVarTest, DISABLED_Speed) {
    696  FillRandom();
    697  const int numIter = 5000000;
    698  printf("Width = %d number of iteration is %d \n", width, numIter);
    699 
    700  int sum_c_var = 0;
    701  int c_var = 0;
    702 
    703  aom_usec_timer c_timer_;
    704  aom_usec_timer_start(&c_timer_);
    705  for (size_t i = 0; i < numIter; i++) {
    706    c_var = calcVarC();
    707    sum_c_var += c_var;
    708  }
    709  aom_usec_timer_mark(&c_timer_);
    710 
    711  int simd_var = 0;
    712  int sum_simd_var = 0;
    713  aom_usec_timer simd_timer_;
    714  aom_usec_timer_start(&simd_timer_);
    715  for (size_t i = 0; i < numIter; i++) {
    716    simd_var = calcVarSIMD();
    717    sum_simd_var += simd_var;
    718  }
    719  aom_usec_timer_mark(&simd_timer_);
    720 
    721  const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
    722  const int simd_sum_time =
    723      static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
    724 
    725  printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
    726         simd_sum_time,
    727         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
    728 
    729  EXPECT_EQ(c_var, simd_var) << "Output mismatch \n";
    730  EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n";
    731 }
    732 
    using std::make_tuple;

    // C instantiations. Tuple fields, in order: width, height, bit_depth,
    // buffer start offset, block size, avg function.
    INSTANTIATE_TEST_SUITE_P(
        C, AverageTest8bpp,
        ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c),
                          make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c)));

    // Quad 8x8 kernel, same tuple layout as above.
    INSTANTIATE_TEST_SUITE_P(
        C, AvgTest8bpp_avg_8x8_quad,
        ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c),
                          make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c),
                          make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c)));
    745 
    // SSE2 instantiations, compiled only when HAVE_SSE2 is set.
    #if HAVE_SSE2
    // Tuple fields, in order: width, height, bit_depth, buffer start offset,
    // block size, avg function.
    INSTANTIATE_TEST_SUITE_P(
        SSE2, AverageTest8bpp,
        ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_sse2),
                          make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_sse2),
                          make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_sse2),
                          make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_sse2),
                          make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_sse2),
                          make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_sse2)));

    INSTANTIATE_TEST_SUITE_P(
        SSE2, AvgTest8bpp_avg_8x8_quad,
        ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2),
                          make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2),
                          make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2)));

    // Tuple fields, in order: width, height, asm function, c function.
    INSTANTIATE_TEST_SUITE_P(
        SSE2, IntProRowTest,
        ::testing::Values(
            make_tuple(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
            make_tuple(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
            make_tuple(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
            make_tuple(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c)));

    INSTANTIATE_TEST_SUITE_P(
        SSE2, IntProColTest,
        ::testing::Values(
            make_tuple(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
            make_tuple(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
            make_tuple(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
            make_tuple(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c)));
    #endif
    778 
    // AVX2 instantiations, compiled only when HAVE_AVX2 is set.
    #if HAVE_AVX2
    // Tuple fields, in order: width, height, bit_depth, buffer start offset,
    // block size, avg function.
    INSTANTIATE_TEST_SUITE_P(
        AVX2, AvgTest8bpp_avg_8x8_quad,
        ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2),
                          make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2),
                          make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2)));

    // Tuple fields, in order: width, height, asm function, c function.
    INSTANTIATE_TEST_SUITE_P(
        AVX2, IntProRowTest,
        ::testing::Values(
            make_tuple(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
            make_tuple(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
            make_tuple(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
            make_tuple(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c)));

    INSTANTIATE_TEST_SUITE_P(
        AVX2, IntProColTest,
        ::testing::Values(
            make_tuple(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
            make_tuple(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
            make_tuple(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
            make_tuple(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c)));
    #endif
    802 
    // NEON instantiations, compiled only when HAVE_NEON is set.
    #if HAVE_NEON
    // Tuple fields, in order: width, height, bit_depth, buffer start offset,
    // block size, avg function.
    INSTANTIATE_TEST_SUITE_P(
        NEON, AverageTest8bpp,
        ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_neon),
                          make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_neon),
                          make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_neon),
                          make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_neon),
                          make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_neon),
                          make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_neon)));
    // Tuple fields, in order: width, height, asm function, c function.
    INSTANTIATE_TEST_SUITE_P(
        NEON, IntProRowTest,
        ::testing::Values(
            make_tuple(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
            make_tuple(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
            make_tuple(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c),
            make_tuple(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c)));

    INSTANTIATE_TEST_SUITE_P(
        NEON, IntProColTest,
        ::testing::Values(
            make_tuple(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
            make_tuple(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
            make_tuple(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c),
            make_tuple(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c)));

    // Quad 8x8 kernel, same six-field tuple layout as AverageTest8bpp above.
    INSTANTIATE_TEST_SUITE_P(
        NEON, AvgTest8bpp_avg_8x8_quad,
        ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon),
                          make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon),
                          make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon)));
    #endif
    834 
    #if CONFIG_AV1_HIGHBITDEPTH
    // High-bitdepth average tests; compiled only when CONFIG_AV1_HIGHBITDEPTH is
    // set. The C entries use the 10 and 12 values for the third tuple field.
    INSTANTIATE_TEST_SUITE_P(
        C, AverageTestHbd,
        ::testing::Values(
            make_tuple(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c),
            make_tuple(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c),
            make_tuple(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c),
            make_tuple(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c)));

    #if HAVE_NEON
    // NEON high-bitdepth kernels: the 4x4 entries come first, then the 8x8 ones,
    // in the original order.
    INSTANTIATE_TEST_SUITE_P(
        NEON, AverageTestHbd,
        ::testing::Values(
            make_tuple(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon),
            make_tuple(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon),
            make_tuple(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon),
            make_tuple(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon),
            make_tuple(16, 16, 12, 5, 4, &aom_highbd_avg_4x4_neon),
            make_tuple(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon),
            make_tuple(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon),
            make_tuple(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon),
            make_tuple(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon),
            make_tuple(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon),
            make_tuple(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon),
            make_tuple(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon)));
    #endif  // HAVE_NEON
    #endif  // CONFIG_AV1_HIGHBITDEPTH
    860 
    861 using SatdFunc = int (*)(const tran_low_t *coeffs, int length);
    862 using SatdLpFunc = int (*)(const int16_t *coeffs, int length);
    863 
    // Parameter bundle for the SATD fixtures: a coefficient count plus the two
    // kernels (reference and SIMD) that get run on the same buffer.
    template <typename SatdFuncType>
    struct SatdTestParam {
      SatdTestParam(int size, SatdFuncType ref_fn, SatdFuncType simd_fn)
          : satd_size(size), func_ref(ref_fn), func_simd(simd_fn) {}

      // Prints the size only; the function pointers are omitted.
      friend std::ostream &operator<<(std::ostream &out,
                                      const SatdTestParam &p) {
        out << "satd_size: " << p.satd_size;
        return out;
      }

      int satd_size;
      SatdFuncType func_ref;
      SatdFuncType func_simd;
    };
    876 
    877 template <typename CoeffType, typename SatdFuncType>
    878 class SatdTestBase
    879    : public ::testing::Test,
    880      public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> {
    881 protected:
    882  explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) {
    883    satd_size_ = func_param.satd_size;
    884    satd_func_ref_ = func_param.func_ref;
    885    satd_func_simd_ = func_param.func_simd;
    886  }
    887  void SetUp() override {
    888    rnd_.Reset(ACMRandom::DeterministicSeed());
    889    src_ = reinterpret_cast<CoeffType *>(
    890        aom_memalign(32, sizeof(*src_) * satd_size_));
    891    ASSERT_NE(src_, nullptr);
    892  }
    893  void TearDown() override { aom_free(src_); }
    894  void FillConstant(const CoeffType val) {
    895    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
    896  }
    897  void FillRandom() {
    898    for (int i = 0; i < satd_size_; ++i) {
    899      src_[i] = static_cast<int16_t>(rnd_.Rand16());
    900    }
    901  }
    902  void Check(int expected) {
    903    int total_ref;
    904    API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
    905    EXPECT_EQ(expected, total_ref);
    906 
    907    int total_simd;
    908    API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
    909    EXPECT_EQ(expected, total_simd);
    910  }
    911  void RunComparison() {
    912    int total_ref;
    913    API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
    914 
    915    int total_simd;
    916    API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
    917 
    918    EXPECT_EQ(total_ref, total_simd);
    919  }
    920  void RunSpeedTest() {
    921    const int numIter = 500000;
    922    printf("size = %d number of iteration is %d \n", satd_size_, numIter);
    923 
    924    int total_ref;
    925    aom_usec_timer c_timer_;
    926    aom_usec_timer_start(&c_timer_);
    927    for (int i = 0; i < numIter; i++) {
    928      total_ref = satd_func_ref_(src_, satd_size_);
    929    }
    930    aom_usec_timer_mark(&c_timer_);
    931 
    932    int total_simd;
    933    aom_usec_timer simd_timer_;
    934    aom_usec_timer_start(&simd_timer_);
    935 
    936    for (int i = 0; i < numIter; i++) {
    937      total_simd = satd_func_simd_(src_, satd_size_);
    938    }
    939    aom_usec_timer_mark(&simd_timer_);
    940 
    941    const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
    942    const int simd_sum_time =
    943        static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
    944 
    945    printf(
    946        "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
    947        simd_sum_time,
    948        (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
    949 
    950    EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n";
    951  }
    952  int satd_size_;
    953 
    954 private:
    955  CoeffType *src_;
    956  SatdFuncType satd_func_ref_;
    957  SatdFuncType satd_func_simd_;
    958  ACMRandom rnd_;
    959 };
    960 
    961 class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> {
    962 public:
    963  SatdTest() : SatdTestBase(GetParam()) {}
    964 };
    965 
    966 TEST_P(SatdTest, MinValue) {
    967  const int kMin = -524287;
    968  const int expected = -kMin * satd_size_;
    969  FillConstant(kMin);
    970  Check(expected);
    971 }
    972 TEST_P(SatdTest, MaxValue) {
    973  const int kMax = 524287;
    974  const int expected = kMax * satd_size_;
    975  FillConstant(kMax);
    976  Check(expected);
    977 }
    978 TEST_P(SatdTest, Random) {
    979  int expected;
    980  switch (satd_size_) {
    981    case 16: expected = 205298; break;
    982    case 64: expected = 1113950; break;
    983    case 256: expected = 4268415; break;
    984    case 1024: expected = 16954082; break;
    985    default:
    986      FAIL() << "Invalid satd size (" << satd_size_
    987             << ") valid: 16/64/256/1024";
    988  }
    989  FillRandom();
    990  Check(expected);
    991 }
    992 TEST_P(SatdTest, Match) {
    993  FillRandom();
    994  RunComparison();
    995 }
    996 TEST_P(SatdTest, DISABLED_Speed) {
    997  FillRandom();
    998  RunSpeedTest();
    999 }
   1000 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest);
   1001 
   1002 INSTANTIATE_TEST_SUITE_P(
   1003    C, SatdTest,
   1004    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c),
   1005                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c),
   1006                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c),
   1007                      SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c)));
   1008 
   #if HAVE_NEON
   // NEON SATD kernel checked against aom_satd_c for each supported size.
   INSTANTIATE_TEST_SUITE_P(
       NEON, SatdTest,
       ::testing::Values(
           SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_neon),
           SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_neon),
           SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_neon),
           SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_neon)));

   // NEON vector-variance kernel paired with aom_vector_var_c; the tuple layout
   // is declared by VectorVarTest earlier in this file.
   INSTANTIATE_TEST_SUITE_P(
       NEON, VectorVarTest,
       ::testing::Values(
           make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon),
           make_tuple(3, &aom_vector_var_c, &aom_vector_var_neon),
           make_tuple(4, &aom_vector_var_c, &aom_vector_var_neon),
           make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon)));
   #endif  // HAVE_NEON
   1024 
   #if HAVE_SVE
   // SVE vector-variance kernel paired with aom_vector_var_c; the tuple layout
   // is declared by VectorVarTest earlier in this file.
   INSTANTIATE_TEST_SUITE_P(
       SVE, VectorVarTest,
       ::testing::Values(
           make_tuple(2, &aom_vector_var_c, &aom_vector_var_sve),
           make_tuple(3, &aom_vector_var_c, &aom_vector_var_sve),
           make_tuple(4, &aom_vector_var_c, &aom_vector_var_sve),
           make_tuple(5, &aom_vector_var_c, &aom_vector_var_sve)));
   #endif  // HAVE_SVE
   1033 
   #if HAVE_SSE4_1
   // SSE4.1 vector-variance kernel paired with aom_vector_var_c; the tuple
   // layout is declared by VectorVarTest earlier in this file.
   INSTANTIATE_TEST_SUITE_P(
       SSE4_1, VectorVarTest,
       ::testing::Values(
           make_tuple(2, &aom_vector_var_c, &aom_vector_var_sse4_1),
           make_tuple(3, &aom_vector_var_c, &aom_vector_var_sse4_1),
           make_tuple(4, &aom_vector_var_c, &aom_vector_var_sse4_1),
           make_tuple(5, &aom_vector_var_c, &aom_vector_var_sse4_1)));
   #endif  // HAVE_SSE4_1
   1043 
   #if HAVE_AVX2
   // AVX2 SATD kernel checked against aom_satd_c for each supported size.
   INSTANTIATE_TEST_SUITE_P(
       AVX2, SatdTest,
       ::testing::Values(
           SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_avx2),
           SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_avx2),
           SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_avx2),
           SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_avx2)));

   // AVX2 vector-variance kernel paired with aom_vector_var_c; the tuple layout
   // is declared by VectorVarTest earlier in this file.
   INSTANTIATE_TEST_SUITE_P(
       AVX2, VectorVarTest,
       ::testing::Values(
           make_tuple(2, &aom_vector_var_c, &aom_vector_var_avx2),
           make_tuple(3, &aom_vector_var_c, &aom_vector_var_avx2),
           make_tuple(4, &aom_vector_var_c, &aom_vector_var_avx2),
           make_tuple(5, &aom_vector_var_c, &aom_vector_var_avx2)));
   #endif  // HAVE_AVX2
   1060 
   #if HAVE_SSE2
   // SSE2 SATD kernel checked against aom_satd_c for each supported size.
   INSTANTIATE_TEST_SUITE_P(
       SSE2, SatdTest,
       ::testing::Values(
           SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_sse2),
           SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_sse2),
           SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_sse2),
           SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_sse2)));
   #endif  // HAVE_SSE2
   1070 
   1071 class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> {
   1072 public:
   1073  SatdLpTest() : SatdTestBase(GetParam()) {}
   1074 };
   1075 
   1076 TEST_P(SatdLpTest, MinValue) {
   1077  const int kMin = -32640;
   1078  const int expected = -kMin * satd_size_;
   1079  FillConstant(kMin);
   1080  Check(expected);
   1081 }
   1082 TEST_P(SatdLpTest, MaxValue) {
   1083  const int kMax = 32640;
   1084  const int expected = kMax * satd_size_;
   1085  FillConstant(kMax);
   1086  Check(expected);
   1087 }
   1088 TEST_P(SatdLpTest, Random) {
   1089  int expected;
   1090  switch (satd_size_) {
   1091    case 16: expected = 205298; break;
   1092    case 64: expected = 1113950; break;
   1093    case 256: expected = 4268415; break;
   1094    case 1024: expected = 16954082; break;
   1095    default:
   1096      FAIL() << "Invalid satd size (" << satd_size_
   1097             << ") valid: 16/64/256/1024";
   1098  }
   1099  FillRandom();
   1100  Check(expected);
   1101 }
   1102 TEST_P(SatdLpTest, Match) {
   1103  FillRandom();
   1104  RunComparison();
   1105 }
   1106 TEST_P(SatdLpTest, DISABLED_Speed) {
   1107  FillRandom();
   1108  RunSpeedTest();
   1109 }
   1110 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest);
   1111 
   // Add the following C instantiation to avoid gtest's uninstantiated-test warning.
   1113 INSTANTIATE_TEST_SUITE_P(
   1114    C, SatdLpTest,
   1115    ::testing::Values(
   1116        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c),
   1117        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c),
   1118        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c),
   1119        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c)));
   1120 
   #if HAVE_NEON
   // NEON low-precision SATD kernel checked against aom_satd_lp_c.
   INSTANTIATE_TEST_SUITE_P(
       NEON, SatdLpTest,
       ::testing::Values(SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c,
                                                   &aom_satd_lp_neon),
                         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c,
                                                   &aom_satd_lp_neon),
                         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c,
                                                   &aom_satd_lp_neon),
                         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c,
                                                   &aom_satd_lp_neon)));
   #endif  // HAVE_NEON
   1130 
   #if HAVE_AVX2
   // AVX2 low-precision SATD kernel checked against aom_satd_lp_c.
   INSTANTIATE_TEST_SUITE_P(
       AVX2, SatdLpTest,
       ::testing::Values(SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c,
                                                   &aom_satd_lp_avx2),
                         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c,
                                                   &aom_satd_lp_avx2),
                         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c,
                                                   &aom_satd_lp_avx2),
                         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c,
                                                   &aom_satd_lp_avx2)));
   #endif  // HAVE_AVX2
   1140 
   #if HAVE_SSE2
   // SSE2 low-precision SATD kernel checked against aom_satd_lp_c.
   INSTANTIATE_TEST_SUITE_P(
       SSE2, SatdLpTest,
       ::testing::Values(SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c,
                                                   &aom_satd_lp_sse2),
                         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c,
                                                   &aom_satd_lp_sse2),
                         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c,
                                                   &aom_satd_lp_sse2),
                         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c,
                                                   &aom_satd_lp_sse2)));
   #endif  // HAVE_SSE2
   1150 
   1151 }  // namespace