tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

variance_test.cc (172704B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <cstdlib>
     13 #include <new>
     14 #include <ostream>
     15 #include <tuple>
     16 
     17 #include "gtest/gtest.h"
     18 
     19 #include "config/aom_config.h"
     20 #include "config/aom_dsp_rtcd.h"
     21 
     22 #include "test/acm_random.h"
     23 #include "test/register_state_check.h"
     24 #include "aom/aom_codec.h"
     25 #include "aom/aom_integer.h"
     26 #include "aom_mem/aom_mem.h"
     27 #include "aom_ports/aom_timer.h"
     28 #include "aom_ports/mem.h"
     29 #include "av1/common/cdef_block.h"
     30 
     31 namespace {
     32 
     33 using MseWxH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
     34                                     int sstride, int w, int h);
     35 using Mse16xH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
     36                                      int w, int h);
     37 using VarianceMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
     38                                         const uint8_t *b, int b_stride,
     39                                         unsigned int *sse);
     40 using GetSseSum8x8QuadFunc = void (*)(const uint8_t *a, int a_stride,
     41                                      const uint8_t *b, int b_stride,
     42                                      uint32_t *sse8x8, int *sum8x8,
     43                                      unsigned int *tot_sse, int *tot_sum,
     44                                      uint32_t *var8x8);
     45 using GetSseSum16x16DualFunc = void (*)(const uint8_t *a, int a_stride,
     46                                        const uint8_t *b, int b_stride,
     47                                        uint32_t *sse16x16,
     48                                        unsigned int *tot_sse, int *tot_sum,
     49                                        uint32_t *var16x16);
     50 using SubpixVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
     51                                          int xoffset, int yoffset,
     52                                          const uint8_t *b, int b_stride,
     53                                          unsigned int *sse);
     54 using SubpixAvgVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
     55                                             int xoffset, int yoffset,
     56                                             const uint8_t *b, int b_stride,
     57                                             uint32_t *sse,
     58                                             const uint8_t *second_pred);
     59 using SumOfSquaresFunction = unsigned int (*)(const int16_t *src);
     60 
     61 #if !CONFIG_REALTIME_ONLY
     62 using ObmcSubpelVarFunc = uint32_t (*)(const uint8_t *pre, int pre_stride,
     63                                       int xoffset, int yoffset,
     64                                       const int32_t *wsrc, const int32_t *mask,
     65                                       unsigned int *sse);
     66 
     67 #endif
     68 
     69 using libaom_test::ACMRandom;
     70 
     71 // Truncate high bit depth results by downshifting (with rounding) by:
     72 // 2 * (bit_depth - 8) for sse
     73 // (bit_depth - 8) for se
     74 static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
     75  switch (bit_depth) {
     76    case AOM_BITS_12:
     77      *sse = (*sse + 128) >> 8;
     78      *se = (*se + 8) >> 4;
     79      break;
     80    case AOM_BITS_10:
     81      *sse = (*sse + 8) >> 4;
     82      *se = (*se + 2) >> 2;
     83      break;
     84    case AOM_BITS_8:
     85    default: break;
     86  }
     87 }
     88 
     89 /* Note:
     90 *  Our codebase calculates the "diff" value in the variance algorithm by
     91 *  (src - ref).
     92 */
     93 static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
     94                             int l2h, int src_stride, int ref_stride,
     95                             uint32_t *sse_ptr, bool use_high_bit_depth_,
     96                             aom_bit_depth_t bit_depth) {
     97  int64_t se = 0;
     98  uint64_t sse = 0;
     99  const int w = 1 << l2w;
    100  const int h = 1 << l2h;
    101  for (int y = 0; y < h; y++) {
    102    for (int x = 0; x < w; x++) {
    103      int diff;
    104      if (!use_high_bit_depth_) {
    105        diff = src[y * src_stride + x] - ref[y * ref_stride + x];
    106        se += diff;
    107        sse += diff * diff;
    108      } else {
    109        diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
    110               CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
    111        se += diff;
    112        sse += diff * diff;
    113      }
    114    }
    115  }
    116  RoundHighBitDepth(bit_depth, &se, &sse);
    117  *sse_ptr = static_cast<uint32_t>(sse);
    118  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
    119 }
    120 
    121 /* The subpel reference functions differ from the codec version in one aspect:
    122 * they calculate the bilinear factors directly instead of using a lookup table
    123 * and therefore upshift xoff and yoff by 1. Only every other calculated value
    124 * is used so the codec version shrinks the table to save space.
    125 */
    126 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
    127                                    int l2w, int l2h, int xoff, int yoff,
    128                                    uint32_t *sse_ptr, bool use_high_bit_depth_,
    129                                    aom_bit_depth_t bit_depth) {
    130  int64_t se = 0;
    131  uint64_t sse = 0;
    132  const int w = 1 << l2w;
    133  const int h = 1 << l2h;
    134 
    135  xoff <<= 1;
    136  yoff <<= 1;
    137 
    138  for (int y = 0; y < h; y++) {
    139    for (int x = 0; x < w; x++) {
    140      // Bilinear interpolation at a 16th pel step.
    141      if (!use_high_bit_depth_) {
    142        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
    143        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
    144        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
    145        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
    146        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
    147        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
    148        const int r = a + (((b - a) * yoff + 8) >> 4);
    149        const int diff = r - src[w * y + x];
    150        se += diff;
    151        sse += diff * diff;
    152      } else {
    153        uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
    154        uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    155        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
    156        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
    157        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
    158        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
    159        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
    160        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
    161        const int r = a + (((b - a) * yoff + 8) >> 4);
    162        const int diff = r - src16[w * y + x];
    163        se += diff;
    164        sse += diff * diff;
    165      }
    166    }
    167  }
    168  RoundHighBitDepth(bit_depth, &se, &sse);
    169  *sse_ptr = static_cast<uint32_t>(sse);
    170  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
    171 }
    172 
    173 static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
    174                                        const uint8_t *second_pred, int l2w,
    175                                        int l2h, int xoff, int yoff,
    176                                        uint32_t *sse_ptr,
    177                                        bool use_high_bit_depth,
    178                                        aom_bit_depth_t bit_depth) {
    179  int64_t se = 0;
    180  uint64_t sse = 0;
    181  const int w = 1 << l2w;
    182  const int h = 1 << l2h;
    183 
    184  xoff <<= 1;
    185  yoff <<= 1;
    186 
    187  for (int y = 0; y < h; y++) {
    188    for (int x = 0; x < w; x++) {
    189      // bilinear interpolation at a 16th pel step
    190      if (!use_high_bit_depth) {
    191        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
    192        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
    193        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
    194        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
    195        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
    196        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
    197        const int r = a + (((b - a) * yoff + 8) >> 4);
    198        const int diff =
    199            ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
    200        se += diff;
    201        sse += diff * diff;
    202      } else {
    203        const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
    204        const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    205        const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
    206        const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
    207        const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
    208        const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
    209        const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
    210        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
    211        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
    212        const int r = a + (((b - a) * yoff + 8) >> 4);
    213        const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
    214        se += diff;
    215        sse += diff * diff;
    216      }
    217    }
    218  }
    219  RoundHighBitDepth(bit_depth, &se, &sse);
    220  *sse_ptr = static_cast<uint32_t>(sse);
    221  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
    222 }
    223 
    224 #if !CONFIG_REALTIME_ONLY
    225 static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h,
    226                                         int xoff, int yoff,
    227                                         const int32_t *wsrc,
    228                                         const int32_t *mask, uint32_t *sse_ptr,
    229                                         bool use_high_bit_depth_,
    230                                         aom_bit_depth_t bit_depth) {
    231  int64_t se = 0;
    232  uint64_t sse = 0;
    233  const int w = 1 << l2w;
    234  const int h = 1 << l2h;
    235 
    236  xoff <<= 1;
    237  yoff <<= 1;
    238 
    239  for (int y = 0; y < h; y++) {
    240    for (int x = 0; x < w; x++) {
    241      // Bilinear interpolation at a 16th pel step.
    242      if (!use_high_bit_depth_) {
    243        const int a1 = pre[(w + 1) * (y + 0) + x + 0];
    244        const int a2 = pre[(w + 1) * (y + 0) + x + 1];
    245        const int b1 = pre[(w + 1) * (y + 1) + x + 0];
    246        const int b2 = pre[(w + 1) * (y + 1) + x + 1];
    247        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
    248        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
    249        const int r = a + (((b - a) * yoff + 8) >> 4);
    250        const int diff = ROUND_POWER_OF_TWO_SIGNED(
    251            wsrc[w * y + x] - r * mask[w * y + x], 12);
    252        se += diff;
    253        sse += diff * diff;
    254      } else {
    255        uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre);
    256        const int a1 = pre16[(w + 1) * (y + 0) + x + 0];
    257        const int a2 = pre16[(w + 1) * (y + 0) + x + 1];
    258        const int b1 = pre16[(w + 1) * (y + 1) + x + 0];
    259        const int b2 = pre16[(w + 1) * (y + 1) + x + 1];
    260        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
    261        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
    262        const int r = a + (((b - a) * yoff + 8) >> 4);
    263        const int diff = ROUND_POWER_OF_TWO_SIGNED(
    264            wsrc[w * y + x] - r * mask[w * y + x], 12);
    265        se += diff;
    266        sse += diff * diff;
    267      }
    268    }
    269  }
    270  RoundHighBitDepth(bit_depth, &se, &sse);
    271  *sse_ptr = static_cast<uint32_t>(sse);
    272  return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
    273 }
    274 #endif
    275 
    276 ////////////////////////////////////////////////////////////////////////////////
    277 
    278 #if !CONFIG_REALTIME_ONLY
    279 class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
    280 public:
    281  SumOfSquaresTest() : func_(GetParam()) {}
    282 
    283  ~SumOfSquaresTest() override = default;
    284 
    285 protected:
    286  void ConstTest();
    287  void RefTest();
    288 
    289  SumOfSquaresFunction func_;
    290  ACMRandom rnd_;
    291 };
    292 
    293 void SumOfSquaresTest::ConstTest() {
    294  int16_t mem[256];
    295  unsigned int res;
    296  for (int v = 0; v < 256; ++v) {
    297    for (int i = 0; i < 256; ++i) {
    298      mem[i] = v;
    299    }
    300    API_REGISTER_STATE_CHECK(res = func_(mem));
    301    EXPECT_EQ(256u * (v * v), res);
    302  }
    303 }
    304 
    305 unsigned int mb_ss_ref(const int16_t *src) {
    306  unsigned int res = 0;
    307  for (int i = 0; i < 256; ++i) {
    308    res += src[i] * src[i];
    309  }
    310  return res;
    311 }
    312 
    313 void SumOfSquaresTest::RefTest() {
    314  int16_t mem[256];
    315  for (int i = 0; i < 100; ++i) {
    316    for (int j = 0; j < 256; ++j) {
    317      mem[j] = rnd_.Rand8() - rnd_.Rand8();
    318    }
    319 
    320    const unsigned int expected = mb_ss_ref(mem);
    321    unsigned int res;
    322    API_REGISTER_STATE_CHECK(res = func_(mem));
    323    EXPECT_EQ(expected, res);
    324  }
    325 }
    326 #endif  // !CONFIG_REALTIME_ONLY
    327 
    328 ////////////////////////////////////////////////////////////////////////////////
    329 // Encapsulating struct to store the function to test along with
    330 // some testing context.
    331 // Can be used for MSE, SSE, Variance, etc.
    332 
    333 template <typename Func>
    334 struct TestParams {
    335  TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
    336             int bit_depth_value = 0)
    337      : log2width(log2w), log2height(log2h), func(function) {
    338    use_high_bit_depth = (bit_depth_value > 0);
    339    if (use_high_bit_depth) {
    340      bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value);
    341    } else {
    342      bit_depth = AOM_BITS_8;
    343    }
    344    width = 1 << log2width;
    345    height = 1 << log2height;
    346    block_size = width * height;
    347    mask = (1u << bit_depth) - 1;
    348  }
    349 
    350  int log2width, log2height;
    351  int width, height;
    352  int block_size;
    353  Func func;
    354  aom_bit_depth_t bit_depth;
    355  bool use_high_bit_depth;
    356  uint32_t mask;
    357 };
    358 
    359 template <typename Func>
    360 std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
    361  return os << "width/height:" << p.width << "/" << p.height
    362            << " function:" << reinterpret_cast<const void *>(p.func)
    363            << " bit-depth:" << p.bit_depth;
    364 }
    365 
    366 // Main class for testing a function type
    367 template <typename FunctionType>
    368 class MseWxHTestClass
    369    : public ::testing::TestWithParam<TestParams<FunctionType> > {
    370 public:
    371  void SetUp() override {
    372    params_ = this->GetParam();
    373 
    374    rnd_.Reset(ACMRandom::DeterministicSeed());
    375    src_ = reinterpret_cast<uint16_t *>(
    376        aom_memalign(16, block_size() * sizeof(src_)));
    377    dst_ = reinterpret_cast<uint8_t *>(
    378        aom_memalign(16, block_size() * sizeof(dst_)));
    379    ASSERT_NE(src_, nullptr);
    380    ASSERT_NE(dst_, nullptr);
    381  }
    382 
    383  void TearDown() override {
    384    aom_free(src_);
    385    aom_free(dst_);
    386    src_ = nullptr;
    387    dst_ = nullptr;
    388  }
    389 
    390 protected:
    391  void RefMatchTestMse();
    392  void SpeedTest();
    393 
    394 protected:
    395  ACMRandom rnd_;
    396  uint8_t *dst_;
    397  uint16_t *src_;
    398  TestParams<FunctionType> params_;
    399 
    400  // some relay helpers
    401  int block_size() const { return params_.block_size; }
    402  int width() const { return params_.width; }
    403  int height() const { return params_.height; }
    404  int d_stride() const { return params_.width; }  // stride is same as width
    405  int s_stride() const { return params_.width; }  // stride is same as width
    406 };
    407 
    408 template <typename MseWxHFunctionType>
    409 void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() {
    410  aom_usec_timer ref_timer, test_timer;
    411  double elapsed_time_c = 0;
    412  double elapsed_time_simd = 0;
    413  int run_time = 10000000;
    414  int w = width();
    415  int h = height();
    416  int dstride = d_stride();
    417  int sstride = s_stride();
    418 
    419  for (int k = 0; k < block_size(); ++k) {
    420    dst_[k] = rnd_.Rand8();
    421    src_[k] = rnd_.Rand8();
    422  }
    423  aom_usec_timer_start(&ref_timer);
    424  for (int i = 0; i < run_time; i++) {
    425    aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h);
    426  }
    427  aom_usec_timer_mark(&ref_timer);
    428  elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
    429 
    430  aom_usec_timer_start(&test_timer);
    431  for (int i = 0; i < run_time; i++) {
    432    params_.func(dst_, dstride, src_, sstride, w, h);
    433  }
    434  aom_usec_timer_mark(&test_timer);
    435  elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
    436 
    437  printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
    438         elapsed_time_c, elapsed_time_simd,
    439         (elapsed_time_c / elapsed_time_simd));
    440 }
    441 
    442 template <typename MseWxHFunctionType>
    443 void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() {
    444  uint64_t mse_ref = 0;
    445  uint64_t mse_mod = 0;
    446  int w = width();
    447  int h = height();
    448  int dstride = d_stride();
    449  int sstride = s_stride();
    450 
    451  for (int i = 0; i < 10; i++) {
    452    for (int k = 0; k < block_size(); ++k) {
    453      dst_[k] = rnd_.Rand8();
    454      src_[k] = rnd_.Rand8();
    455    }
    456    API_REGISTER_STATE_CHECK(
    457        mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h));
    458    API_REGISTER_STATE_CHECK(
    459        mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
    460    EXPECT_EQ(mse_ref, mse_mod)
    461        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
    462  }
    463 }
    464 
    465 template <typename FunctionType>
    466 class Mse16xHTestClass
    467    : public ::testing::TestWithParam<TestParams<FunctionType> > {
    468 public:
    469  // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for
    470  // maximum width 16 and maximum height 8.
    471  int mem_size = 16 * 8;
    472  void SetUp() override {
    473    params_ = this->GetParam();
    474    rnd_.Reset(ACMRandom::DeterministicSeed());
    475    src_ = reinterpret_cast<uint16_t *>(
    476        aom_memalign(16, mem_size * sizeof(*src_)));
    477    dst_ =
    478        reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_)));
    479    ASSERT_NE(src_, nullptr);
    480    ASSERT_NE(dst_, nullptr);
    481  }
    482 
    483  void TearDown() override {
    484    aom_free(src_);
    485    aom_free(dst_);
    486    src_ = nullptr;
    487    dst_ = nullptr;
    488  }
    489 
    490  uint8_t RandBool() {
    491    const uint32_t value = rnd_.Rand8();
    492    return (value & 0x1);
    493  }
    494 
    495 protected:
    496  void RefMatchExtremeTestMse();
    497  void RefMatchTestMse();
    498  void SpeedTest();
    499 
    500 protected:
    501  ACMRandom rnd_;
    502  uint8_t *dst_;
    503  uint16_t *src_;
    504  TestParams<FunctionType> params_;
    505 
    506  // some relay helpers
    507  int width() const { return params_.width; }
    508  int height() const { return params_.height; }
    509  int d_stride() const { return params_.width; }
    510 };
    511 
    512 template <typename Mse16xHFunctionType>
    513 void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() {
    514  aom_usec_timer ref_timer, test_timer;
    515  double elapsed_time_c = 0.0;
    516  double elapsed_time_simd = 0.0;
    517  const int loop_count = 10000000;
    518  const int w = width();
    519  const int h = height();
    520  const int dstride = d_stride();
    521 
    522  for (int k = 0; k < mem_size; ++k) {
    523    dst_[k] = rnd_.Rand8();
    524    // Right shift by 6 is done to generate more input in range of [0,255] than
    525    // CDEF_VERY_LARGE
    526    int rnd_i10 = rnd_.Rand16() >> 6;
    527    src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
    528  }
    529 
    530  aom_usec_timer_start(&ref_timer);
    531  for (int i = 0; i < loop_count; i++) {
    532    aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h);
    533  }
    534  aom_usec_timer_mark(&ref_timer);
    535  elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
    536 
    537  aom_usec_timer_start(&test_timer);
    538  for (int i = 0; i < loop_count; i++) {
    539    params_.func(dst_, dstride, src_, w, h);
    540  }
    541  aom_usec_timer_mark(&test_timer);
    542  elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
    543 
    544  printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(),
    545         height(), elapsed_time_c, elapsed_time_simd,
    546         (elapsed_time_c / elapsed_time_simd));
    547 }
    548 
    549 template <typename Mse16xHFunctionType>
    550 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() {
    551  uint64_t mse_ref = 0;
    552  uint64_t mse_mod = 0;
    553  const int w = width();
    554  const int h = height();
    555  const int dstride = d_stride();
    556 
    557  for (int i = 0; i < 10; i++) {
    558    for (int k = 0; k < mem_size; ++k) {
    559      dst_[k] = rnd_.Rand8();
    560      // Right shift by 6 is done to generate more input in range of [0,255]
    561      // than CDEF_VERY_LARGE
    562      int rnd_i10 = rnd_.Rand16() >> 6;
    563      src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
    564    }
    565 
    566    API_REGISTER_STATE_CHECK(
    567        mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
    568    API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
    569    EXPECT_EQ(mse_ref, mse_mod)
    570        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
    571  }
    572 }
    573 
    574 template <typename Mse16xHFunctionType>
    575 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() {
    576  uint64_t mse_ref = 0;
    577  uint64_t mse_mod = 0;
    578  const int w = width();
    579  const int h = height();
    580  const int dstride = d_stride();
    581  const int iter = 10;
    582 
    583  // Fill the buffers with extreme values
    584  for (int i = 0; i < iter; i++) {
    585    for (int k = 0; k < mem_size; ++k) {
    586      dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255);
    587      src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE);
    588    }
    589 
    590    API_REGISTER_STATE_CHECK(
    591        mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
    592    API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
    593    EXPECT_EQ(mse_ref, mse_mod)
    594        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
    595  }
    596 }
    597 
    598 // Main class for testing a function type
    599 template <typename FunctionType>
    600 class MainTestClass
    601    : public ::testing::TestWithParam<TestParams<FunctionType> > {
    602 public:
    603  void SetUp() override {
    604    params_ = this->GetParam();
    605 
    606    rnd_.Reset(ACMRandom::DeterministicSeed());
    607    const size_t unit =
    608        use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
    609    src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
    610    ref_ = new uint8_t[block_size() * unit];
    611    ASSERT_NE(src_, nullptr);
    612    ASSERT_NE(ref_, nullptr);
    613    memset(src_, 0, block_size() * sizeof(src_[0]));
    614    memset(ref_, 0, block_size() * sizeof(ref_[0]));
    615    if (use_high_bit_depth()) {
    616      // TODO(skal): remove!
    617      src_ = CONVERT_TO_BYTEPTR(src_);
    618      ref_ = CONVERT_TO_BYTEPTR(ref_);
    619    }
    620  }
    621 
    622  void TearDown() override {
    623    if (use_high_bit_depth()) {
    624      // TODO(skal): remove!
    625      src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
    626      ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
    627    }
    628 
    629    aom_free(src_);
    630    delete[] ref_;
    631    src_ = nullptr;
    632    ref_ = nullptr;
    633  }
    634 
    635 protected:
    636  // We could sub-class MainTestClass into dedicated class for Variance
    637  // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
    638  // to access top class fields xxx. That's cumbersome, so for now we'll just
    639  // implement the testing methods here:
    640 
    641  // Variance tests
    642  void ZeroTest();
    643  void RefTest();
    644  void RefStrideTest();
    645  void OneQuarterTest();
    646  void SpeedTest();
    647 
    648  // SSE&SUM tests
    649  void RefTestSseSum();
    650  void MinTestSseSum();
    651  void MaxTestSseSum();
    652  void SseSum_SpeedTest();
    653 
    654  // SSE&SUM dual tests
    655  void RefTestSseSumDual();
    656  void MinTestSseSumDual();
    657  void MaxTestSseSumDual();
    658  void SseSum_SpeedTestDual();
    659 
    660  // MSE/SSE tests
    661  void RefTestMse();
    662  void RefTestSse();
    663  void MaxTestMse();
    664  void MaxTestSse();
    665 
    666 protected:
    667  ACMRandom rnd_;
    668  uint8_t *src_;
    669  uint8_t *ref_;
    670  TestParams<FunctionType> params_;
    671 
    672  // some relay helpers
    673  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
    674  int byte_shift() const { return params_.bit_depth - 8; }
    675  int block_size() const { return params_.block_size; }
    676  int width() const { return params_.width; }
    677  int height() const { return params_.height; }
    678  uint32_t mask() const { return params_.mask; }
    679 };
    680 
    681 ////////////////////////////////////////////////////////////////////////////////
    682 // Tests related to variance.
    683 
    684 template <typename VarianceFunctionType>
    685 void MainTestClass<VarianceFunctionType>::ZeroTest() {
    686  for (int i = 0; i <= 255; ++i) {
    687    if (!use_high_bit_depth()) {
    688      memset(src_, i, block_size());
    689    } else {
    690      uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
    691      for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
    692    }
    693    for (int j = 0; j <= 255; ++j) {
    694      if (!use_high_bit_depth()) {
    695        memset(ref_, j, block_size());
    696      } else {
    697        uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
    698        for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
    699      }
    700      unsigned int sse, var;
    701      API_REGISTER_STATE_CHECK(
    702          var = params_.func(src_, width(), ref_, width(), &sse));
    703      EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
    704    }
    705  }
    706 }
    707 
    708 template <typename VarianceFunctionType>
    709 void MainTestClass<VarianceFunctionType>::RefTest() {
    710  for (int i = 0; i < 10; ++i) {
    711    for (int j = 0; j < block_size(); j++) {
    712      if (!use_high_bit_depth()) {
    713        src_[j] = rnd_.Rand8();
    714        ref_[j] = rnd_.Rand8();
    715      } else {
    716        CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
    717        CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
    718      }
    719    }
    720    unsigned int sse1, sse2, var1, var2;
    721    const int stride = width();
    722    API_REGISTER_STATE_CHECK(
    723        var1 = params_.func(src_, stride, ref_, stride, &sse1));
    724    var2 =
    725        variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
    726                     stride, &sse2, use_high_bit_depth(), params_.bit_depth);
    727    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
    728    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
    729  }
    730 }
    731 
    732 template <typename VarianceFunctionType>
    733 void MainTestClass<VarianceFunctionType>::RefStrideTest() {
    734  for (int i = 0; i < 10; ++i) {
    735    const int ref_stride = (i & 1) * width();
    736    const int src_stride = ((i >> 1) & 1) * width();
    737    for (int j = 0; j < block_size(); j++) {
    738      const int ref_ind = (j / width()) * ref_stride + j % width();
    739      const int src_ind = (j / width()) * src_stride + j % width();
    740      if (!use_high_bit_depth()) {
    741        src_[src_ind] = rnd_.Rand8();
    742        ref_[ref_ind] = rnd_.Rand8();
    743      } else {
    744        CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
    745        CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
    746      }
    747    }
    748    unsigned int sse1, sse2;
    749    unsigned int var1, var2;
    750 
    751    API_REGISTER_STATE_CHECK(
    752        var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
    753    var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
    754                        src_stride, ref_stride, &sse2, use_high_bit_depth(),
    755                        params_.bit_depth);
    756    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
    757    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
    758  }
    759 }
    760 
    761 template <typename VarianceFunctionType>
    762 void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
    763  const int half = block_size() / 2;
    764  if (!use_high_bit_depth()) {
    765    memset(src_, 255, block_size());
    766    memset(ref_, 255, half);
    767    memset(ref_ + half, 0, half);
    768  } else {
    769    aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
    770    aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
    771    aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
    772  }
    773  unsigned int sse, var, expected;
    774  API_REGISTER_STATE_CHECK(
    775      var = params_.func(src_, width(), ref_, width(), &sse));
    776  expected = block_size() * 255 * 255 / 4;
    777  EXPECT_EQ(expected, var);
    778 }
    779 
    780 template <typename VarianceFunctionType>
    781 void MainTestClass<VarianceFunctionType>::SpeedTest() {
    782  for (int j = 0; j < block_size(); j++) {
    783    if (!use_high_bit_depth()) {
    784      src_[j] = rnd_.Rand8();
    785      ref_[j] = rnd_.Rand8();
    786 #if CONFIG_AV1_HIGHBITDEPTH
    787    } else {
    788      CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
    789      CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
    790 #endif  // CONFIG_AV1_HIGHBITDEPTH
    791    }
    792  }
    793  unsigned int sse;
    794  const int stride = width();
    795  int run_time = 1000000000 / block_size();
    796  aom_usec_timer timer;
    797  aom_usec_timer_start(&timer);
    798  for (int i = 0; i < run_time; ++i) {
    799    params_.func(src_, stride, ref_, stride, &sse);
    800  }
    801 
    802  aom_usec_timer_mark(&timer);
    803  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
    804  printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time);
    805 }
    806 
    807 template <typename GetSseSum8x8QuadFuncType>
    808 void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() {
    809  for (int i = 0; i < 10; ++i) {
    810    for (int j = 0; j < block_size(); ++j) {
    811      src_[j] = rnd_.Rand8();
    812      ref_[j] = rnd_.Rand8();
    813    }
    814    unsigned int sse1[256] = { 0 };
    815    unsigned int sse2[256] = { 0 };
    816    unsigned int var1[256] = { 0 };
    817    unsigned int var2[256] = { 0 };
    818    int sum1[256] = { 0 };
    819    int sum2[256] = { 0 };
    820    unsigned int sse_tot_c = 0;
    821    unsigned int sse_tot_simd = 0;
    822    int sum_tot_c = 0;
    823    int sum_tot_simd = 0;
    824    const int stride = width();
    825    int k = 0;
    826 
    827    for (int row = 0; row < height(); row += 8) {
    828      for (int col = 0; col < width(); col += 32) {
    829        API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride,
    830                                              ref_ + stride * row + col, stride,
    831                                              &sse1[k], &sum1[k], &sse_tot_simd,
    832                                              &sum_tot_simd, &var1[k]));
    833        aom_get_var_sse_sum_8x8_quad_c(
    834            src_ + stride * row + col, stride, ref_ + stride * row + col,
    835            stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
    836        k += 4;
    837      }
    838    }
    839    EXPECT_EQ(sse_tot_c, sse_tot_simd);
    840    EXPECT_EQ(sum_tot_c, sum_tot_simd);
    841    for (int p = 0; p < 256; p++) {
    842      EXPECT_EQ(sse1[p], sse2[p]);
    843      EXPECT_EQ(sum1[p], sum2[p]);
    844      EXPECT_EQ(var1[p], var2[p]);
    845    }
    846  }
    847 }
    848 
    849 template <typename GetSseSum8x8QuadFuncType>
    850 void MainTestClass<GetSseSum8x8QuadFuncType>::MinTestSseSum() {
    851  memset(src_, 0, block_size());
    852  memset(ref_, 255, block_size());
    853  unsigned int sse1[256] = { 0 };
    854  unsigned int sse2[256] = { 0 };
    855  unsigned int var1[256] = { 0 };
    856  unsigned int var2[256] = { 0 };
    857  int sum1[256] = { 0 };
    858  int sum2[256] = { 0 };
    859  unsigned int sse_tot_c = 0;
    860  unsigned int sse_tot_simd = 0;
    861  int sum_tot_c = 0;
    862  int sum_tot_simd = 0;
    863  const int stride = width();
    864  int k = 0;
    865 
    866  for (int i = 0; i < height(); i += 8) {
    867    for (int j = 0; j < width(); j += 32) {
    868      API_REGISTER_STATE_CHECK(params_.func(
    869          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
    870          &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
    871      aom_get_var_sse_sum_8x8_quad_c(
    872          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
    873          &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
    874      k += 4;
    875    }
    876  }
    877  EXPECT_EQ(sse_tot_simd, sse_tot_c);
    878  EXPECT_EQ(sum_tot_simd, sum_tot_c);
    879  for (int p = 0; p < 256; p++) {
    880    EXPECT_EQ(sse1[p], sse2[p]);
    881    EXPECT_EQ(sum1[p], sum2[p]);
    882    EXPECT_EQ(var1[p], var2[p]);
    883  }
    884 }
    885 
    886 template <typename GetSseSum8x8QuadFuncType>
    887 void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() {
    888  memset(src_, 255, block_size());
    889  memset(ref_, 0, block_size());
    890  unsigned int sse1[256] = { 0 };
    891  unsigned int sse2[256] = { 0 };
    892  unsigned int var1[256] = { 0 };
    893  unsigned int var2[256] = { 0 };
    894  int sum1[256] = { 0 };
    895  int sum2[256] = { 0 };
    896  unsigned int sse_tot_c = 0;
    897  unsigned int sse_tot_simd = 0;
    898  int sum_tot_c = 0;
    899  int sum_tot_simd = 0;
    900  const int stride = width();
    901  int k = 0;
    902 
    903  for (int i = 0; i < height(); i += 8) {
    904    for (int j = 0; j < width(); j += 32) {
    905      API_REGISTER_STATE_CHECK(params_.func(
    906          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
    907          &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
    908      aom_get_var_sse_sum_8x8_quad_c(
    909          src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
    910          &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
    911      k += 4;
    912    }
    913  }
    914  EXPECT_EQ(sse_tot_c, sse_tot_simd);
    915  EXPECT_EQ(sum_tot_c, sum_tot_simd);
    916 
    917  for (int p = 0; p < 256; p++) {
    918    EXPECT_EQ(sse1[p], sse2[p]);
    919    EXPECT_EQ(sum1[p], sum2[p]);
    920    EXPECT_EQ(var1[p], var2[p]);
    921  }
    922 }
    923 
    924 template <typename GetSseSum8x8QuadFuncType>
    925 void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() {
    926  const int loop_count = 1000000000 / block_size();
    927  for (int j = 0; j < block_size(); ++j) {
    928    src_[j] = rnd_.Rand8();
    929    ref_[j] = rnd_.Rand8();
    930  }
    931 
    932  unsigned int sse1[4] = { 0 };
    933  unsigned int sse2[4] = { 0 };
    934  unsigned int var1[4] = { 0 };
    935  unsigned int var2[4] = { 0 };
    936  int sum1[4] = { 0 };
    937  int sum2[4] = { 0 };
    938  unsigned int sse_tot_c = 0;
    939  unsigned int sse_tot_simd = 0;
    940  int sum_tot_c = 0;
    941  int sum_tot_simd = 0;
    942  const int stride = width();
    943 
    944  aom_usec_timer timer;
    945  aom_usec_timer_start(&timer);
    946  for (int r = 0; r < loop_count; ++r) {
    947    for (int i = 0; i < height(); i += 8) {
    948      for (int j = 0; j < width(); j += 32) {
    949        aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride,
    950                                       ref_ + stride * i + j, stride, sse2,
    951                                       sum2, &sse_tot_c, &sum_tot_c, var2);
    952      }
    953    }
    954  }
    955  aom_usec_timer_mark(&timer);
    956  const double elapsed_time_ref =
    957      static_cast<double>(aom_usec_timer_elapsed(&timer));
    958 
    959  aom_usec_timer_start(&timer);
    960  for (int r = 0; r < loop_count; ++r) {
    961    for (int i = 0; i < height(); i += 8) {
    962      for (int j = 0; j < width(); j += 32) {
    963        params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j,
    964                     stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1);
    965      }
    966    }
    967  }
    968  aom_usec_timer_mark(&timer);
    969  const double elapsed_time_simd =
    970      static_cast<double>(aom_usec_timer_elapsed(&timer));
    971 
    972  printf(
    973      "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t "
    974      "gain=%lf \n",
    975      width(), height(), elapsed_time_ref, elapsed_time_simd,
    976      elapsed_time_ref / elapsed_time_simd);
    977 }
    978 
    979 template <typename GetSseSum16x16DualFuncType>
    980 void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() {
    981  for (int iter = 0; iter < 10; ++iter) {
    982    for (int idx = 0; idx < block_size(); ++idx) {
    983      src_[idx] = rnd_.Rand8();
    984      ref_[idx] = rnd_.Rand8();
    985    }
    986    unsigned int sse1[64] = { 0 };
    987    unsigned int sse2[64] = { 0 };
    988    unsigned int var1[64] = { 0 };
    989    unsigned int var2[64] = { 0 };
    990    unsigned int sse_tot_c = 0;
    991    unsigned int sse_tot_simd = 0;
    992    int sum_tot_c = 0;
    993    int sum_tot_simd = 0;
    994    const int stride = width();
    995    int k = 0;
    996 
    997    for (int row = 0; row < height(); row += 16) {
    998      for (int col = 0; col < width(); col += 32) {
    999        API_REGISTER_STATE_CHECK(params_.func(
   1000            src_ + stride * row + col, stride, ref_ + stride * row + col,
   1001            stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
   1002        aom_get_var_sse_sum_16x16_dual_c(
   1003            src_ + stride * row + col, stride, ref_ + stride * row + col,
   1004            stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
   1005        k += 2;
   1006      }
   1007    }
   1008    EXPECT_EQ(sse_tot_c, sse_tot_simd);
   1009    EXPECT_EQ(sum_tot_c, sum_tot_simd);
   1010    for (int p = 0; p < 64; p++) {
   1011      EXPECT_EQ(sse1[p], sse2[p]);
   1012      EXPECT_EQ(sse_tot_simd, sse_tot_c);
   1013      EXPECT_EQ(sum_tot_simd, sum_tot_c);
   1014      EXPECT_EQ(var1[p], var2[p]);
   1015    }
   1016  }
   1017 }
   1018 
   1019 template <typename GetSseSum16x16DualFuncType>
   1020 void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() {
   1021  memset(src_, 0, block_size());
   1022  memset(ref_, 255, block_size());
   1023  unsigned int sse1[64] = { 0 };
   1024  unsigned int sse2[64] = { 0 };
   1025  unsigned int var1[64] = { 0 };
   1026  unsigned int var2[64] = { 0 };
   1027  unsigned int sse_tot_c = 0;
   1028  unsigned int sse_tot_simd = 0;
   1029  int sum_tot_c = 0;
   1030  int sum_tot_simd = 0;
   1031  const int stride = width();
   1032  int k = 0;
   1033 
   1034  for (int row = 0; row < height(); row += 16) {
   1035    for (int col = 0; col < width(); col += 32) {
   1036      API_REGISTER_STATE_CHECK(params_.func(
   1037          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
   1038          &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
   1039      aom_get_var_sse_sum_16x16_dual_c(
   1040          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
   1041          &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
   1042      k += 2;
   1043    }
   1044  }
   1045  EXPECT_EQ(sse_tot_simd, sse_tot_c);
   1046  EXPECT_EQ(sum_tot_simd, sum_tot_c);
   1047  for (int p = 0; p < 64; p++) {
   1048    EXPECT_EQ(sse1[p], sse2[p]);
   1049    EXPECT_EQ(var1[p], var2[p]);
   1050  }
   1051 }
   1052 
   1053 template <typename GetSseSum16x16DualFuncType>
   1054 void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() {
   1055  memset(src_, 255, block_size());
   1056  memset(ref_, 0, block_size());
   1057  unsigned int sse1[64] = { 0 };
   1058  unsigned int sse2[64] = { 0 };
   1059  unsigned int var1[64] = { 0 };
   1060  unsigned int var2[64] = { 0 };
   1061  unsigned int sse_tot_c = 0;
   1062  unsigned int sse_tot_simd = 0;
   1063  int sum_tot_c = 0;
   1064  int sum_tot_simd = 0;
   1065  const int stride = width();
   1066  int k = 0;
   1067 
   1068  for (int row = 0; row < height(); row += 16) {
   1069    for (int col = 0; col < width(); col += 32) {
   1070      API_REGISTER_STATE_CHECK(params_.func(
   1071          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
   1072          &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
   1073      aom_get_var_sse_sum_16x16_dual_c(
   1074          src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
   1075          &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
   1076      k += 2;
   1077    }
   1078  }
   1079  EXPECT_EQ(sse_tot_c, sse_tot_simd);
   1080  EXPECT_EQ(sum_tot_c, sum_tot_simd);
   1081 
   1082  for (int p = 0; p < 64; p++) {
   1083    EXPECT_EQ(sse1[p], sse2[p]);
   1084    EXPECT_EQ(var1[p], var2[p]);
   1085  }
   1086 }
   1087 
   1088 template <typename GetSseSum16x16DualFuncType>
   1089 void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() {
   1090  const int loop_count = 1000000000 / block_size();
   1091  for (int idx = 0; idx < block_size(); ++idx) {
   1092    src_[idx] = rnd_.Rand8();
   1093    ref_[idx] = rnd_.Rand8();
   1094  }
   1095 
   1096  unsigned int sse1[2] = { 0 };
   1097  unsigned int sse2[2] = { 0 };
   1098  unsigned int var1[2] = { 0 };
   1099  unsigned int var2[2] = { 0 };
   1100  unsigned int sse_tot_c = 0;
   1101  unsigned int sse_tot_simd = 0;
   1102  int sum_tot_c = 0;
   1103  int sum_tot_simd = 0;
   1104  const int stride = width();
   1105 
   1106  aom_usec_timer timer;
   1107  aom_usec_timer_start(&timer);
   1108  for (int r = 0; r < loop_count; ++r) {
   1109    for (int row = 0; row < height(); row += 16) {
   1110      for (int col = 0; col < width(); col += 32) {
   1111        aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride,
   1112                                         ref_ + stride * row + col, stride,
   1113                                         sse2, &sse_tot_c, &sum_tot_c, var2);
   1114      }
   1115    }
   1116  }
   1117  aom_usec_timer_mark(&timer);
   1118  const double elapsed_time_ref =
   1119      static_cast<double>(aom_usec_timer_elapsed(&timer));
   1120 
   1121  aom_usec_timer_start(&timer);
   1122  for (int r = 0; r < loop_count; ++r) {
   1123    for (int row = 0; row < height(); row += 16) {
   1124      for (int col = 0; col < width(); col += 32) {
   1125        params_.func(src_ + stride * row + col, stride,
   1126                     ref_ + stride * row + col, stride, sse1, &sse_tot_simd,
   1127                     &sum_tot_simd, var1);
   1128      }
   1129    }
   1130  }
   1131  aom_usec_timer_mark(&timer);
   1132  const double elapsed_time_simd =
   1133      static_cast<double>(aom_usec_timer_elapsed(&timer));
   1134 
   1135  printf(
   1136      "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf "
   1137      "\t "
   1138      "gain=%lf \n",
   1139      width(), height(), elapsed_time_ref, elapsed_time_simd,
   1140      elapsed_time_ref / elapsed_time_simd);
   1141 }
   1142 
   1143 ////////////////////////////////////////////////////////////////////////////////
   1144 // Tests related to MSE / SSE.
   1145 
   1146 template <typename FunctionType>
   1147 void MainTestClass<FunctionType>::RefTestMse() {
   1148  for (int i = 0; i < 10; ++i) {
   1149    for (int j = 0; j < block_size(); ++j) {
   1150      if (!use_high_bit_depth()) {
   1151        src_[j] = rnd_.Rand8();
   1152        ref_[j] = rnd_.Rand8();
   1153 #if CONFIG_AV1_HIGHBITDEPTH
   1154      } else {
   1155        CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
   1156        CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
   1157 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1158      }
   1159    }
   1160    unsigned int sse1, sse2;
   1161    const int stride = width();
   1162    API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
   1163    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
   1164                 stride, &sse2, use_high_bit_depth(), params_.bit_depth);
   1165    EXPECT_EQ(sse1, sse2);
   1166  }
   1167 }
   1168 
   1169 template <typename FunctionType>
   1170 void MainTestClass<FunctionType>::RefTestSse() {
   1171  for (int i = 0; i < 10; ++i) {
   1172    for (int j = 0; j < block_size(); ++j) {
   1173      src_[j] = rnd_.Rand8();
   1174      ref_[j] = rnd_.Rand8();
   1175    }
   1176    unsigned int sse2;
   1177    unsigned int var1;
   1178    const int stride = width();
   1179    API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
   1180    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
   1181                 stride, &sse2, false, AOM_BITS_8);
   1182    EXPECT_EQ(var1, sse2);
   1183  }
   1184 }
   1185 
   1186 template <typename FunctionType>
   1187 void MainTestClass<FunctionType>::MaxTestMse() {
   1188  int max_value = (1 << params_.bit_depth) - 1;
   1189  if (!use_high_bit_depth()) {
   1190    memset(src_, max_value, block_size());
   1191    memset(ref_, 0, block_size());
   1192 #if CONFIG_AV1_HIGHBITDEPTH
   1193  } else {
   1194    aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size());
   1195    aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size());
   1196 #endif  // CONFIG_AV1_HIGHBITDEPTH
   1197  }
   1198  unsigned int sse;
   1199  API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
   1200  unsigned int expected = (unsigned int)block_size() * max_value * max_value;
   1201  switch (params_.bit_depth) {
   1202    case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break;
   1203    case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break;
   1204    case AOM_BITS_8:
   1205    default: break;
   1206  }
   1207  EXPECT_EQ(expected, sse);
   1208 }
   1209 
   1210 template <typename FunctionType>
   1211 void MainTestClass<FunctionType>::MaxTestSse() {
   1212  memset(src_, 255, block_size());
   1213  memset(ref_, 0, block_size());
   1214  unsigned int var;
   1215  API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
   1216  const unsigned int expected = block_size() * 255 * 255;
   1217  EXPECT_EQ(expected, var);
   1218 }
   1219 
   1220 ////////////////////////////////////////////////////////////////////////////////
   1221 
   1222 using std::get;
   1223 using std::make_tuple;
   1224 using std::tuple;
   1225 
   1226 template <typename FunctionType>
   1227 class SubpelVarianceTest
   1228    : public ::testing::TestWithParam<TestParams<FunctionType> > {
   1229 public:
   1230  void SetUp() override {
   1231    params_ = this->GetParam();
   1232 
   1233    rnd_.Reset(ACMRandom::DeterministicSeed());
   1234    if (!use_high_bit_depth()) {
   1235      src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
   1236      sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
   1237      ref_ = reinterpret_cast<uint8_t *>(
   1238          aom_memalign(32, block_size() + width() + height() + 1));
   1239    } else {
   1240      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
   1241          aom_memalign(32, block_size() * sizeof(uint16_t))));
   1242      sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
   1243          aom_memalign(32, block_size() * sizeof(uint16_t))));
   1244      ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
   1245          32, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
   1246    }
   1247    ASSERT_NE(src_, nullptr);
   1248    ASSERT_NE(sec_, nullptr);
   1249    ASSERT_NE(ref_, nullptr);
   1250  }
   1251 
   1252  void TearDown() override {
   1253    if (!use_high_bit_depth()) {
   1254      aom_free(src_);
   1255      aom_free(ref_);
   1256      aom_free(sec_);
   1257    } else {
   1258      aom_free(CONVERT_TO_SHORTPTR(src_));
   1259      aom_free(CONVERT_TO_SHORTPTR(ref_));
   1260      aom_free(CONVERT_TO_SHORTPTR(sec_));
   1261    }
   1262  }
   1263 
   1264 protected:
   1265  void RefTest();
   1266  void ExtremeRefTest();
   1267  void SpeedTest();
   1268 
   1269  ACMRandom rnd_;
   1270  uint8_t *src_;
   1271  uint8_t *ref_;
   1272  uint8_t *sec_;
   1273  TestParams<FunctionType> params_;
   1274  DIST_WTD_COMP_PARAMS jcp_param_;
   1275 
   1276  // some relay helpers
   1277  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
   1278  int byte_shift() const { return params_.bit_depth - 8; }
   1279  int block_size() const { return params_.block_size; }
   1280  int width() const { return params_.width; }
   1281  int height() const { return params_.height; }
   1282  uint32_t mask() const { return params_.mask; }
   1283 };
   1284 
   1285 template <typename SubpelVarianceFunctionType>
   1286 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
   1287  for (int x = 0; x < 8; ++x) {
   1288    for (int y = 0; y < 8; ++y) {
   1289      if (!use_high_bit_depth()) {
   1290        for (int j = 0; j < block_size(); j++) {
   1291          src_[j] = rnd_.Rand8();
   1292        }
   1293        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
   1294          ref_[j] = rnd_.Rand8();
   1295        }
   1296      } else {
   1297        for (int j = 0; j < block_size(); j++) {
   1298          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
   1299        }
   1300        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
   1301          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
   1302        }
   1303      }
   1304      unsigned int sse1, sse2;
   1305      unsigned int var1;
   1306      API_REGISTER_STATE_CHECK(
   1307          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
   1308      const unsigned int var2 = subpel_variance_ref(
   1309          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
   1310          use_high_bit_depth(), params_.bit_depth);
   1311      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
   1312      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
   1313    }
   1314  }
   1315 }
   1316 
   1317 template <typename SubpelVarianceFunctionType>
   1318 void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
   1319  // Compare against reference.
   1320  // Src: Set the first half of values to 0, the second half to the maximum.
   1321  // Ref: Set the first half of values to the maximum, the second half to 0.
   1322  for (int x = 0; x < 8; ++x) {
   1323    for (int y = 0; y < 8; ++y) {
   1324      const int half = block_size() / 2;
   1325      if (!use_high_bit_depth()) {
   1326        memset(src_, 0, half);
   1327        memset(src_ + half, 255, half);
   1328        memset(ref_, 255, half);
   1329        memset(ref_ + half, 0, half + width() + height() + 1);
   1330      } else {
   1331        aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
   1332        aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
   1333        aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
   1334        aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(),
   1335                     half + width() + height() + 1);
   1336      }
   1337      unsigned int sse1, sse2;
   1338      unsigned int var1;
   1339      API_REGISTER_STATE_CHECK(
   1340          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
   1341      const unsigned int var2 = subpel_variance_ref(
   1342          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
   1343          use_high_bit_depth(), params_.bit_depth);
   1344      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
   1345      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
   1346    }
   1347  }
   1348 }
   1349 
   1350 template <typename SubpelVarianceFunctionType>
   1351 void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
   1352  if (!use_high_bit_depth()) {
   1353    for (int j = 0; j < block_size(); j++) {
   1354      src_[j] = rnd_.Rand8();
   1355    }
   1356    for (int j = 0; j < block_size() + width() + height() + 1; j++) {
   1357      ref_[j] = rnd_.Rand8();
   1358    }
   1359  } else {
   1360    for (int j = 0; j < block_size(); j++) {
   1361      CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
   1362    }
   1363    for (int j = 0; j < block_size() + width() + height() + 1; j++) {
   1364      CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
   1365    }
   1366  }
   1367 
   1368  unsigned int sse1, sse2;
   1369  int run_time = 1000000000 / block_size();
   1370  aom_usec_timer timer;
   1371 
   1372  aom_usec_timer_start(&timer);
   1373  for (int i = 0; i < run_time; ++i) {
   1374    int x = rnd_(8);
   1375    int y = rnd_(8);
   1376    params_.func(ref_, width() + 1, x, y, src_, width(), &sse1);
   1377  }
   1378  aom_usec_timer_mark(&timer);
   1379 
   1380  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
   1381 
   1382  aom_usec_timer timer_c;
   1383 
   1384  aom_usec_timer_start(&timer_c);
   1385  for (int i = 0; i < run_time; ++i) {
   1386    int x = rnd_(8);
   1387    int y = rnd_(8);
   1388    subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y,
   1389                        &sse2, use_high_bit_depth(), params_.bit_depth);
   1390  }
   1391  aom_usec_timer_mark(&timer_c);
   1392 
   1393  const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c));
   1394 
   1395  printf(
   1396      "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n",
   1397      width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time,
   1398      elapsed_time_c / elapsed_time);
   1399 }
   1400 
   1401 template <>
   1402 void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
   1403  for (int x = 0; x < 8; ++x) {
   1404    for (int y = 0; y < 8; ++y) {
   1405      if (!use_high_bit_depth()) {
   1406        for (int j = 0; j < block_size(); j++) {
   1407          src_[j] = rnd_.Rand8();
   1408          sec_[j] = rnd_.Rand8();
   1409        }
   1410        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
   1411          ref_[j] = rnd_.Rand8();
   1412        }
   1413      } else {
   1414        for (int j = 0; j < block_size(); j++) {
   1415          CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
   1416          CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
   1417        }
   1418        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
   1419          CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
   1420        }
   1421      }
   1422      uint32_t sse1, sse2;
   1423      uint32_t var1, var2;
   1424      API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
   1425                                                   src_, width(), &sse1, sec_));
   1426      var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
   1427                                     params_.log2height, x, y, &sse2,
   1428                                     use_high_bit_depth(), params_.bit_depth);
   1429      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
   1430      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
   1431    }
   1432  }
   1433 }
   1434 
   1435 ////////////////////////////////////////////////////////////////////////////////
   1436 
   1437 #if !CONFIG_REALTIME_ONLY
   1438 
   1439 static const int kMaskMax = 64;
   1440 
   1441 using ObmcSubpelVarianceParams = TestParams<ObmcSubpelVarFunc>;
   1442 
   1443 template <typename FunctionType>
   1444 class ObmcVarianceTest
   1445    : public ::testing::TestWithParam<TestParams<FunctionType> > {
   1446 public:
   1447  void SetUp() override {
   1448    params_ = this->GetParam();
   1449 
   1450    rnd_.Reset(ACMRandom::DeterministicSeed());
   1451    if (!use_high_bit_depth()) {
   1452      pre_ = reinterpret_cast<uint8_t *>(
   1453          aom_memalign(32, block_size() + width() + height() + 1));
   1454    } else {
   1455      pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign(
   1456          32, (block_size() + width() + height() + 1) * sizeof(uint16_t))));
   1457    }
   1458    wsrc_ = reinterpret_cast<int32_t *>(
   1459        aom_memalign(32, block_size() * sizeof(uint32_t)));
   1460    mask_ = reinterpret_cast<int32_t *>(
   1461        aom_memalign(32, block_size() * sizeof(uint32_t)));
   1462    ASSERT_NE(pre_, nullptr);
   1463    ASSERT_NE(wsrc_, nullptr);
   1464    ASSERT_NE(mask_, nullptr);
   1465  }
   1466 
   1467  void TearDown() override {
   1468    if (!use_high_bit_depth()) {
   1469      aom_free(pre_);
   1470    } else {
   1471      aom_free(CONVERT_TO_SHORTPTR(pre_));
   1472    }
   1473    aom_free(wsrc_);
   1474    aom_free(mask_);
   1475  }
   1476 
   1477 protected:
   1478  void RefTest();
   1479  void ExtremeRefTest();
   1480  void SpeedTest();
   1481 
   1482  ACMRandom rnd_;
   1483  uint8_t *pre_;
   1484  int32_t *wsrc_;
   1485  int32_t *mask_;
   1486  TestParams<FunctionType> params_;
   1487 
   1488  // some relay helpers
   1489  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
   1490  int byte_shift() const { return params_.bit_depth - 8; }
   1491  int block_size() const { return params_.block_size; }
   1492  int width() const { return params_.width; }
   1493  int height() const { return params_.height; }
   1494  uint32_t bd_mask() const { return params_.mask; }
   1495 };
   1496 
   1497 template <>
   1498 void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() {
   1499  for (int x = 0; x < 8; ++x) {
   1500    for (int y = 0; y < 8; ++y) {
   1501      if (!use_high_bit_depth())
   1502        for (int j = 0; j < block_size() + width() + height() + 1; j++)
   1503          pre_[j] = rnd_.Rand8();
   1504      else
   1505        for (int j = 0; j < block_size() + width() + height() + 1; j++)
   1506          CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
   1507      for (int j = 0; j < block_size(); j++) {
   1508        wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
   1509        mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
   1510      }
   1511 
   1512      uint32_t sse1, sse2;
   1513      uint32_t var1, var2;
   1514      API_REGISTER_STATE_CHECK(
   1515          var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
   1516      var2 = obmc_subpel_variance_ref(
   1517          pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
   1518          &sse2, use_high_bit_depth(), params_.bit_depth);
   1519      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
   1520      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
   1521    }
   1522  }
   1523 }
   1524 
   1525 template <>
   1526 void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() {
   1527  // Pre: Set the first half of values to the maximum, the second half to 0.
   1528  // Mask: same as above
   1529  // WSrc: Set the first half of values to 0, the second half to the maximum.
   1530  for (int x = 0; x < 8; ++x) {
   1531    for (int y = 0; y < 8; ++y) {
   1532      const int half = block_size() / 2;
   1533      if (!use_high_bit_depth()) {
   1534        memset(pre_, 255, half);
   1535        memset(pre_ + half, 0, half + width() + height() + 1);
   1536      } else {
   1537        aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
   1538        aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0,
   1539                     half + width() + height() + 1);
   1540      }
   1541      for (int j = 0; j < half; j++) {
   1542        wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
   1543        mask_[j] = 0;
   1544      }
   1545      for (int j = half; j < block_size(); j++) {
   1546        wsrc_[j] = 0;
   1547        mask_[j] = kMaskMax * kMaskMax;
   1548      }
   1549 
   1550      uint32_t sse1, sse2;
   1551      uint32_t var1, var2;
   1552      API_REGISTER_STATE_CHECK(
   1553          var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
   1554      var2 = obmc_subpel_variance_ref(
   1555          pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
   1556          &sse2, use_high_bit_depth(), params_.bit_depth);
   1557      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
   1558      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
   1559    }
   1560  }
   1561 }
   1562 
   1563 template <>
   1564 void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() {
   1565  if (!use_high_bit_depth())
   1566    for (int j = 0; j < block_size() + width() + height() + 1; j++)
   1567      pre_[j] = rnd_.Rand8();
   1568  else
   1569    for (int j = 0; j < block_size() + width() + height() + 1; j++)
   1570      CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
   1571  for (int j = 0; j < block_size(); j++) {
   1572    wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
   1573    mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
   1574  }
   1575  unsigned int sse1;
   1576  const int stride = width() + 1;
   1577  int run_time = 1000000000 / block_size();
   1578  aom_usec_timer timer;
   1579 
   1580  aom_usec_timer_start(&timer);
   1581  for (int i = 0; i < run_time; ++i) {
   1582    int x = rnd_(8);
   1583    int y = rnd_(8);
   1584    API_REGISTER_STATE_CHECK(
   1585        params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
   1586  }
   1587  aom_usec_timer_mark(&timer);
   1588 
   1589  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
   1590  printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(),
   1591         params_.bit_depth, elapsed_time);
   1592 }
   1593 
   1594 #endif  // !CONFIG_REALTIME_ONLY
   1595 
   // Fixture and parameter aliases used by the TEST_P / INSTANTIATE_TEST_SUITE_P
   // statements that follow. Each alias binds a fixture (or TestParams) template
   // to one kernel function-pointer type. The OBMC alias exists only when
   // CONFIG_REALTIME_ONLY is off.
   1596 using MseWxHTest = MseWxHTestClass<MseWxH16bitFunc>;
   1597 using Mse16xHTest = Mse16xHTestClass<Mse16xH16bitFunc>;
   1598 using AvxMseTest = MainTestClass<VarianceMxNFunc>;
   1599 using AvxVarianceTest = MainTestClass<VarianceMxNFunc>;
   1600 using GetSseSum8x8QuadTest = MainTestClass<GetSseSum8x8QuadFunc>;
   1601 using GetSseSum16x16DualTest = MainTestClass<GetSseSum16x16DualFunc>;
   1602 using AvxSubpelVarianceTest = SubpelVarianceTest<SubpixVarMxNFunc>;
   1603 using AvxSubpelAvgVarianceTest = SubpelVarianceTest<SubpixAvgVarMxNFunc>;
   1604 #if !CONFIG_REALTIME_ONLY
   1605 using AvxObmcSubpelVarianceTest = ObmcVarianceTest<ObmcSubpelVarFunc>;
   1606 #endif
   1607 using MseWxHParams = TestParams<MseWxH16bitFunc>;
   1608 using Mse16xHParams = TestParams<Mse16xH16bitFunc>;
   1609 
   // Parameterized test cases: each TEST_P body forwards to a single fixture
   // method. The DISABLED_-prefixed cases are the speed benchmarks (GoogleTest
   // does not run DISABLED_ tests by default). SumOfSquaresTest and the OBMC
   // cases are compiled only when CONFIG_REALTIME_ONLY is off.
   1610 TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); }
   1611 TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
   1612 TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); }
   1613 TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); }
   1614 TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); }
   1615 TEST_P(AvxMseTest, RefMse) { RefTestMse(); }
   1616 TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); }
   1617 TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
   1618 TEST_P(AvxVarianceTest, Ref) { RefTest(); }
   1619 TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
   1620 TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
   1621 TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); }
   1622 TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); }
   1623 TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); }
   1624 TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); }
   1625 TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); }
   1626 TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); }
   1627 TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); }
   1628 TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); }
   1629 TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); }
   1630 #if !CONFIG_REALTIME_ONLY
   1631 TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
   1632 TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
   1633 #endif  // !CONFIG_REALTIME_ONLY
   1634 TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
   1635 TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
   1636 TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
   1637 TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
   1638 #if !CONFIG_REALTIME_ONLY
   1639 TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); }
   1640 TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
   1641 TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
   1642 #endif
   1643 
   // Runs MseWxHTest against the C implementation aom_mse_wxh_16bit_c for four
   // parameter sets. The two leading integers look like log2(width) and
   // log2(height) and the trailing 8 like the bit depth -- TODO confirm against
   // the TestParams constructor.
   1644 INSTANTIATE_TEST_SUITE_P(
   1645    C, MseWxHTest,
   1646    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8),
   1647                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8),
   1648                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8),
   1649                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8)));
   1650 
   // Runs Mse16xHTest against the C implementation aom_mse_16xh_16bit_c with
   // the same four parameter sets used for MseWxHTest.
   1651 INSTANTIATE_TEST_SUITE_P(
   1652    C, Mse16xHTest,
   1653    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8),
   1654                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8),
   1655                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8),
   1656                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8)));
   1657 
   // SumOfSquaresTest takes the bare function pointer as its parameter; only
   // the C implementation is registered here, and only in non-realtime builds.
   1658 #if !CONFIG_REALTIME_ONLY
   1659 INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
   1660                         ::testing::Values(aom_get_mb_ss_c));
   1661 #endif  // !CONFIG_REALTIME_ONLY
   1662 
   // Parameter type for the MSE tests, followed by the C instantiation of
   // AvxMseTest for 16x16, 16x8, 8x16 and 8x8. No bit-depth argument is passed
   // here, so whatever default TestParams provides is used.
   1663 using MseParams = TestParams<VarianceMxNFunc>;
   1664 INSTANTIATE_TEST_SUITE_P(C, AvxMseTest,
   1665                         ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
   1666                                           MseParams(4, 3, &aom_mse16x8_c),
   1667                                           MseParams(3, 4, &aom_mse8x16_c),
   1668                                           MseParams(3, 3, &aom_mse8x8_c)));
   1669 
   // C variance kernels exercised by AvxVarianceTest, one entry per block size
   // from 128x128 down to 4x4. The six 4:1 / 1:4 sizes are added only when
   // CONFIG_REALTIME_ONLY is off. The integer pair in each entry matches
   // log2 of the dimensions in the function name (e.g. 7, 6 for 128x64).
   1670 using VarianceParams = TestParams<VarianceMxNFunc>;
   1671 const VarianceParams kArrayVariance_c[] = {
   1672  VarianceParams(7, 7, &aom_variance128x128_c),
   1673  VarianceParams(7, 6, &aom_variance128x64_c),
   1674  VarianceParams(6, 7, &aom_variance64x128_c),
   1675  VarianceParams(6, 6, &aom_variance64x64_c),
   1676  VarianceParams(6, 5, &aom_variance64x32_c),
   1677  VarianceParams(5, 6, &aom_variance32x64_c),
   1678  VarianceParams(5, 5, &aom_variance32x32_c),
   1679  VarianceParams(5, 4, &aom_variance32x16_c),
   1680  VarianceParams(4, 5, &aom_variance16x32_c),
   1681  VarianceParams(4, 4, &aom_variance16x16_c),
   1682  VarianceParams(4, 3, &aom_variance16x8_c),
   1683  VarianceParams(3, 4, &aom_variance8x16_c),
   1684  VarianceParams(3, 3, &aom_variance8x8_c),
   1685  VarianceParams(3, 2, &aom_variance8x4_c),
   1686  VarianceParams(2, 3, &aom_variance4x8_c),
   1687  VarianceParams(2, 2, &aom_variance4x4_c),
   1688 #if !CONFIG_REALTIME_ONLY
   1689  VarianceParams(6, 4, &aom_variance64x16_c),
   1690  VarianceParams(4, 6, &aom_variance16x64_c),
   1691  VarianceParams(5, 3, &aom_variance32x8_c),
   1692  VarianceParams(3, 5, &aom_variance8x32_c),
   1693  VarianceParams(4, 2, &aom_variance16x4_c),
   1694  VarianceParams(2, 4, &aom_variance4x16_c),
   1695 #endif
   1696 };
   1697 INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest,
   1698                         ::testing::ValuesIn(kArrayVariance_c));
   1699 
   // C instantiation of GetSseSum8x8QuadTest: the same kernel,
   // aom_get_var_sse_sum_8x8_quad_c, is run with four different parameter sets
   // (presumably log2 buffer dimensions -- TODO confirm against the fixture).
   1700 using GetSseSumParams = TestParams<GetSseSum8x8QuadFunc>;
   1701 const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = {
   1702  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0),
   1703  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0),
   1704  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0),
   1705  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0)
   1706 };
   1707 INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest,
   1708                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c));
   1709 
   // C instantiation of GetSseSum16x16DualTest: aom_get_var_sse_sum_16x16_dual_c
   // is run with the same four parameter sets as the 8x8 quad variant.
   1710 using GetSseSumParamsDual = TestParams<GetSseSum16x16DualFunc>;
   1711 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = {
   1712  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0),
   1713  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0),
   1714  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0),
   1715  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0)
   1716 };
   1717 
   1718 INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest,
   1719                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c));
   1720 
   // C sub-pixel variance kernels exercised by AvxSubpelVarianceTest, one entry
   // per block size; the 4:1 / 1:4 sizes are added only in non-realtime builds.
   // Every entry passes 0 as the final argument (presumably the bit-depth slot,
   // meaning "not high bit depth" -- TODO confirm against TestParams).
   1721 using SubpelVarianceParams = TestParams<SubpixVarMxNFunc>;
   1722 const SubpelVarianceParams kArraySubpelVariance_c[] = {
   1723  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
   1724  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
   1725  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
   1726  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
   1727  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
   1728  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
   1729  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0),
   1730  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0),
   1731  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0),
   1732  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0),
   1733  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0),
   1734  SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0),
   1735  SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
   1736  SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
   1737  SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
   1738  SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0),
   1739 #if !CONFIG_REALTIME_ONLY
   1740  SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0),
   1741  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0),
   1742  SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0),
   1743  SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0),
   1744  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0),
   1745  SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0),
   1746 #endif
   1747 };
   1748 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest,
   1749                         ::testing::ValuesIn(kArraySubpelVariance_c));
   1750 
   // C sub-pixel averaging-variance kernels exercised by
   // AvxSubpelAvgVarianceTest. The layout mirrors kArraySubpelVariance_c: one
   // entry per block size, with the 4:1 / 1:4 sizes limited to non-realtime
   // builds.
   1751 using SubpelAvgVarianceParams = TestParams<SubpixAvgVarMxNFunc>;
   1752 const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = {
   1753  SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
   1754  SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
   1755  SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
   1756  SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
   1757  SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
   1758  SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
   1759  SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
   1760  SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
   1761  SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
   1762  SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
   1763  SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
   1764  SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
   1765  SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
   1766  SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
   1767  SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
   1768  SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0),
   1769 #if !CONFIG_REALTIME_ONLY
   1770  SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0),
   1771  SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0),
   1772  SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0),
   1773  SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0),
   1774  SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0),
   1775  SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0),
   1776 #endif
   1777 };
   1778 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest,
   1779                         ::testing::ValuesIn(kArraySubpelAvgVariance_c));
   1780 
   // C OBMC sub-pixel variance kernels for AvxObmcSubpelVarianceTest, covering
   // all 22 block sizes. The whole instantiation is compiled only when
   // CONFIG_REALTIME_ONLY is off, matching the guard on the fixture alias.
   1781 #if !CONFIG_REALTIME_ONLY
   1782 INSTANTIATE_TEST_SUITE_P(
   1783    C, AvxObmcSubpelVarianceTest,
   1784    ::testing::Values(
   1785        ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c,
   1786                                 0),
   1787        ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0),
   1788        ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0),
   1789        ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0),
   1790        ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0),
   1791        ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0),
   1792        ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0),
   1793        ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0),
   1794        ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0),
   1795        ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0),
   1796        ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0),
   1797        ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0),
   1798        ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
   1799        ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
   1800        ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
   1801        ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0),
   1802 
   1803        ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0),
   1804        ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0),
   1805        ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0),
   1806        ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0),
   1807        ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0),
   1808        ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0)));
   1809 #endif
   1810 
   1811 #if CONFIG_AV1_HIGHBITDEPTH
   // Signature of a high-bitdepth WxH MSE kernel. The fixture calls it as
   // func(dst, dst_stride, src, src_stride, w, h) and reads a 64-bit result.
   1812 using MseHBDWxH16bitFunc = uint64_t (*)(uint16_t *, int, uint16_t *, int, int,
   1813                                        int);
   1814 
   1815 template <typename FunctionType>
   1816 class MseHBDWxHTestClass
   1817    : public ::testing::TestWithParam<TestParams<FunctionType> > {
   1818 public:
   1819  void SetUp() override {
   1820    params_ = this->GetParam();
   1821 
   1822    rnd_.Reset(ACMRandom::DeterministicSeed());
   1823    src_ = reinterpret_cast<uint16_t *>(
   1824        aom_memalign(16, block_size() * sizeof(src_)));
   1825    dst_ = reinterpret_cast<uint16_t *>(
   1826        aom_memalign(16, block_size() * sizeof(dst_)));
   1827    ASSERT_NE(src_, nullptr);
   1828    ASSERT_NE(dst_, nullptr);
   1829  }
   1830 
   1831  void TearDown() override {
   1832    aom_free(src_);
   1833    aom_free(dst_);
   1834    src_ = nullptr;
   1835    dst_ = nullptr;
   1836  }
   1837 
   1838 protected:
   1839  void RefMatchTestMse();
   1840  void SpeedTest();
   1841 
   1842 protected:
   1843  ACMRandom rnd_;
   1844  uint16_t *dst_;
   1845  uint16_t *src_;
   1846  TestParams<FunctionType> params_;
   1847 
   1848  // some relay helpers
   1849  int block_size() const { return params_.block_size; }
   1850  int width() const { return params_.width; }
   1851  int d_stride() const { return params_.width; }  // stride is same as width
   1852  int s_stride() const { return params_.width; }  // stride is same as width
   1853  int height() const { return params_.height; }
   1854  int mask() const { return params_.mask; }
   1855 };
   1856 
   1857 template <typename MseHBDWxHFunctionType>
   1858 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() {
   1859  aom_usec_timer ref_timer, test_timer;
   1860  double elapsed_time_c = 0;
   1861  double elapsed_time_simd = 0;
   1862  int run_time = 10000000;
   1863  int w = width();
   1864  int h = height();
   1865  int dstride = d_stride();
   1866  int sstride = s_stride();
   1867  for (int k = 0; k < block_size(); ++k) {
   1868    dst_[k] = rnd_.Rand16() & mask();
   1869    src_[k] = rnd_.Rand16() & mask();
   1870  }
   1871  aom_usec_timer_start(&ref_timer);
   1872  for (int i = 0; i < run_time; i++) {
   1873    aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h);
   1874  }
   1875  aom_usec_timer_mark(&ref_timer);
   1876  elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
   1877 
   1878  aom_usec_timer_start(&test_timer);
   1879  for (int i = 0; i < run_time; i++) {
   1880    params_.func(dst_, dstride, src_, sstride, w, h);
   1881  }
   1882  aom_usec_timer_mark(&test_timer);
   1883  elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
   1884 
   1885  printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
   1886         elapsed_time_c, elapsed_time_simd,
   1887         (elapsed_time_c / elapsed_time_simd));
   1888 }
   1889 
   1890 template <typename MseHBDWxHFunctionType>
   1891 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() {
   1892  uint64_t mse_ref = 0;
   1893  uint64_t mse_mod = 0;
   1894  int w = width();
   1895  int h = height();
   1896  int dstride = d_stride();
   1897  int sstride = s_stride();
   1898  for (int i = 0; i < 10; i++) {
   1899    for (int k = 0; k < block_size(); ++k) {
   1900      dst_[k] = rnd_.Rand16() & mask();
   1901      src_[k] = rnd_.Rand16() & mask();
   1902    }
   1903    API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c(
   1904                                 dst_, dstride, src_, sstride, w, h));
   1905    API_REGISTER_STATE_CHECK(
   1906        mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
   1907    EXPECT_EQ(mse_ref, mse_mod)
   1908        << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
   1909  }
   1910 }
   1911 
   // Aliases for the high-bitdepth suites. They reuse the same fixture
   // templates as the 8-bit suites under separate names, so they can be
   // instantiated with the aom_highbd_* kernels independently.
   // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST tells GoogleTest not to
   // report a suite that ends up with no instantiation.
   // NOTE(review): the marker for AvxHBDObmcSubpelVarianceTest sits outside the
   // !CONFIG_REALTIME_ONLY guard that declares the alias -- confirm intended.
   1912 using MseHBDWxHParams = TestParams<MseHBDWxH16bitFunc>;
   1913 using MseHBDWxHTest = MseHBDWxHTestClass<MseHBDWxH16bitFunc>;
   1914 using AvxHBDMseTest = MainTestClass<VarianceMxNFunc>;
   1915 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest);
   1916 using AvxHBDVarianceTest = MainTestClass<VarianceMxNFunc>;
   1917 using AvxHBDSubpelVarianceTest = SubpelVarianceTest<SubpixVarMxNFunc>;
   1918 using AvxHBDSubpelAvgVarianceTest = SubpelVarianceTest<SubpixAvgVarMxNFunc>;
   1919 #if !CONFIG_REALTIME_ONLY
   1920 using AvxHBDObmcSubpelVarianceTest = ObmcVarianceTest<ObmcSubpelVarFunc>;
   1921 #endif
   1922 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest);
   1923 
   // High-bitdepth test cases: each TEST_P body forwards to one fixture method.
   // DISABLED_-prefixed cases are the speed benchmarks. The OBMC cases are
   // compiled only when CONFIG_REALTIME_ONLY is off.
   1924 TEST_P(MseHBDWxHTest, RefMse) { RefMatchTestMse(); }
   1925 TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
   1926 TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); }
   1927 TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); }
   1928 TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) { SpeedTest(); }
   1929 TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); }
   1930 TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); }
   1931 TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); }
   1932 TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); }
   1933 TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); }
   1934 TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); }
   1935 TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
   1936 TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
   1937 TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
   1938 #if !CONFIG_REALTIME_ONLY
   1939 TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) { RefTest(); }
   1940 TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
   1941 TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
   1942 #endif
   1943 
   // Runs MseHBDWxHTest with the C reference itself as the function under test
   // (a self-check of the fixture), using four parameter sets and 10 as the
   // last argument (presumably the bit depth -- TODO confirm).
   1944 INSTANTIATE_TEST_SUITE_P(
   1945    C, MseHBDWxHTest,
   1946    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10),
   1947                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10),
   1948                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10),
   1949                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10)));
   1950 
   // C high-bitdepth MSE kernels for AvxHBDMseTest, grouped by the bit depth in
   // the function name (12, 10, then 8); the last argument of each entry
   // repeats that bit depth.
   1951 INSTANTIATE_TEST_SUITE_P(
   1952    C, AvxHBDMseTest,
   1953    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
   1954                      MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
   1955                      MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
   1956                      MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
   1957                      MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
   1958                      MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
   1959                      MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
   1960                      MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
   1961                      MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
   1962                      MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
   1963                      MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
   1964                      MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
   1965 
   // NEON builds only: registers the NEON high-bitdepth MSE kernels with the
   // same parameter sets as the C instantiations of MseHBDWxHTest and
   // AvxHBDMseTest.
   1966 #if HAVE_NEON
   1967 INSTANTIATE_TEST_SUITE_P(
   1968    NEON, MseHBDWxHTest,
   1969    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
   1970                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_neon, 10),
   1971                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
   1972                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_neon,
   1973                                      10)));
   1974 
   1975 INSTANTIATE_TEST_SUITE_P(
   1976    NEON, AvxHBDMseTest,
   1977    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_neon, 12),
   1978                      MseParams(4, 3, &aom_highbd_12_mse16x8_neon, 12),
   1979                      MseParams(3, 4, &aom_highbd_12_mse8x16_neon, 12),
   1980                      MseParams(3, 3, &aom_highbd_12_mse8x8_neon, 12),
   1981                      MseParams(4, 4, &aom_highbd_10_mse16x16_neon, 10),
   1982                      MseParams(4, 3, &aom_highbd_10_mse16x8_neon, 10),
   1983                      MseParams(3, 4, &aom_highbd_10_mse8x16_neon, 10),
   1984                      MseParams(3, 3, &aom_highbd_10_mse8x8_neon, 10),
   1985                      MseParams(4, 4, &aom_highbd_8_mse16x16_neon, 8),
   1986                      MseParams(4, 3, &aom_highbd_8_mse16x8_neon, 8),
   1987                      MseParams(3, 4, &aom_highbd_8_mse8x16_neon, 8),
   1988                      MseParams(3, 3, &aom_highbd_8_mse8x8_neon, 8)));
   1989 #endif  // HAVE_NEON
   1990 
   // NEON dot-product builds only: just the 8-bit (aom_highbd_8_*) MSE kernels
   // are registered here.
   1991 #if HAVE_NEON_DOTPROD
   1992 INSTANTIATE_TEST_SUITE_P(
   1993    NEON_DOTPROD, AvxHBDMseTest,
   1994    ::testing::Values(MseParams(4, 4, &aom_highbd_8_mse16x16_neon_dotprod, 8),
   1995                      MseParams(4, 3, &aom_highbd_8_mse16x8_neon_dotprod, 8),
   1996                      MseParams(3, 4, &aom_highbd_8_mse8x16_neon_dotprod, 8),
   1997                      MseParams(3, 3, &aom_highbd_8_mse8x8_neon_dotprod, 8)));
   1998 #endif  // HAVE_NEON_DOTPROD
   1999 
   // SVE builds only: registers the SVE WxH MSE kernel and the 12-bit and
   // 10-bit fixed-size MSE kernels. No 8-bit SVE entries are listed here.
   2000 #if HAVE_SVE
   2001 INSTANTIATE_TEST_SUITE_P(
   2002    SVE, MseHBDWxHTest,
   2003    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
   2004                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sve, 10),
   2005                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
   2006                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sve,
   2007                                      10)));
   2008 
   2009 INSTANTIATE_TEST_SUITE_P(
   2010    SVE, AvxHBDMseTest,
   2011    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sve, 12),
   2012                      MseParams(4, 3, &aom_highbd_12_mse16x8_sve, 12),
   2013                      MseParams(3, 4, &aom_highbd_12_mse8x16_sve, 12),
   2014                      MseParams(3, 3, &aom_highbd_12_mse8x8_sve, 12),
   2015                      MseParams(4, 4, &aom_highbd_10_mse16x16_sve, 10),
   2016                      MseParams(4, 3, &aom_highbd_10_mse16x8_sve, 10),
   2017                      MseParams(3, 4, &aom_highbd_10_mse8x16_sve, 10),
   2018                      MseParams(3, 3, &aom_highbd_10_mse8x8_sve, 10)));
   2019 #endif  // HAVE_SVE
   2020 
   // C high-bitdepth variance kernels exercised by AvxHBDVarianceTest. The
   // table lists the 16 main block sizes for 12-bit, then 10-bit, then 8-bit;
   // the six 4:1 / 1:4 sizes for each depth follow, in non-realtime builds
   // only. The last argument of every entry repeats the bit depth in the
   // function name.
   2021 const VarianceParams kArrayHBDVariance_c[] = {
   2022  VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
   2023  VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
   2024  VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
   2025  VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
   2026  VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
   2027  VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
   2028  VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
   2029  VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
   2030  VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
   2031  VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
   2032  VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
   2033  VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
   2034  VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
   2035  VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
   2036  VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
   2037  VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
   2038  VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
   2039  VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
   2040  VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
   2041  VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
   2042  VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
   2043  VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
   2044  VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
   2045  VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
   2046  VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
   2047  VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
   2048  VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
   2049  VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
   2050  VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
   2051  VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
   2052  VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
   2053  VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
   2054  VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
   2055  VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
   2056  VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
   2057  VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
   2058  VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
   2059  VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
   2060  VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
   2061  VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
   2062  VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
   2063  VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
   2064  VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
   2065  VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
   2066  VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
   2067  VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
   2068  VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
   2069  VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
   2070 #if !CONFIG_REALTIME_ONLY
   2071  VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
   2072  VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
   2073  VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
   2074  VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
   2075  VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
   2076  VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
   2077  VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
   2078  VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
   2079  VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
   2080  VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
   2081  VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
   2082  VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
   2083  VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
   2084  VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
   2085  VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
   2086  VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
   2087  VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
   2088  VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
   2089 #endif
   2090 };
   2091 INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest,
   2092                         ::testing::ValuesIn(kArrayHBDVariance_c));
   2093 
   // SSE4.1 builds only: registers the 4x4 high-bitdepth variance kernels for
   // 8-, 10- and 12-bit input. No other block size is listed here.
   2094 #if HAVE_SSE4_1
   2095 INSTANTIATE_TEST_SUITE_P(
   2096    SSE4_1, AvxHBDVarianceTest,
   2097    ::testing::Values(
   2098        VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
   2099        VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
   2100        VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
   2101 #endif  // HAVE_SSE4_1
   2102 
   // Parameter table for the C (_c) high-bitdepth sub-pixel variance functions,
   // registered below as the "C" instantiation of AvxHBDSubpelVarianceTest.
   // Entries are grouped by bit depth (8, 10, 12). In each entry the first two
   // arguments track log2 of the block width and height named in the function
   // (e.g. 7, 6 for 128x64) and the last argument matches the bit depth in the
   // function name. The six extra block sizes per bit depth at the end are only
   // listed when CONFIG_REALTIME_ONLY is off.
   2103 const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
   2104  SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
   2105  SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
   2106  SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
   2107  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
   2108  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
   2109  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
   2110  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
   2111  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
   2112  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
   2113  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
   2114  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
   2115  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
   2116  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
   2117  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
   2118  SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
   2119  SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
   2120  SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
   2121  SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
   2122  SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
   2123  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
   2124  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
   2125  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
   2126  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
   2127  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
   2128  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
   2129  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
   2130  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
   2131  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
   2132  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
   2133  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
   2134  SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
   2135  SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
   2136  SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
   2137  SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
   2138  SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
   2139  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
   2140  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
   2141  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
   2142  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
   2143  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
   2144  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
   2145  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
   2146  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
   2147  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
   2148  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
   2149  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
   2150  SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
   2151  SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
   2152 #if !CONFIG_REALTIME_ONLY
   2153  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
   2154  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
   2155  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
   2156  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
   2157  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
   2158  SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
   2159  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
   2160  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
   2161  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
   2162  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
   2163  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
   2164  SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
   2165  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
   2166  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
   2167  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
   2168  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
   2169  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
   2170  SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
   2171 #endif
   2172 };
   2173 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest,
   2174                         ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
   2175 
   // Parameter table for the C (_c) high-bitdepth sub-pixel *average* variance
   // functions, registered below as the "C" instantiation of
   // AvxHBDSubpelAvgVarianceTest. Layout mirrors the function names: entries are
   // grouped by bit depth (8, 10, 12), the first two arguments track log2 of the
   // block width and height in the name, and the last argument matches the bit
   // depth in the name. The trailing group of six block sizes per bit depth is
   // compiled in only when CONFIG_REALTIME_ONLY is off.
   2176 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
   2177  SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
   2178                          8),
   2179  SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
   2180                          8),
   2181  SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
   2182                          8),
   2183  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
   2184  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
   2185  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
   2186  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
   2187  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
   2188  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
   2189  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
   2190  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
   2191  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
   2192  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
   2193  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
   2194  SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
   2195  SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
   2196  SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
   2197                          10),
   2198  SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
   2199                          10),
   2200  SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
   2201                          10),
   2202  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
   2203                          10),
   2204  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
   2205                          10),
   2206  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
   2207                          10),
   2208  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
   2209                          10),
   2210  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
   2211                          10),
   2212  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
   2213                          10),
   2214  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
   2215                          10),
   2216  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
   2217                          10),
   2218  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
   2219                          10),
   2220  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
   2221  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
   2222  SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
   2223  SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
   2224  SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
   2225                          12),
   2226  SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
   2227                          12),
   2228  SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
   2229                          12),
   2230  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
   2231                          12),
   2232  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
   2233                          12),
   2234  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
   2235                          12),
   2236  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
   2237                          12),
   2238  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
   2239                          12),
   2240  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
   2241                          12),
   2242  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
   2243                          12),
   2244  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
   2245                          12),
   2246  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
   2247                          12),
   2248  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
   2249  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
   2250  SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
   2251  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
   2252 
   2253 #if !CONFIG_REALTIME_ONLY
   2254  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
   2255  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
   2256  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
   2257  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
   2258  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
   2259  SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
   2260  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
   2261                          10),
   2262  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
   2263                          10),
   2264  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
   2265                          10),
   2266  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
   2267                          10),
   2268  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
   2269                          10),
   2270  SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
   2271                          10),
   2272  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
   2273                          12),
   2274  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
   2275                          12),
   2276  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
   2277                          12),
   2278  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
   2279                          12),
   2280  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
   2281                          12),
   2282  SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
   2283                          12),
   2284 #endif
   2285 };
   2286 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest,
   2287                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
   2288 
   2289 #if !CONFIG_REALTIME_ONLY
   // Parameter table for the C (_c) high-bitdepth OBMC sub-pixel variance
   // functions, registered below as the "C" instantiation of
   // AvxHBDObmcSubpelVarianceTest. The whole table sits inside the enclosing
   // `#if !CONFIG_REALTIME_ONLY` guard, so unlike the other C tables the
   // 64x16 / 16x64 / 32x8 / 8x32 / 16x4 / 4x16 entries at the end need no
   // separate guard. As in the function names, the first two arguments track
   // log2 of the block width and height and the last argument matches the bit
   // depth (8, 10 or 12).
   2290 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
   2291  ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c,
   2292                           8),
   2293  ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c,
   2294                           8),
   2295  ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c,
   2296                           8),
   2297  ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c,
   2298                           8),
   2299  ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c,
   2300                           8),
   2301  ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c,
   2302                           8),
   2303  ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c,
   2304                           8),
   2305  ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c,
   2306                           8),
   2307  ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c,
   2308                           8),
   2309  ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c,
   2310                           8),
   2311  ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c,
   2312                           8),
   2313  ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c,
   2314                           8),
   2315  ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8),
   2316  ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8),
   2317  ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8),
   2318  ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8),
   2319  ObmcSubpelVarianceParams(7, 7,
   2320                           &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
   2321  ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
   2322                           10),
   2323  ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c,
   2324                           10),
   2325  ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c,
   2326                           10),
   2327  ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c,
   2328                           10),
   2329  ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c,
   2330                           10),
   2331  ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c,
   2332                           10),
   2333  ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c,
   2334                           10),
   2335  ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c,
   2336                           10),
   2337  ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c,
   2338                           10),
   2339  ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c,
   2340                           10),
   2341  ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c,
   2342                           10),
   2343  ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c,
   2344                           10),
   2345  ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c,
   2346                           10),
   2347  ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c,
   2348                           10),
   2349  ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c,
   2350                           10),
   2351  ObmcSubpelVarianceParams(7, 7,
   2352                           &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12),
   2353  ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c,
   2354                           12),
   2355  ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c,
   2356                           12),
   2357  ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c,
   2358                           12),
   2359  ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c,
   2360                           12),
   2361  ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c,
   2362                           12),
   2363  ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c,
   2364                           12),
   2365  ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c,
   2366                           12),
   2367  ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c,
   2368                           12),
   2369  ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c,
   2370                           12),
   2371  ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c,
   2372                           12),
   2373  ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c,
   2374                           12),
   2375  ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c,
   2376                           12),
   2377  ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c,
   2378                           12),
   2379  ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
   2380                           12),
   2381  ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
   2382                           12),
   2383 
   2384  ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c,
   2385                           8),
   2386  ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c,
   2387                           8),
   2388  ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c,
   2389                           8),
   2390  ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c,
   2391                           8),
   2392  ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c,
   2393                           8),
   2394  ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c,
   2395                           8),
   2396  ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c,
   2397                           10),
   2398  ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c,
   2399                           10),
   2400  ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c,
   2401                           10),
   2402  ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c,
   2403                           10),
   2404  ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c,
   2405                           10),
   2406  ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c,
   2407                           10),
   2408  ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c,
   2409                           12),
   2410  ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c,
   2411                           12),
   2412  ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c,
   2413                           12),
   2414  ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c,
   2415                           12),
   2416  ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c,
   2417                           12),
   2418  ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c,
   2419                           12),
   2420 };
   2421 INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest,
   2422                         ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
   2423 #endif  // !CONFIG_REALTIME_ONLY
   2424 #endif  // CONFIG_AV1_HIGHBITDEPTH
   2425 
   2426 #if HAVE_SSE2
   // Registers the "SSE2" instantiation of MseWxHTest. The same function,
   // aom_mse_wxh_16bit_sse2, is supplied with four combinations of 2 and 3 for
   // the first two arguments; the last argument is 8 in every case.
   2427 INSTANTIATE_TEST_SUITE_P(
   2428    SSE2, MseWxHTest,
   2429    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8),
   2430                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8),
   2431                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8),
   2432                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8)));
   2433 
   // Registers the "SSE2" instantiation of Mse16xHTest. The same function,
   // aom_mse_16xh_16bit_sse2, is supplied with four combinations of 2 and 3 for
   // the first two arguments; the last argument is 8 in every case.
   2434 INSTANTIATE_TEST_SUITE_P(
   2435    SSE2, Mse16xHTest,
   2436    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8),
   2437                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8),
   2438                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8),
   2439                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8)));
   2440 
   2441 #if !CONFIG_REALTIME_ONLY
   // Registers the "SSE2" instantiation of SumOfSquaresTest with
   // aom_get_mb_ss_sse2 as its only parameter. The surrounding guard compiles
   // this registration only when CONFIG_REALTIME_ONLY is off.
   2442 INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest,
   2443                         ::testing::Values(aom_get_mb_ss_sse2));
   2444 #endif  // !CONFIG_REALTIME_ONLY
   2445 
   // Registers the "SSE2" instantiation of AvxMseTest for the 16x16, 16x8, 8x16
   // and 8x8 MSE functions. The two leading arguments of each MseParams track
   // log2 of the width and height in the function name (4 for 16, 3 for 8).
   2446 INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest,
   2447                         ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
   2448                                           MseParams(4, 3, &aom_mse16x8_sse2),
   2449                                           MseParams(3, 4, &aom_mse8x16_sse2),
   2450                                           MseParams(3, 3, &aom_mse8x8_sse2)));
   2451 
   // Parameter table for the SSE2 variance functions (no bit-depth argument is
   // passed here), registered below as the "SSE2" instantiation of
   // AvxVarianceTest. The first two arguments of each entry track log2 of the
   // block width and height in the function name. The last six entries
   // (64x16, 32x8, 16x64, 16x4, 8x32, 4x16) are only listed when
   // CONFIG_REALTIME_ONLY is off.
   2452 const VarianceParams kArrayVariance_sse2[] = {
   2453  VarianceParams(7, 7, &aom_variance128x128_sse2),
   2454  VarianceParams(7, 6, &aom_variance128x64_sse2),
   2455  VarianceParams(6, 7, &aom_variance64x128_sse2),
   2456  VarianceParams(6, 6, &aom_variance64x64_sse2),
   2457  VarianceParams(6, 5, &aom_variance64x32_sse2),
   2458  VarianceParams(5, 6, &aom_variance32x64_sse2),
   2459  VarianceParams(5, 5, &aom_variance32x32_sse2),
   2460  VarianceParams(5, 4, &aom_variance32x16_sse2),
   2461  VarianceParams(4, 5, &aom_variance16x32_sse2),
   2462  VarianceParams(4, 4, &aom_variance16x16_sse2),
   2463  VarianceParams(4, 3, &aom_variance16x8_sse2),
   2464  VarianceParams(3, 4, &aom_variance8x16_sse2),
   2465  VarianceParams(3, 3, &aom_variance8x8_sse2),
   2466  VarianceParams(3, 2, &aom_variance8x4_sse2),
   2467  VarianceParams(2, 3, &aom_variance4x8_sse2),
   2468  VarianceParams(2, 2, &aom_variance4x4_sse2),
   2469 #if !CONFIG_REALTIME_ONLY
   2470  VarianceParams(6, 4, &aom_variance64x16_sse2),
   2471  VarianceParams(5, 3, &aom_variance32x8_sse2),
   2472  VarianceParams(4, 6, &aom_variance16x64_sse2),
   2473  VarianceParams(4, 2, &aom_variance16x4_sse2),
   2474  VarianceParams(3, 5, &aom_variance8x32_sse2),
   2475  VarianceParams(2, 4, &aom_variance4x16_sse2),
   2476 #endif
   2477 };
   2478 INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest,
   2479                         ::testing::ValuesIn(kArrayVariance_sse2));
   2480 
   // Parameter table for aom_get_var_sse_sum_8x8_quad_sse2, registered below as
   // the "SSE2" instantiation of GetSseSum8x8QuadTest. The same function is
   // exercised with four (first, second) argument pairs: (7, 7), (6, 6), (5, 5)
   // and (5, 4); the last argument is 0 in every entry.
   // NOTE(review): the pairs presumably are log2 width/height as in the variance
   // tables — confirm against the GetSseSumParams definition.
   2481 const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = {
   2482  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
   2483  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
   2484  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
   2485  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0)
   2486 };
   2487 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest,
   2488                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2));
   2489 
   // Parameter table for aom_get_var_sse_sum_16x16_dual_sse2, registered below
   // as the "SSE2" instantiation of GetSseSum16x16DualTest. The same function is
   // exercised with four (first, second) argument pairs: (7, 7), (6, 6), (5, 5)
   // and (5, 4); the last argument is 0 in every entry.
   // NOTE(review): the pairs presumably are log2 width/height as in the variance
   // tables — confirm against the GetSseSumParamsDual definition.
   2490 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = {
   2491  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
   2492  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
   2493  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
   2494  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0)
   2495 };
   2496 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest,
   2497                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2));
   2498 
   2499 #if CONFIG_AV1_HIGHBITDEPTH
   2500 #if HAVE_SSE2
   // Registers the "SSE2" instantiation of MseHBDWxHTest. The same function,
   // aom_mse_wxh_16bit_highbd_sse2, is supplied with four combinations of 2 and
   // 3 for the first two arguments; the last argument is 10 in every case.
   // NOTE(review): the `#if HAVE_SSE2` directly above is nested inside the
   // HAVE_SSE2 region opened before the SSE2 MseWxHTest registration, so it
   // looks redundant — confirm before removing.
   2501 INSTANTIATE_TEST_SUITE_P(
   2502    SSE2, MseHBDWxHTest,
   2503    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
   2504                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10),
   2505                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
   2506                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2,
   2507                                      10)));
   2508 
   // Registers the "SSE2" instantiation of AvxHBDMseTest for the 16x16 and 8x8
   // high-bitdepth MSE functions at 12, 10 and 8 bits. The first two arguments
   // track log2 of the block size in the function name (4 for 16, 3 for 8) and
   // the last argument matches the bit depth in the name.
   2509 INSTANTIATE_TEST_SUITE_P(
   2510    SSE2, AvxHBDMseTest,
   2511    ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12),
   2512                      MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12),
   2513                      MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10),
   2514                      MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
   2515                      MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
   2516                      MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
   2517 #endif  // HAVE_SSE2
   2518 #if HAVE_SSE4_1
   // Registers the "SSE4_1" instantiation for the 4x4 high-bitdepth sub-pixel
   // variance functions at 8, 10 and 12 bits.
   // NOTE(review): the functions are aom_highbd_* but the suite named here is
   // AvxSubpelVarianceTest, whereas the C high-bitdepth table is registered
   // under AvxHBDSubpelVarianceTest — confirm the suite choice is intended.
   2519 INSTANTIATE_TEST_SUITE_P(
   2520    SSE4_1, AvxSubpelVarianceTest,
   2521    ::testing::Values(
   2522        SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1,
   2523                             8),
   2524        SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1,
   2525                             10),
   2526        SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1,
   2527                             12)));
   2528 
   // Registers the "SSE4_1" instantiation for the 4x4 high-bitdepth sub-pixel
   // average variance functions at 8, 10 and 12 bits.
   // NOTE(review): the functions are aom_highbd_* but the suite named here is
   // AvxSubpelAvgVarianceTest, whereas the C high-bitdepth table is registered
   // under AvxHBDSubpelAvgVarianceTest — confirm the suite choice is intended.
   2529 INSTANTIATE_TEST_SUITE_P(
   2530    SSE4_1, AvxSubpelAvgVarianceTest,
   2531    ::testing::Values(
   2532        SubpelAvgVarianceParams(2, 2,
   2533                                &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1,
   2534                                8),
   2535        SubpelAvgVarianceParams(2, 2,
   2536                                &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1,
   2537                                10),
   2538        SubpelAvgVarianceParams(2, 2,
   2539                                &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
   2540                                12)));
   2541 #endif  // HAVE_SSE4_1
   2542 
   2543 #if HAVE_AVX2
   // Registers the "AVX2" instantiation of AvxHBDMseTest. Only the 10-bit 16x16
   // function, aom_highbd_10_mse16x16_avx2, is listed.
   2544 INSTANTIATE_TEST_SUITE_P(
   2545    AVX2, AvxHBDMseTest,
   2546    ::testing::Values(MseParams(4, 4, &aom_highbd_10_mse16x16_avx2, 10)));
   2547 #endif  // HAVE_AVX2
   2548 
   // Parameter table for the SSE2 high-bitdepth variance functions, registered
   // below as the "SSE2" instantiation of AvxHBDVarianceTest. Entries are
   // grouped by bit depth in the order 12, 10, 8; the first two arguments track
   // log2 of the block width and height in the function name and the last
   // argument matches the bit depth in the name. The smallest block listed is
   // 8x8. The 64x16 / 16x64 / 32x8 / 8x32 entries are only listed when
   // CONFIG_REALTIME_ONLY is off; the 16x4 and 4x16 entries in that section are
   // commented out for all three bit depths (the reason is not recorded here).
   2549 const VarianceParams kArrayHBDVariance_sse2[] = {
   2550  VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),
   2551  VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12),
   2552  VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12),
   2553  VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
   2554  VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
   2555  VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
   2556  VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
   2557  VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
   2558  VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
   2559  VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
   2560  VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
   2561  VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
   2562  VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
   2563  VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10),
   2564  VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10),
   2565  VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10),
   2566  VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
   2567  VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
   2568  VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
   2569  VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
   2570  VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
   2571  VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
   2572  VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
   2573  VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
   2574  VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
   2575  VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
   2576  VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8),
   2577  VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8),
   2578  VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8),
   2579  VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
   2580  VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
   2581  VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
   2582  VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
   2583  VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
   2584  VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
   2585  VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
   2586  VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
   2587  VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
   2588  VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8),
   2589 #if !CONFIG_REALTIME_ONLY
   2590  VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12),
   2591  VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12),
   2592  VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12),
   2593  VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12),
   2594  // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12),
   2595  // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12),
   2596  VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10),
   2597  VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10),
   2598  VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10),
   2599  VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10),
   2600  // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10),
   2601  // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10),
   2602  VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8),
   2603  VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8),
   2604  VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8),
   2605  VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8),
   2606 // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8),
   2607 // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8),
   2608 #endif
   2609 };
   2610 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest,
   2611                         ::testing::ValuesIn(kArrayHBDVariance_sse2));
   2612 
   2613 #if HAVE_AVX2
   2614 
   // Registers the "AVX2" instantiation of MseHBDWxHTest. The same function,
   // aom_mse_wxh_16bit_highbd_avx2, is supplied with four combinations of 2 and
   // 3 for the first two arguments; the last argument is 10 in every case.
   2615 INSTANTIATE_TEST_SUITE_P(
   2616    AVX2, MseHBDWxHTest,
   2617    ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
   2618                      MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10),
   2619                      MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
   2620                      MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2,
   2621                                      10)));
   2622 
   // Parameter table for the AVX2 high-bitdepth variance functions, registered
   // below as the "AVX2" instantiation of AvxHBDVarianceTest. Only 10-bit
   // functions are listed, from 128x128 down to 8x8; the first two arguments
   // track log2 of the block width and height in the function name. The
   // 64x16 / 32x8 / 16x64 / 8x32 entries are only listed when
   // CONFIG_REALTIME_ONLY is off.
   2623 const VarianceParams kArrayHBDVariance_avx2[] = {
   2624  VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10),
   2625  VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10),
   2626  VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10),
   2627  VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10),
   2628  VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10),
   2629  VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10),
   2630  VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10),
   2631  VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10),
   2632  VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10),
   2633  VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
   2634  VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
   2635  VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
   2636  VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10),
   2637 #if !CONFIG_REALTIME_ONLY
   2638  VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10),
   2639  VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10),
   2640  VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10),
   2641  VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10),
   2642 #endif
   2643 };
   2644 
   2645 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest,
   2646                         ::testing::ValuesIn(kArrayHBDVariance_avx2));
   2647 
   2648 const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = {
   2649  SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10),
   2650  SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10),
   2651  SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10),
   2652  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10),
   2653  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10),
   2654  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10),
   2655  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10),
   2656  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10),
   2657  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10),
   2658  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10),
   2659  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10),
   2660  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10),
   2661  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10),
   2662 };
   2663 
   2664 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest,
   2665                         ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2));
   2666 #endif  // HAVE_AVX2
   2667 
   2668 const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = {
   2669  SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12),
   2670  SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12),
   2671  SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12),
   2672  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
   2673  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
   2674  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
   2675  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
   2676  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
   2677  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
   2678  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
   2679  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
   2680  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
   2681  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
   2682  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
   2683  SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10),
   2684  SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10),
   2685  SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10),
   2686  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
   2687  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
   2688  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
   2689  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
   2690  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
   2691  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
   2692  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
   2693  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
   2694  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
   2695  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
   2696  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
   2697  SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8),
   2698  SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8),
   2699  SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8),
   2700  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
   2701  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
   2702  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
   2703  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
   2704  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
   2705  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
   2706  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
   2707  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
   2708  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
   2709  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
   2710  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
   2711 #if !CONFIG_REALTIME_ONLY
   2712  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12),
   2713  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
   2714  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
   2715  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
   2716  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
   2717  // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
   2718  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
   2719  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
   2720  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
   2721  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
   2722  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
   2723  // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
   2724  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
   2725  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
   2726  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
   2727  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
   2728  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
   2729 // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
   2730 #endif
   2731 };
   2732 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest,
   2733                         ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
   2734 
   2735 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
   2736  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2,
   2737                          12),
   2738  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2,
   2739                          12),
   2740  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2,
   2741                          12),
   2742  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2,
   2743                          12),
   2744  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2,
   2745                          12),
   2746  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
   2747                          12),
   2748  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
   2749                          12),
   2750  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
   2751                          12),
   2752  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
   2753                          12),
   2754  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
   2755                          12),
   2756  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2,
   2757                          12),
   2758  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2,
   2759                          10),
   2760  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2,
   2761                          10),
   2762  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2,
   2763                          10),
   2764  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2,
   2765                          10),
   2766  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2,
   2767                          10),
   2768  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
   2769                          10),
   2770  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
   2771                          10),
   2772  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
   2773                          10),
   2774  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
   2775                          10),
   2776  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
   2777                          10),
   2778  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2,
   2779                          10),
   2780  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2,
   2781                          8),
   2782  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2,
   2783                          8),
   2784  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2,
   2785                          8),
   2786  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2,
   2787                          8),
   2788  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2,
   2789                          8),
   2790  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
   2791                          8),
   2792  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
   2793                          8),
   2794  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
   2795                          8),
   2796  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
   2797                          8),
   2798  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
   2799                          8),
   2800  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2,
   2801                          8),
   2802 
   2803 #if !CONFIG_REALTIME_ONLY
   2804  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2,
   2805                          12),
   2806  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2,
   2807                          12),
   2808  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2,
   2809                          12),
   2810  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
   2811                          12),
   2812  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
   2813                          12),
   2814  // SubpelAvgVarianceParams(2, 4,
   2815  // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
   2816  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
   2817                          10),
   2818  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2,
   2819                          10),
   2820  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2,
   2821                          10),
   2822  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
   2823                          10),
   2824  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
   2825                          10),
   2826  // SubpelAvgVarianceParams(2, 4,
   2827  // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
   2828  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
   2829                          8),
   2830  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2,
   2831                          8),
   2832  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2,
   2833                          8),
   2834  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
   2835                          8),
   2836  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
   2837                          8),
   2838 // SubpelAvgVarianceParams(2, 4,
   2839 // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
   2840 #endif
   2841 };
   2842 
   2843 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
   2844                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
   2845 #endif  // HAVE_SSE2
   2846 #endif  // CONFIG_AV1_HIGHBITDEPTH
   2847 
   2848 #if HAVE_SSSE3
   2849 const SubpelVarianceParams kArraySubpelVariance_ssse3[] = {
   2850  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
   2851  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
   2852  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
   2853  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
   2854  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
   2855  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
   2856  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
   2857  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
   2858  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
   2859  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
   2860  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
   2861  SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
   2862  SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
   2863  SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
   2864  SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
   2865  SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0),
   2866 #if !CONFIG_REALTIME_ONLY
   2867  SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0),
   2868  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0),
   2869  SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0),
   2870  SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0),
   2871  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0),
   2872  SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0),
   2873 #endif
   2874 };
   2875 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest,
   2876                         ::testing::ValuesIn(kArraySubpelVariance_ssse3));
   2877 
   2878 const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = {
   2879  SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0),
   2880  SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0),
   2881  SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0),
   2882  SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0),
   2883  SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0),
   2884  SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0),
   2885  SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0),
   2886  SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0),
   2887  SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0),
   2888  SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0),
   2889  SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
   2890  SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
   2891  SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
   2892  SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
   2893  SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
   2894  SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0),
   2895 #if !CONFIG_REALTIME_ONLY
   2896  SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0),
   2897  SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0),
   2898  SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0),
   2899  SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0),
   2900  SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0),
   2901  SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0),
   2902 #endif
   2903 };
   2904 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest,
   2905                         ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3));
   2906 #endif  // HAVE_SSSE3
   2907 
   2908 #if HAVE_SSE4_1
   2909 #if !CONFIG_REALTIME_ONLY
   2910 INSTANTIATE_TEST_SUITE_P(
   2911    SSE4_1, AvxObmcSubpelVarianceTest,
   2912    ::testing::Values(
   2913        ObmcSubpelVarianceParams(7, 7,
   2914                                 &aom_obmc_sub_pixel_variance128x128_sse4_1, 0),
   2915        ObmcSubpelVarianceParams(7, 6,
   2916                                 &aom_obmc_sub_pixel_variance128x64_sse4_1, 0),
   2917        ObmcSubpelVarianceParams(6, 7,
   2918                                 &aom_obmc_sub_pixel_variance64x128_sse4_1, 0),
   2919        ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1,
   2920                                 0),
   2921        ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1,
   2922                                 0),
   2923        ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1,
   2924                                 0),
   2925        ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1,
   2926                                 0),
   2927        ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1,
   2928                                 0),
   2929        ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1,
   2930                                 0),
   2931        ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1,
   2932                                 0),
   2933        ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1,
   2934                                 0),
   2935        ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1,
   2936                                 0),
   2937        ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1,
   2938                                 0),
   2939        ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1,
   2940                                 0),
   2941        ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
   2942                                 0),
   2943        ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
   2944                                 0),
   2945        ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1,
   2946                                 0),
   2947        ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1,
   2948                                 0),
   2949        ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1,
   2950                                 0),
   2951        ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1,
   2952                                 0),
   2953        ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1,
   2954                                 0),
   2955        ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1,
   2956                                 0)));
   2957 #endif
   2958 #endif  // HAVE_SSE4_1
   2959 
   2960 #if HAVE_AVX2
   2961 
   2962 INSTANTIATE_TEST_SUITE_P(
   2963    AVX2, MseWxHTest,
   2964    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8),
   2965                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8),
   2966                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8),
   2967                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8)));
   2968 
   2969 INSTANTIATE_TEST_SUITE_P(
   2970    AVX2, Mse16xHTest,
   2971    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8),
   2972                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8),
   2973                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8),
   2974                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8)));
   2975 
   2976 INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest,
   2977                         ::testing::Values(MseParams(4, 4,
   2978                                                     &aom_mse16x16_avx2)));
   2979 
   2980 const VarianceParams kArrayVariance_avx2[] = {
   2981  VarianceParams(7, 7, &aom_variance128x128_avx2),
   2982  VarianceParams(7, 6, &aom_variance128x64_avx2),
   2983  VarianceParams(6, 7, &aom_variance64x128_avx2),
   2984  VarianceParams(6, 6, &aom_variance64x64_avx2),
   2985  VarianceParams(6, 5, &aom_variance64x32_avx2),
   2986  VarianceParams(5, 6, &aom_variance32x64_avx2),
   2987  VarianceParams(5, 5, &aom_variance32x32_avx2),
   2988  VarianceParams(5, 4, &aom_variance32x16_avx2),
   2989  VarianceParams(4, 5, &aom_variance16x32_avx2),
   2990  VarianceParams(4, 4, &aom_variance16x16_avx2),
   2991  VarianceParams(4, 3, &aom_variance16x8_avx2),
   2992 #if !CONFIG_REALTIME_ONLY
   2993  VarianceParams(6, 4, &aom_variance64x16_avx2),
   2994  VarianceParams(4, 6, &aom_variance16x64_avx2),
   2995  VarianceParams(5, 3, &aom_variance32x8_avx2),
   2996  VarianceParams(4, 2, &aom_variance16x4_avx2),
   2997 #endif
   2998 };
   2999 INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest,
   3000                         ::testing::ValuesIn(kArrayVariance_avx2));
   3001 
   3002 const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = {
   3003  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
   3004  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
   3005  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
   3006  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0)
   3007 };
   3008 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest,
   3009                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2));
   3010 
   3011 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = {
   3012  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
   3013  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
   3014  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
   3015  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0)
   3016 };
   3017 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest,
   3018                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2));
   3019 
   3020 const SubpelVarianceParams kArraySubpelVariance_avx2[] = {
   3021  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
   3022  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
   3023  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
   3024  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
   3025  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
   3026  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
   3027  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
   3028  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0),
   3029 
   3030  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0),
   3031  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0),
   3032  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0),
   3033 #if !CONFIG_REALTIME_ONLY
   3034  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0),
   3035  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0),
   3036 #endif
   3037 };
   3038 INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest,
   3039                         ::testing::ValuesIn(kArraySubpelVariance_avx2));
   3040 
   3041 INSTANTIATE_TEST_SUITE_P(
   3042    AVX2, AvxSubpelAvgVarianceTest,
   3043    ::testing::Values(
   3044        SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
   3045                                0),
   3046        SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
   3047                                0),
   3048        SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
   3049                                0),
   3050        SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
   3051        SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
   3052        SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
   3053        SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
   3054        SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
   3055                                0)));
   3056 #endif  // HAVE_AVX2
   3057 
   3058 #if HAVE_NEON
   3059 INSTANTIATE_TEST_SUITE_P(
   3060    NEON, MseWxHTest,
   3061    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8),
   3062                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8),
   3063                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8),
   3064                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8)));
   3065 
   3066 INSTANTIATE_TEST_SUITE_P(
   3067    NEON, Mse16xHTest,
   3068    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8),
   3069                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8),
   3070                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8),
   3071                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8)));
   3072 
   3073 #if !CONFIG_REALTIME_ONLY
   3074 INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest,
   3075                         ::testing::Values(aom_get_mb_ss_neon));
   3076 #endif  // !CONFIG_REALTIME_ONLY
   3077 
   3078 INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest,
   3079                         ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon),
   3080                                           MseParams(3, 4, &aom_mse8x16_neon),
   3081                                           MseParams(4, 4, &aom_mse16x16_neon),
   3082                                           MseParams(4, 3, &aom_mse16x8_neon)));
   3083 
   3084 const VarianceParams kArrayVariance_neon[] = {
   3085  VarianceParams(7, 7, &aom_variance128x128_neon),
   3086  VarianceParams(6, 6, &aom_variance64x64_neon),
   3087  VarianceParams(7, 6, &aom_variance128x64_neon),
   3088  VarianceParams(6, 7, &aom_variance64x128_neon),
   3089  VarianceParams(6, 6, &aom_variance64x64_neon),
   3090  VarianceParams(6, 5, &aom_variance64x32_neon),
   3091  VarianceParams(5, 6, &aom_variance32x64_neon),
   3092  VarianceParams(5, 5, &aom_variance32x32_neon),
   3093  VarianceParams(5, 4, &aom_variance32x16_neon),
   3094  VarianceParams(4, 5, &aom_variance16x32_neon),
   3095  VarianceParams(4, 4, &aom_variance16x16_neon),
   3096  VarianceParams(4, 3, &aom_variance16x8_neon),
   3097  VarianceParams(3, 4, &aom_variance8x16_neon),
   3098  VarianceParams(3, 3, &aom_variance8x8_neon),
   3099  VarianceParams(3, 2, &aom_variance8x4_neon),
   3100  VarianceParams(2, 3, &aom_variance4x8_neon),
   3101  VarianceParams(2, 2, &aom_variance4x4_neon),
   3102 #if !CONFIG_REALTIME_ONLY
   3103  VarianceParams(2, 4, &aom_variance4x16_neon),
   3104  VarianceParams(4, 2, &aom_variance16x4_neon),
   3105  VarianceParams(3, 5, &aom_variance8x32_neon),
   3106  VarianceParams(5, 3, &aom_variance32x8_neon),
   3107  VarianceParams(4, 6, &aom_variance16x64_neon),
   3108  VarianceParams(6, 4, &aom_variance64x16_neon),
   3109 #endif
   3110 };
   3111 
   3112 INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest,
   3113                         ::testing::ValuesIn(kArrayVariance_neon));
   3114 
   3115 const SubpelVarianceParams kArraySubpelVariance_neon[] = {
   3116  SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0),
   3117  SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0),
   3118  SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0),
   3119  SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
   3120  SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0),
   3121  SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0),
   3122  SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
   3123  SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0),
   3124  SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0),
   3125  SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
   3126  SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0),
   3127  SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0),
   3128  SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0),
   3129  SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0),
   3130  SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0),
   3131  SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0),
   3132 #if !CONFIG_REALTIME_ONLY
   3133  SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0),
   3134  SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0),
   3135  SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0),
   3136  SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0),
   3137  SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0),
   3138  SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0),
   3139 #endif
   3140 };
   3141 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest,
   3142                         ::testing::ValuesIn(kArraySubpelVariance_neon));
   3143 
   3144 const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = {
         // NEON sub-pixel averaging variance kernels fed to
         // AvxSubpelAvgVarianceTest (instantiated right after this table).
         // In every entry the first two arguments are log2 of the block width
         // and height named in the function (e.g. 7, 6 <-> 128x64).
         // NOTE(review): the trailing 0 is presumably the bit-depth slot, left
         // at 0 for these non-highbd kernels -- confirm against the params type.
   3145  SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0),
   3146  SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0),
   3147  SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0),
   3148  SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0),
   3149  SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0),
   3150  SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0),
   3151  SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0),
   3152  SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0),
   3153  SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0),
   3154  SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0),
   3155  SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0),
   3156  SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0),
   3157  SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0),
   3158  SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0),
   3159  SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0),
   3160  SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0),
         // The 4:1 and 1:4 block sizes below are only listed when
         // CONFIG_REALTIME_ONLY is 0.
   3161 #if !CONFIG_REALTIME_ONLY
   3162  SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0),
   3163  SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0),
   3164  SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0),
   3165  SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0),
   3166  SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0),
   3167  SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0),
   3168 #endif
   3169 };
        // One AvxSubpelAvgVarianceTest instance per table element, registered
        // under the "NEON" prefix.
   3170 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest,
   3171                         ::testing::ValuesIn(kArraySubpelAvgVariance_neon));
   3172 
   3173 #if !CONFIG_REALTIME_ONLY
        // NEON OBMC sub-pixel variance kernels fed to AvxObmcSubpelVarianceTest.
        // The whole table and its instantiation are compiled only when
        // CONFIG_REALTIME_ONLY is 0, so the 4:1 / 1:4 sizes at the end need no
        // inner guard of their own.
   3174 const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = {
         // First two arguments: log2 of the block width and height named in the
         // function (e.g. 7, 6 <-> 128x64).
         // NOTE(review): the trailing 0 is presumably the bit-depth slot for
         // these non-highbd kernels -- confirm against the params type.
   3175  ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0),
   3176  ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0),
   3177  ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0),
   3178  ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0),
   3179  ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0),
   3180  ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0),
   3181  ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0),
   3182  ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0),
   3183  ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0),
   3184  ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0),
   3185  ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0),
   3186  ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0),
   3187  ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0),
   3188  ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0),
   3189  ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0),
   3190  ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0),
   3191  ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0),
   3192  ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0),
   3193  ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0),
   3194  ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0),
   3195  ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0),
   3196  ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0),
   3197 };
        // One AvxObmcSubpelVarianceTest instance per table element, registered
        // under the "NEON" prefix.
   3198 INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest,
   3199                         ::testing::ValuesIn(kArrayObmcSubpelVariance_neon));
   3200 #endif
   3201 
   3202 const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = {
         // The same NEON kernel is listed four times; only the first two
         // arguments change (7,7 / 6,6 / 5,5 / 5,4).
         // NOTE(review): presumably log2 of the width and height of the region
         // the test covers -- confirm against GetSseSumParams.
   3203  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0),
   3204  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0),
   3205  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0),
   3206  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0)
   3207 };
        // One GetSseSum8x8QuadTest instance per table element, registered under
        // the "NEON" prefix.
   3208 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest,
   3209                         ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon));
   3210 
   3211 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = {
         // The same NEON kernel is listed four times; only the first two
         // arguments change (7,7 / 6,6 / 5,5 / 5,4).
         // NOTE(review): presumably log2 of the width and height of the region
         // the test covers -- confirm against GetSseSumParamsDual.
   3212  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0),
   3213  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0),
   3214  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0),
   3215  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0)
   3216 };
        // One GetSseSum16x16DualTest instance per table element, registered
        // under the "NEON" prefix.
   3217 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest,
   3218                         ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon));
   3219 
   3220 #if CONFIG_AV1_HIGHBITDEPTH
   3221 const VarianceParams kArrayHBDVariance_neon[] = {
         // High-bitdepth NEON variance kernels fed to AvxHBDVarianceTest.
         // Entries are grouped by bit depth in the order 12, 10, 8; within each
         // group sizes run from 128x128 down to 4x4.
         // First two arguments: log2 of the block width and height named in the
         // function. Last argument: the bit depth, matching the
         // aom_highbd_<depth>_ prefix of the function.
   3222  VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12),
   3223  VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12),
   3224  VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12),
   3225  VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12),
   3226  VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12),
   3227  VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12),
   3228  VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12),
   3229  VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12),
   3230  VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12),
   3231  VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12),
   3232  VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12),
   3233  VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12),
   3234  VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12),
   3235  VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12),
   3236  VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12),
   3237  VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12),
   3238  VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10),
   3239  VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10),
   3240  VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10),
   3241  VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10),
   3242  VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10),
   3243  VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10),
   3244  VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10),
   3245  VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10),
   3246  VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10),
   3247  VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10),
   3248  VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10),
   3249  VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10),
   3250  VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10),
   3251  VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10),
   3252  VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10),
   3253  VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10),
   3254  VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8),
   3255  VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8),
   3256  VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8),
   3257  VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8),
   3258  VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8),
   3259  VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8),
   3260  VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8),
   3261  VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8),
   3262  VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8),
   3263  VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8),
   3264  VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8),
   3265  VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8),
   3266  VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8),
   3267  VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8),
   3268  VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8),
   3269  VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8),
         // The 4:1 and 1:4 block sizes (again 12-, 10-, then 8-bit) are only
         // listed when CONFIG_REALTIME_ONLY is 0.
   3270 #if !CONFIG_REALTIME_ONLY
   3271  VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12),
   3272  VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12),
   3273  VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12),
   3274  VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12),
   3275  VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12),
   3276  VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12),
   3277  VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10),
   3278  VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10),
   3279  VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10),
   3280  VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10),
   3281  VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10),
   3282  VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10),
   3283  VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8),
   3284  VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8),
   3285  VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8),
   3286  VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8),
   3287  VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8),
   3288  VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8),
   3289 #endif
   3290 };
   3291 
        // One AvxHBDVarianceTest instance per table element, registered under
        // the "NEON" prefix.
   3292 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest,
   3293                         ::testing::ValuesIn(kArrayHBDVariance_neon));
   3294 
   3295 const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = {
         // High-bitdepth NEON sub-pixel variance kernels fed to
         // AvxHBDSubpelVarianceTest. Unconditional entries are grouped by bit
         // depth in the order 12, 10, 8, each running from 64x64 down to 4x4.
         // First two arguments: log2 of the block width and height named in the
         // function. Last argument: the bit depth, matching the
         // aom_highbd_<depth>_ prefix of the function.
         // NOTE(review): the largest size listed here is 64x64 (no 128x128,
         // 128x64 or 64x128 entries) -- confirm whether 128-wide kernels exist
         // for this family and should be covered.
   3296  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12),
   3297  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12),
   3298  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12),
   3299  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12),
   3300  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12),
   3301  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12),
   3302  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12),
   3303  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12),
   3304  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12),
   3305  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12),
   3306  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12),
   3307  SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12),
   3308  SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12),
   3309  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10),
   3310  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10),
   3311  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10),
   3312  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10),
   3313  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10),
   3314  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10),
   3315  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10),
   3316  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10),
   3317  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10),
   3318  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10),
   3319  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10),
   3320  SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10),
   3321  SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10),
   3322  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8),
   3323  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8),
   3324  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8),
   3325  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8),
   3326  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8),
   3327  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8),
   3328  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8),
   3329  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8),
   3330  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8),
   3331  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8),
   3332  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8),
   3333  SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8),
   3334  SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8),
         // The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY
         // is 0. Note the group order flips here: 8-, then 10-, then 12-bit.
   3335 #if !CONFIG_REALTIME_ONLY
   3336  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8),
   3337  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8),
   3338  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8),
   3339  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8),
   3340  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8),
   3341  SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8),
   3342  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10),
   3343  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10),
   3344  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10),
   3345  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10),
   3346  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10),
   3347  SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10),
   3348  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12),
   3349  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12),
   3350  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12),
   3351  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12),
   3352  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12),
   3353  SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12),
   3354 #endif  // !CONFIG_REALTIME_ONLY
   3355 };
   3356 
        // One AvxHBDSubpelVarianceTest instance per table element, registered
        // under the "NEON" prefix.
   3357 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest,
   3358                         ::testing::ValuesIn(kArrayHBDSubpelVariance_neon));
   3359 
   3360 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = {
         // High-bitdepth NEON sub-pixel averaging variance kernels fed to
         // AvxHBDSubpelAvgVarianceTest. Unconditional entries are grouped by
         // bit depth in the order 8, 10, 12, each running from 128x128 down to
         // 4x4.
         // First two arguments: log2 of the block width and height named in the
         // function. Last argument: the bit depth, matching the
         // aom_highbd_<depth>_ prefix of the function.
   3361  SubpelAvgVarianceParams(7, 7,
   3362                          &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8),
   3363  SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon,
   3364                          8),
   3365  SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon,
   3366                          8),
   3367  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon,
   3368                          8),
   3369  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon,
   3370                          8),
   3371  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon,
   3372                          8),
   3373  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon,
   3374                          8),
   3375  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon,
   3376                          8),
   3377  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon,
   3378                          8),
   3379  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon,
   3380                          8),
   3381  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon,
   3382                          8),
   3383  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon,
   3384                          8),
   3385  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon,
   3386                          8),
   3387  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon,
   3388                          8),
   3389  SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon,
   3390                          8),
   3391  SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon,
   3392                          8),
   3393  SubpelAvgVarianceParams(
   3394      7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10),
   3395  SubpelAvgVarianceParams(7, 6,
   3396                          &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10),
   3397  SubpelAvgVarianceParams(6, 7,
   3398                          &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10),
   3399  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon,
   3400                          10),
   3401  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon,
   3402                          10),
   3403  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon,
   3404                          10),
   3405  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon,
   3406                          10),
   3407  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon,
   3408                          10),
   3409  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon,
   3410                          10),
   3411  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon,
   3412                          10),
   3413  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon,
   3414                          10),
   3415  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon,
   3416                          10),
   3417  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon,
   3418                          10),
   3419  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon,
   3420                          10),
   3421  SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon,
   3422                          10),
   3423  SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon,
   3424                          10),
   3425  SubpelAvgVarianceParams(
   3426      7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12),
   3427  SubpelAvgVarianceParams(7, 6,
   3428                          &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12),
   3429  SubpelAvgVarianceParams(6, 7,
   3430                          &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12),
   3431  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon,
   3432                          12),
   3433  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon,
   3434                          12),
   3435  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon,
   3436                          12),
   3437  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon,
   3438                          12),
   3439  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon,
   3440                          12),
   3441  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon,
   3442                          12),
   3443  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon,
   3444                          12),
   3445  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon,
   3446                          12),
   3447  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon,
   3448                          12),
   3449  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon,
   3450                          12),
   3451  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon,
   3452                          12),
   3453  SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon,
   3454                          12),
   3455  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon,
   3456                          12),
   3457 
         // The 4:1 and 1:4 block sizes (again 8-, 10-, then 12-bit) are only
         // listed when CONFIG_REALTIME_ONLY is 0.
   3458 #if !CONFIG_REALTIME_ONLY
   3459  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon,
   3460                          8),
   3461  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon,
   3462                          8),
   3463  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon,
   3464                          8),
   3465  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon,
   3466                          8),
   3467  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon,
   3468                          8),
   3469  SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon,
   3470                          8),
   3471  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon,
   3472                          10),
   3473  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon,
   3474                          10),
   3475  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon,
   3476                          10),
   3477  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon,
   3478                          10),
   3479  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon,
   3480                          10),
   3481  SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon,
   3482                          10),
   3483  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon,
   3484                          12),
   3485  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon,
   3486                          12),
   3487  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon,
   3488                          12),
   3489  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon,
   3490                          12),
   3491  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon,
   3492                          12),
   3493  SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon,
   3494                          12),
   3495 #endif
   3496 };
   3497 
        // One AvxHBDSubpelAvgVarianceTest instance per table element, registered
        // under the "NEON" prefix.
   3498 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest,
   3499                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon));
   3500 
   3501 #if !CONFIG_REALTIME_ONLY
        // High-bitdepth NEON OBMC sub-pixel variance kernels fed to
        // AvxHBDObmcSubpelVarianceTest. The whole table and its instantiation
        // are compiled only when CONFIG_REALTIME_ONLY is 0.
   3502 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = {
         // Entries are grouped by bit depth in the order 12, 10, 8. Each group
         // lists 128x128 down to 4x4, then the 4:1 / 1:4 sizes (64x16 ... 4x16).
         // First two arguments: log2 of the block width and height named in the
         // function. Last argument: the bit depth, matching the
         // aom_highbd_<depth>_ prefix of the function.
   3503  ObmcSubpelVarianceParams(
   3504      7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12),
   3505  ObmcSubpelVarianceParams(
   3506      7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12),
   3507  ObmcSubpelVarianceParams(
   3508      6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12),
   3509  ObmcSubpelVarianceParams(
   3510      6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12),
   3511  ObmcSubpelVarianceParams(
   3512      6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12),
   3513  ObmcSubpelVarianceParams(
   3514      5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12),
   3515  ObmcSubpelVarianceParams(
   3516      5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12),
   3517  ObmcSubpelVarianceParams(
   3518      5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12),
   3519  ObmcSubpelVarianceParams(
   3520      4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12),
   3521  ObmcSubpelVarianceParams(
   3522      4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12),
   3523  ObmcSubpelVarianceParams(4, 3,
   3524                           &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12),
   3525  ObmcSubpelVarianceParams(3, 4,
   3526                           &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12),
   3527  ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon,
   3528                           12),
   3529  ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon,
   3530                           12),
   3531  ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon,
   3532                           12),
   3533  ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon,
   3534                           12),
   3535  ObmcSubpelVarianceParams(
   3536      6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12),
   3537  ObmcSubpelVarianceParams(
   3538      4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12),
   3539  ObmcSubpelVarianceParams(5, 3,
   3540                           &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12),
   3541  ObmcSubpelVarianceParams(3, 5,
   3542                           &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12),
   3543  ObmcSubpelVarianceParams(4, 2,
   3544                           &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12),
   3545  ObmcSubpelVarianceParams(2, 4,
   3546                           &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12),
   3547  ObmcSubpelVarianceParams(
   3548      7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10),
   3549  ObmcSubpelVarianceParams(
   3550      7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10),
   3551  ObmcSubpelVarianceParams(
   3552      6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10),
   3553  ObmcSubpelVarianceParams(
   3554      6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10),
   3555  ObmcSubpelVarianceParams(
   3556      6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10),
   3557  ObmcSubpelVarianceParams(
   3558      5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10),
   3559  ObmcSubpelVarianceParams(
   3560      5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10),
   3561  ObmcSubpelVarianceParams(
   3562      5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10),
   3563  ObmcSubpelVarianceParams(
   3564      4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10),
   3565  ObmcSubpelVarianceParams(
   3566      4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10),
   3567  ObmcSubpelVarianceParams(4, 3,
   3568                           &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10),
   3569  ObmcSubpelVarianceParams(3, 4,
   3570                           &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10),
   3571  ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon,
   3572                           10),
   3573  ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon,
   3574                           10),
   3575  ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon,
   3576                           10),
   3577  ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon,
   3578                           10),
   3579  ObmcSubpelVarianceParams(
   3580      6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10),
   3581  ObmcSubpelVarianceParams(
   3582      4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10),
   3583  ObmcSubpelVarianceParams(5, 3,
   3584                           &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10),
   3585  ObmcSubpelVarianceParams(3, 5,
   3586                           &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10),
   3587  ObmcSubpelVarianceParams(4, 2,
   3588                           &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10),
   3589  ObmcSubpelVarianceParams(2, 4,
   3590                           &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10),
   3591  ObmcSubpelVarianceParams(
   3592      7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8),
   3593  ObmcSubpelVarianceParams(7, 6,
   3594                           &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8),
   3595  ObmcSubpelVarianceParams(6, 7,
   3596                           &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8),
   3597  ObmcSubpelVarianceParams(6, 6,
   3598                           &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8),
   3599  ObmcSubpelVarianceParams(6, 5,
   3600                           &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8),
   3601  ObmcSubpelVarianceParams(5, 6,
   3602                           &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8),
   3603  ObmcSubpelVarianceParams(5, 5,
   3604                           &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8),
   3605  ObmcSubpelVarianceParams(5, 4,
   3606                           &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8),
   3607  ObmcSubpelVarianceParams(4, 5,
   3608                           &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8),
   3609  ObmcSubpelVarianceParams(4, 4,
   3610                           &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8),
   3611  ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon,
   3612                           8),
   3613  ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon,
   3614                           8),
   3615  ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon,
   3616                           8),
   3617  ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon,
   3618                           8),
   3619  ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon,
   3620                           8),
   3621  ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon,
   3622                           8),
   3623  ObmcSubpelVarianceParams(6, 4,
   3624                           &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8),
   3625  ObmcSubpelVarianceParams(4, 6,
   3626                           &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8),
   3627  ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon,
   3628                           8),
   3629  ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon,
   3630                           8),
   3631  ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon,
   3632                           8),
   3633  ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon,
   3634                           8),
   3635 };
   3636 
        // One AvxHBDObmcSubpelVarianceTest instance per table element,
        // registered under the "NEON" prefix.
   3637 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest,
   3638                         ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon));
   3639 #endif  // !CONFIG_REALTIME_ONLY
   3640 
   3641 #endif  // CONFIG_AV1_HIGHBITDEPTH
   3642 
   3643 #endif  // HAVE_NEON
   3644 
   3645 #if HAVE_NEON_DOTPROD
   3646 
   3647 const VarianceParams kArrayVariance_neon_dotprod[] = {
         // NEON dot-product variance kernels fed to AvxVarianceTest.
         // The two arguments before the function pointer are log2 of the block
         // width and height named in the function (e.g. 7, 6 <-> 128x64). No
         // further argument is passed here.
         // Each block size is listed exactly once, so every kernel yields one
         // test instance per test case; a second, out-of-order 64x64 entry that
         // made the 64x64 case run twice has been dropped.
   3648  VarianceParams(7, 7, &aom_variance128x128_neon_dotprod),
   3650  VarianceParams(7, 6, &aom_variance128x64_neon_dotprod),
   3651  VarianceParams(6, 7, &aom_variance64x128_neon_dotprod),
   3652  VarianceParams(6, 6, &aom_variance64x64_neon_dotprod),
   3653  VarianceParams(6, 5, &aom_variance64x32_neon_dotprod),
   3654  VarianceParams(5, 6, &aom_variance32x64_neon_dotprod),
   3655  VarianceParams(5, 5, &aom_variance32x32_neon_dotprod),
   3656  VarianceParams(5, 4, &aom_variance32x16_neon_dotprod),
   3657  VarianceParams(4, 5, &aom_variance16x32_neon_dotprod),
   3658  VarianceParams(4, 4, &aom_variance16x16_neon_dotprod),
   3659  VarianceParams(4, 3, &aom_variance16x8_neon_dotprod),
   3660  VarianceParams(3, 4, &aom_variance8x16_neon_dotprod),
   3661  VarianceParams(3, 3, &aom_variance8x8_neon_dotprod),
   3662  VarianceParams(3, 2, &aom_variance8x4_neon_dotprod),
   3663  VarianceParams(2, 3, &aom_variance4x8_neon_dotprod),
   3664  VarianceParams(2, 2, &aom_variance4x4_neon_dotprod),
         // The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY
         // is 0.
   3665 #if !CONFIG_REALTIME_ONLY
   3666  VarianceParams(2, 4, &aom_variance4x16_neon_dotprod),
   3667  VarianceParams(4, 2, &aom_variance16x4_neon_dotprod),
   3668  VarianceParams(3, 5, &aom_variance8x32_neon_dotprod),
   3669  VarianceParams(5, 3, &aom_variance32x8_neon_dotprod),
   3670  VarianceParams(4, 6, &aom_variance16x64_neon_dotprod),
   3671  VarianceParams(6, 4, &aom_variance64x16_neon_dotprod),
   3672 #endif
   3673 };
   3674 
        // One AvxVarianceTest instance per table element, registered under the
        // "NEON_DOTPROD" prefix.
   3675 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest,
   3676                         ::testing::ValuesIn(kArrayVariance_neon_dotprod));
   3677 
   3678 const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = {
         // All four rows drive the same kernel,
         // aom_get_var_sse_sum_8x8_quad_neon_dotprod; only the two leading
         // arguments vary.
         // NOTE(review): the leading pairs look like log2 block width/height as
         // in the variance tables, and the trailing 0 is presumably a bit
         // depth placeholder -- confirm against GetSseSumParams.
   3679  GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
   3680  GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
   3681  GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
   3682  GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0)
   3683 };
        // Instantiate GetSseSum8x8QuadTest once per table row under the
        // "NEON_DOTPROD" prefix.
   3684 INSTANTIATE_TEST_SUITE_P(
   3685    NEON_DOTPROD, GetSseSum8x8QuadTest,
   3686    ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod));
   3687 
   3688 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = {
         // All four rows drive the same kernel,
         // aom_get_var_sse_sum_16x16_dual_neon_dotprod; only the two leading
         // arguments vary.
         // NOTE(review): the leading pairs look like log2 block width/height as
         // in the variance tables, and the trailing 0 is presumably a bit
         // depth placeholder -- confirm against GetSseSumParamsDual.
   3689  GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
   3690  GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
   3691  GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
   3692  GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0)
   3693 };
        // Instantiate GetSseSum16x16DualTest once per table row under the
        // "NEON_DOTPROD" prefix.
   3694 INSTANTIATE_TEST_SUITE_P(
   3695    NEON_DOTPROD, GetSseSum16x16DualTest,
   3696    ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod));
   3697 
   3698 INSTANTIATE_TEST_SUITE_P(
           // Instantiate AvxMseTest under the "NEON_DOTPROD" prefix for the four
           // NEON dot-product MSE kernels. The two leading MseParams arguments
           // are the base-2 logs of the width and height named in the kernel
           // (e.g. 3, 4 pairs with aom_mse8x16_neon_dotprod).
   3699    NEON_DOTPROD, AvxMseTest,
   3700    ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod),
   3701                      MseParams(3, 4, &aom_mse8x16_neon_dotprod),
   3702                      MseParams(4, 4, &aom_mse16x16_neon_dotprod),
   3703                      MseParams(4, 3, &aom_mse16x8_neon_dotprod)));
   3704 
   3705 #endif  // HAVE_NEON_DOTPROD
   3706 
   3707 #if HAVE_SVE
   3708 
   3709 #if CONFIG_AV1_HIGHBITDEPTH
   3710 const VarianceParams kArrayHBDVariance_sve[] = {
         // Parameter table for the SVE high-bitdepth variance kernels.
         // In every row the first two arguments are the base-2 logs of the
         // block width and height named in the kernel, and the last argument
         // repeats the bit depth embedded in the kernel name (12, 10 or 8).
         //
         // 12-bit kernels.
   3711  VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12),
   3712  VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12),
   3713  VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12),
   3714  VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12),
   3715  VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12),
   3716  VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12),
   3717  VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12),
   3718  VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12),
   3719  VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12),
   3720  VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12),
   3721  VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12),
   3722  VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12),
   3723  VarianceParams(3, 3, &aom_highbd_12_variance8x8_sve, 12),
   3724  VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12),
   3725  VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12),
   3726  VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12),
         // 10-bit kernels, same block sizes in the same order.
   3727  VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10),
   3728  VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10),
   3729  VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10),
   3730  VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10),
   3731  VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10),
   3732  VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10),
   3733  VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10),
   3734  VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10),
   3735  VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10),
   3736  VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10),
   3737  VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10),
   3738  VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10),
   3739  VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10),
   3740  VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10),
   3741  VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10),
   3742  VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10),
         // 8-bit kernels, same block sizes in the same order.
   3743  VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8),
   3744  VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8),
   3745  VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8),
   3746  VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8),
   3747  VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8),
   3748  VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8),
   3749  VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8),
   3750  VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8),
   3751  VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8),
   3752  VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8),
   3753  VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8),
   3754  VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8),
   3755  VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8),
   3756  VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8),
   3757  VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8),
   3758  VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8),
         // The 1:4 and 4:1 block sizes below (again grouped 12-, 10-, then
         // 8-bit) are only compiled in when CONFIG_REALTIME_ONLY is 0.
   3759 #if !CONFIG_REALTIME_ONLY
   3760  VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12),
   3761  VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12),
   3762  VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12),
   3763  VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12),
   3764  VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12),
   3765  VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12),
   3766  VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10),
   3767  VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10),
   3768  VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10),
   3769  VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10),
   3770  VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10),
   3771  VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10),
   3772  VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8),
   3773  VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8),
   3774  VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8),
   3775  VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8),
   3776  VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8),
   3777  VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8),
   3778 #endif
   3779 };
   3780 
        // Instantiate AvxHBDVarianceTest once per table row under the "SVE"
        // prefix.
   3781 INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest,
   3782                         ::testing::ValuesIn(kArrayHBDVariance_sve));
   3783 
   3784 #endif  // CONFIG_AV1_HIGHBITDEPTH
   3785 #endif  // HAVE_SVE
   3786 
   3787 }  // namespace