tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

convolve_test.cc (42850B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <string.h>
     13 #include <tuple>
     14 
     15 #include "gtest/gtest.h"
     16 
     17 #include "config/aom_config.h"
     18 #include "config/aom_dsp_rtcd.h"
     19 
     20 #include "aom_dsp/aom_dsp_common.h"
     21 #include "aom_dsp/aom_filter.h"
     22 #include "aom_mem/aom_mem.h"
     23 #include "aom_ports/aom_timer.h"
     24 #include "aom_ports/mem.h"
     25 #include "av1/common/filter.h"
     26 #include "test/acm_random.h"
     27 #include "test/register_state_check.h"
     28 #include "test/util.h"
     29 
     30 namespace {
     31 
     32 static const unsigned int kMaxDimension = MAX_SB_SIZE;
     33 static const int kDataAlignment = 16;
     34 static const int kOuterBlockSize = 4 * kMaxDimension;
     35 static const int kInputStride = kOuterBlockSize;
     36 static const int kOutputStride = kOuterBlockSize;
     37 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
     38 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
     39 static const int16_t kInvalidFilter[8] = {};
     40 static const int kNumFilterBanks = SWITCHABLE_FILTERS;
     41 static const int kNumFilters = 16;
     42 
     43 using ConvolveFunc = void (*)(const uint8_t *src, ptrdiff_t src_stride,
     44                              uint8_t *dst, ptrdiff_t dst_stride,
     45                              const int16_t *filter_x, int filter_x_stride,
     46                              const int16_t *filter_y, int filter_y_stride,
     47                              int w, int h);
     48 
     49 struct ConvolveFunctions {
     50  ConvolveFunctions(ConvolveFunc h8, ConvolveFunc v8, int bd)
     51      : h8_(h8), v8_(v8), use_highbd_(bd) {}
     52 
     53  ConvolveFunc h8_;
     54  ConvolveFunc v8_;
     55  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
     56 };
     57 
     58 using ConvolveParam = std::tuple<int, int, const ConvolveFunctions *>;
     59 
     60 #define ALL_SIZES_64(convolve_fn)                                         \
     61  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
     62      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
     63      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
     64      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
     65      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
     66      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
     67      make_tuple(64, 64, &convolve_fn)
     68 
     69 #define ALL_SIZES(convolve_fn)                                          \
     70  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
     71      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
     72 
     73 // Reference 8-tap subpixel filter, slightly modified to fit into this test.
     74 #define AV1_FILTER_WEIGHT 128
     75 #define AV1_FILTER_SHIFT 7
     76 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
     77 
     78 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
     79                        const int16_t *HFilter, const int16_t *VFilter,
     80                        uint8_t *dst_ptr, unsigned int dst_stride,
     81                        unsigned int output_width, unsigned int output_height) {
     82  // Between passes, we use an intermediate buffer whose height is extended to
     83  // have enough horizontally filtered values as input for the vertical pass.
     84  // This buffer is allocated to be big enough for the largest block type we
     85  // support.
     86  const int kInterp_Extend = 4;
     87  const unsigned int intermediate_height =
     88      (kInterp_Extend - 1) + output_height + kInterp_Extend;
     89  unsigned int i, j;
     90 
     91  assert(intermediate_height > 7);
     92 
     93  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
     94  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
     95  //                                 + kInterp_Extend
     96  //                               = 3 + 16 + 4
     97  //                               = 23
     98  // and filter_max_width          = 16
     99  //
    100  uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
    101  const int intermediate_next_stride =
    102      1 - static_cast<int>(intermediate_height * output_width);
    103 
    104  // Horizontal pass (src -> transposed intermediate).
    105  uint8_t *output_ptr = intermediate_buffer;
    106  const int src_next_row_stride = src_stride - output_width;
    107  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    108  for (i = 0; i < intermediate_height; ++i) {
    109    for (j = 0; j < output_width; ++j) {
    110      // Apply filter...
    111      const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
    112                       (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
    113                       (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
    114                       (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
    115                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
    116 
    117      // Normalize back to 0-255...
    118      *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
    119      ++src_ptr;
    120      output_ptr += intermediate_height;
    121    }
    122    src_ptr += src_next_row_stride;
    123    output_ptr += intermediate_next_stride;
    124  }
    125 
    126  // Vertical pass (transposed intermediate -> dst).
    127  src_ptr = intermediate_buffer;
    128  const int dst_next_row_stride = dst_stride - output_width;
    129  for (i = 0; i < output_height; ++i) {
    130    for (j = 0; j < output_width; ++j) {
    131      // Apply filter...
    132      const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
    133                       (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
    134                       (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
    135                       (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
    136                       (AV1_FILTER_WEIGHT >> 1);  // Rounding
    137 
    138      // Normalize back to 0-255...
    139      *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
    140      src_ptr += intermediate_height;
    141    }
    142    src_ptr += intermediate_next_stride;
    143    dst_ptr += dst_next_row_stride;
    144  }
    145 }
    146 
    147 void block2d_average_c(uint8_t *src, unsigned int src_stride,
    148                       uint8_t *output_ptr, unsigned int output_stride,
    149                       unsigned int output_width, unsigned int output_height) {
    150  unsigned int i, j;
    151  for (i = 0; i < output_height; ++i) {
    152    for (j = 0; j < output_width; ++j) {
    153      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    154    }
    155    output_ptr += output_stride;
    156  }
    157 }
    158 
    159 void filter_average_block2d_8_c(const uint8_t *src_ptr,
    160                                const unsigned int src_stride,
    161                                const int16_t *HFilter, const int16_t *VFilter,
    162                                uint8_t *dst_ptr, unsigned int dst_stride,
    163                                unsigned int output_width,
    164                                unsigned int output_height) {
    165  uint8_t tmp[kMaxDimension * kMaxDimension];
    166 
    167  assert(output_width <= kMaxDimension);
    168  assert(output_height <= kMaxDimension);
    169  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
    170                     output_width, output_height);
    171  block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
    172                    output_height);
    173 }
    174 
    175 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
    176                               const unsigned int src_stride,
    177                               const int16_t *HFilter, const int16_t *VFilter,
    178                               uint16_t *dst_ptr, unsigned int dst_stride,
    179                               unsigned int output_width,
    180                               unsigned int output_height, int bd) {
    181  // Between passes, we use an intermediate buffer whose height is extended to
    182  // have enough horizontally filtered values as input for the vertical pass.
    183  // This buffer is allocated to be big enough for the largest block type we
    184  // support.
    185  const int kInterp_Extend = 4;
    186  const unsigned int intermediate_height =
    187      (kInterp_Extend - 1) + output_height + kInterp_Extend;
    188 
    189  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
    190   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
    191   *                                 + kInterp_Extend
    192   *                               = 3 + 16 + 4
    193   *                               = 23
    194   * and filter_max_width = 16
    195   */
    196  uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
    197  const int intermediate_next_stride =
    198      1 - static_cast<int>(intermediate_height * output_width);
    199 
    200  // Horizontal pass (src -> transposed intermediate).
    201  {
    202    uint16_t *output_ptr = intermediate_buffer;
    203    const int src_next_row_stride = src_stride - output_width;
    204    unsigned int i, j;
    205    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    206    for (i = 0; i < intermediate_height; ++i) {
    207      for (j = 0; j < output_width; ++j) {
    208        // Apply filter...
    209        const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
    210                         (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
    211                         (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
    212                         (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
    213                         (AV1_FILTER_WEIGHT >> 1);  // Rounding
    214 
    215        // Normalize back to 0-255...
    216        *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
    217        ++src_ptr;
    218        output_ptr += intermediate_height;
    219      }
    220      src_ptr += src_next_row_stride;
    221      output_ptr += intermediate_next_stride;
    222    }
    223  }
    224 
    225  // Vertical pass (transposed intermediate -> dst).
    226  {
    227    const uint16_t *interm_ptr = intermediate_buffer;
    228    const int dst_next_row_stride = dst_stride - output_width;
    229    unsigned int i, j;
    230    for (i = 0; i < output_height; ++i) {
    231      for (j = 0; j < output_width; ++j) {
    232        // Apply filter...
    233        const int temp =
    234            (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
    235            (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
    236            (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
    237            (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
    238            (AV1_FILTER_WEIGHT >> 1);  // Rounding
    239 
    240        // Normalize back to 0-255...
    241        *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
    242        interm_ptr += intermediate_height;
    243      }
    244      interm_ptr += intermediate_next_stride;
    245      dst_ptr += dst_next_row_stride;
    246    }
    247  }
    248 }
    249 
    250 void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
    251                              uint16_t *output_ptr, unsigned int output_stride,
    252                              unsigned int output_width,
    253                              unsigned int output_height) {
    254  unsigned int i, j;
    255  for (i = 0; i < output_height; ++i) {
    256    for (j = 0; j < output_width; ++j) {
    257      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    258    }
    259    output_ptr += output_stride;
    260  }
    261 }
    262 
    263 void highbd_filter_average_block2d_8_c(
    264    const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
    265    const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
    266    unsigned int output_width, unsigned int output_height, int bd) {
    267  uint16_t tmp[kMaxDimension * kMaxDimension];
    268 
    269  assert(output_width <= kMaxDimension);
    270  assert(output_height <= kMaxDimension);
    271  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
    272                            kMaxDimension, output_width, output_height, bd);
    273  highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
    274                           output_width, output_height);
    275 }
    276 
    277 class ConvolveTestBase : public ::testing::TestWithParam<ConvolveParam> {
    278 public:
    279  static void SetUpTestSuite() {
    280    // Force input_ to be unaligned, output to be 16 byte aligned.
    281    input_ = reinterpret_cast<uint8_t *>(
    282                 aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
    283             1;
    284    ASSERT_NE(input_, nullptr);
    285    ref8_ = reinterpret_cast<uint8_t *>(
    286        aom_memalign(kDataAlignment, kOutputStride * kMaxDimension));
    287    ASSERT_NE(ref8_, nullptr);
    288    output_ = reinterpret_cast<uint8_t *>(
    289        aom_memalign(kDataAlignment, kOutputBufferSize));
    290    ASSERT_NE(output_, nullptr);
    291    output_ref_ = reinterpret_cast<uint8_t *>(
    292        aom_memalign(kDataAlignment, kOutputBufferSize));
    293    ASSERT_NE(output_ref_, nullptr);
    294    input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
    295                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
    296               1;
    297    ASSERT_NE(input16_, nullptr);
    298    ref16_ = reinterpret_cast<uint16_t *>(aom_memalign(
    299        kDataAlignment, kOutputStride * kMaxDimension * sizeof(uint16_t)));
    300    ASSERT_NE(ref16_, nullptr);
    301    output16_ = reinterpret_cast<uint16_t *>(
    302        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    303    ASSERT_NE(output16_, nullptr);
    304    output16_ref_ = reinterpret_cast<uint16_t *>(
    305        aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    306    ASSERT_NE(output16_ref_, nullptr);
    307  }
    308 
    309  static void TearDownTestSuite() {
    310    aom_free(input_ - 1);
    311    input_ = nullptr;
    312    aom_free(ref8_);
    313    ref8_ = nullptr;
    314    aom_free(output_);
    315    output_ = nullptr;
    316    aom_free(output_ref_);
    317    output_ref_ = nullptr;
    318    aom_free(input16_ - 1);
    319    input16_ = nullptr;
    320    aom_free(ref16_);
    321    ref16_ = nullptr;
    322    aom_free(output16_);
    323    output16_ = nullptr;
    324    aom_free(output16_ref_);
    325    output16_ref_ = nullptr;
    326  }
    327 
    328 protected:
    329  int Width() const { return GET_PARAM(0); }
    330  int Height() const { return GET_PARAM(1); }
    331  int BorderLeft() const {
    332    const int center = (kOuterBlockSize - Width()) / 2;
    333    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
    334  }
    335  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
    336 
    337  bool IsIndexInBorder(int i) {
    338    return (i < BorderTop() * kOuterBlockSize ||
    339            i >= (BorderTop() + Height()) * kOuterBlockSize ||
    340            i % kOuterBlockSize < BorderLeft() ||
    341            i % kOuterBlockSize >= (BorderLeft() + Width()));
    342  }
    343 
    344  void SetUp() override {
    345    UUT_ = GET_PARAM(2);
    346    if (UUT_->use_highbd_ != 0)
    347      mask_ = (1 << UUT_->use_highbd_) - 1;
    348    else
    349      mask_ = 255;
    350    /* Set up guard blocks for an inner block centered in the outer block */
    351    for (int i = 0; i < kOutputBufferSize; ++i) {
    352      if (IsIndexInBorder(i)) {
    353        output_[i] = 255;
    354        output16_[i] = mask_;
    355      } else {
    356        output_[i] = 0;
    357        output16_[i] = 0;
    358      }
    359    }
    360 
    361    ::libaom_test::ACMRandom prng;
    362    for (int i = 0; i < kInputBufferSize; ++i) {
    363      if (i & 1) {
    364        input_[i] = 255;
    365        input16_[i] = mask_;
    366      } else {
    367        input_[i] = prng.Rand8Extremes();
    368        input16_[i] = prng.Rand16() & mask_;
    369      }
    370    }
    371  }
    372 
    373  void SetConstantInput(int value) {
    374    memset(input_, value, kInputBufferSize);
    375    aom_memset16(input16_, value, kInputBufferSize);
    376  }
    377 
    378  void CopyOutputToRef() {
    379    memcpy(output_ref_, output_, kOutputBufferSize);
    380    // Copy 16-bit pixels values. The effective number of bytes is double.
    381    memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
    382  }
    383 
    384  void CheckGuardBlocks() {
    385    for (int i = 0; i < kOutputBufferSize; ++i) {
    386      if (IsIndexInBorder(i)) {
    387        EXPECT_EQ(255, output_[i]);
    388      }
    389    }
    390  }
    391 
    392  uint8_t *input() const {
    393    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
    394    if (UUT_->use_highbd_ == 0) {
    395      return input_ + offset;
    396    } else {
    397      return CONVERT_TO_BYTEPTR(input16_) + offset;
    398    }
    399  }
    400 
    401  uint8_t *output() const {
    402    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
    403    if (UUT_->use_highbd_ == 0) {
    404      return output_ + offset;
    405    } else {
    406      return CONVERT_TO_BYTEPTR(output16_) + offset;
    407    }
    408  }
    409 
    410  uint8_t *output_ref() const {
    411    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
    412    if (UUT_->use_highbd_ == 0) {
    413      return output_ref_ + offset;
    414    } else {
    415      return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
    416    }
    417  }
    418 
    419  uint16_t lookup(uint8_t *list, int index) const {
    420    if (UUT_->use_highbd_ == 0) {
    421      return list[index];
    422    } else {
    423      return CONVERT_TO_SHORTPTR(list)[index];
    424    }
    425  }
    426 
    427  void assign_val(uint8_t *list, int index, uint16_t val) const {
    428    if (UUT_->use_highbd_ == 0) {
    429      list[index] = (uint8_t)val;
    430    } else {
    431      CONVERT_TO_SHORTPTR(list)[index] = val;
    432    }
    433  }
    434 
    435  void wrapper_filter_average_block2d_8_c(
    436      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
    437      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
    438      unsigned int output_width, unsigned int output_height) {
    439    if (UUT_->use_highbd_ == 0) {
    440      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
    441                                 dst_stride, output_width, output_height);
    442    } else {
    443      highbd_filter_average_block2d_8_c(
    444          CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
    445          CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
    446          UUT_->use_highbd_);
    447    }
    448  }
    449 
    450  void wrapper_filter_block2d_8_c(
    451      const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
    452      const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
    453      unsigned int output_width, unsigned int output_height) {
    454    if (UUT_->use_highbd_ == 0) {
    455      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
    456                         dst_stride, output_width, output_height);
    457    } else {
    458      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
    459                                HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
    460                                dst_stride, output_width, output_height,
    461                                UUT_->use_highbd_);
    462    }
    463  }
    464 
    465  void MatchesReferenceSubpixelFilter() {
    466    uint8_t *const in = input();
    467    uint8_t *const out = output();
    468    uint8_t *ref;
    469    if (UUT_->use_highbd_ == 0) {
    470      ref = ref8_;
    471    } else {
    472      ref = CONVERT_TO_BYTEPTR(ref16_);
    473    }
    474    int subpel_search;
    475    for (subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS;
    476         ++subpel_search) {
    477      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    478        const InterpFilter filter = (InterpFilter)filter_bank;
    479        const InterpKernel *filters =
    480            (const InterpKernel *)av1_get_interp_filter_kernel(filter,
    481                                                               subpel_search);
    482        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
    483          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
    484            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
    485                                       filters[filter_y], ref, kOutputStride,
    486                                       Width(), Height());
    487 
    488            if (filter_x && filter_y)
    489              continue;
    490            else if (filter_y)
    491              UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
    492                        16, filters[filter_y], 16, Width(), Height());
    493            else if (filter_x)
    494              API_REGISTER_STATE_CHECK(UUT_->h8_(
    495                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
    496                  kInvalidFilter, 16, Width(), Height()));
    497            else
    498              continue;
    499 
    500            CheckGuardBlocks();
    501 
    502            for (int y = 0; y < Height(); ++y)
    503              for (int x = 0; x < Width(); ++x)
    504                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
    505                          lookup(out, y * kOutputStride + x))
    506                    << "mismatch at (" << x << "," << y << "), "
    507                    << "filters (" << filter_bank << "," << filter_x << ","
    508                    << filter_y << ")";
    509          }
    510        }
    511      }
    512    }
    513  }
    514 
    515  void FilterExtremes() {
    516    uint8_t *const in = input();
    517    uint8_t *const out = output();
    518    uint8_t *ref;
    519    if (UUT_->use_highbd_ == 0) {
    520      ref = ref8_;
    521    } else {
    522      ref = CONVERT_TO_BYTEPTR(ref16_);
    523    }
    524 
    525    // Populate ref and out with some random data
    526    ::libaom_test::ACMRandom prng;
    527    for (int y = 0; y < Height(); ++y) {
    528      for (int x = 0; x < Width(); ++x) {
    529        uint16_t r;
    530        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
    531          r = prng.Rand8Extremes();
    532        } else {
    533          r = prng.Rand16() & mask_;
    534        }
    535        assign_val(out, y * kOutputStride + x, r);
    536        assign_val(ref, y * kOutputStride + x, r);
    537      }
    538    }
    539 
    540    for (int axis = 0; axis < 2; axis++) {
    541      int seed_val = 0;
    542      while (seed_val < 256) {
    543        for (int y = 0; y < 8; ++y) {
    544          for (int x = 0; x < 8; ++x) {
    545            assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
    546                       ((seed_val >> (axis ? y : x)) & 1) * mask_);
    547            if (axis) seed_val++;
    548          }
    549          if (axis)
    550            seed_val -= 8;
    551          else
    552            seed_val++;
    553        }
    554        if (axis) seed_val += 8;
    555        int subpel_search;
    556        for (subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS;
    557             ++subpel_search) {
    558          for (int filter_bank = 0; filter_bank < kNumFilterBanks;
    559               ++filter_bank) {
    560            const InterpFilter filter = (InterpFilter)filter_bank;
    561            const InterpKernel *filters =
    562                (const InterpKernel *)av1_get_interp_filter_kernel(
    563                    filter, subpel_search);
    564            for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
    565              for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
    566                wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
    567                                           filters[filter_y], ref,
    568                                           kOutputStride, Width(), Height());
    569                if (filter_x && filter_y)
    570                  continue;
    571                else if (filter_y)
    572                  API_REGISTER_STATE_CHECK(UUT_->v8_(
    573                      in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
    574                      filters[filter_y], 16, Width(), Height()));
    575                else if (filter_x)
    576                  API_REGISTER_STATE_CHECK(UUT_->h8_(
    577                      in, kInputStride, out, kOutputStride, filters[filter_x],
    578                      16, kInvalidFilter, 16, Width(), Height()));
    579                else
    580                  continue;
    581 
    582                for (int y = 0; y < Height(); ++y)
    583                  for (int x = 0; x < Width(); ++x)
    584                    ASSERT_EQ(lookup(ref, y * kOutputStride + x),
    585                              lookup(out, y * kOutputStride + x))
    586                        << "mismatch at (" << x << "," << y << "), "
    587                        << "filters (" << filter_bank << "," << filter_x << ","
    588                        << filter_y << ")";
    589              }
    590            }
    591          }
    592        }
    593      }
    594    }
    595  }
    596 
    597  void SpeedTest() {
    598    uint8_t *const in = input();
    599    uint8_t *const out = output();
    600    uint8_t *ref;
    601    if (UUT_->use_highbd_ == 0) {
    602      ref = ref8_;
    603    } else {
    604      ref = CONVERT_TO_BYTEPTR(ref16_);
    605    }
    606 
    607    // Populate ref and out with some random data
    608    ::libaom_test::ACMRandom prng;
    609    for (int y = 0; y < Height(); ++y) {
    610      for (int x = 0; x < Width(); ++x) {
    611        uint16_t r;
    612        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
    613          r = prng.Rand8Extremes();
    614        } else {
    615          r = prng.Rand16() & mask_;
    616        }
    617        assign_val(out, y * kOutputStride + x, r);
    618        assign_val(ref, y * kOutputStride + x, r);
    619      }
    620    }
    621 
    622    InterpFilter filter = (InterpFilter)1;
    623    const InterpKernel *filters =
    624        (const InterpKernel *)av1_get_interp_filter_kernel(filter, USE_8_TAPS);
    625    wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
    626                                       out, kOutputStride, Width(), Height());
    627 
    628    aom_usec_timer timer;
    629    int tests_num = 1000;
    630 
    631    aom_usec_timer_start(&timer);
    632    while (tests_num > 0) {
    633      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    634        filter = (InterpFilter)filter_bank;
    635        filters = (const InterpKernel *)av1_get_interp_filter_kernel(
    636            filter, USE_8_TAPS);
    637        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
    638          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
    639            if (filter_x && filter_y) continue;
    640            if (filter_y)
    641              API_REGISTER_STATE_CHECK(UUT_->v8_(
    642                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
    643                  filters[filter_y], 16, Width(), Height()));
    644            else if (filter_x)
    645              API_REGISTER_STATE_CHECK(UUT_->h8_(
    646                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
    647                  kInvalidFilter, 16, Width(), Height()));
    648          }
    649        }
    650      }
    651      tests_num--;
    652    }
    653    aom_usec_timer_mark(&timer);
    654 
    655    const int elapsed_time =
    656        static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
    657    printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
    658           UUT_->use_highbd_, elapsed_time);
    659  }
    660 
    661  const ConvolveFunctions *UUT_;
    662  static uint8_t *input_;
    663  static uint8_t *ref8_;
    664  static uint8_t *output_;
    665  static uint8_t *output_ref_;
    666  static uint16_t *input16_;
    667  static uint16_t *ref16_;
    668  static uint16_t *output16_;
    669  static uint16_t *output16_ref_;
    670  int mask_;
    671 };
    672 
    673 uint8_t *ConvolveTestBase::input_ = nullptr;
    674 uint8_t *ConvolveTestBase::ref8_ = nullptr;
    675 uint8_t *ConvolveTestBase::output_ = nullptr;
    676 uint8_t *ConvolveTestBase::output_ref_ = nullptr;
    677 uint16_t *ConvolveTestBase::input16_ = nullptr;
    678 uint16_t *ConvolveTestBase::ref16_ = nullptr;
    679 uint16_t *ConvolveTestBase::output16_ = nullptr;
    680 uint16_t *ConvolveTestBase::output16_ref_ = nullptr;
    681 
    682 using LowbdConvolveTest = ConvolveTestBase;
    683 
    684 TEST_P(LowbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
    685 
    686 void FiltersWontSaturateWhenAddedPairwise() {
    687  int subpel_search;
    688  for (subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS;
    689       ++subpel_search) {
    690    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
    691      const InterpFilter filter = (InterpFilter)filter_bank;
    692      const InterpKernel *filters =
    693          (const InterpKernel *)av1_get_interp_filter_kernel(filter,
    694                                                             subpel_search);
    695      for (int i = 0; i < kNumFilters; i++) {
    696        const int p0 = filters[i][0] + filters[i][1];
    697        const int p1 = filters[i][2] + filters[i][3];
    698        const int p2 = filters[i][4] + filters[i][5];
    699        const int p3 = filters[i][6] + filters[i][7];
    700        EXPECT_LE(p0, 128);
    701        EXPECT_LE(p1, 128);
    702        EXPECT_LE(p2, 128);
    703        EXPECT_LE(p3, 128);
    704        EXPECT_LE(p0 + p3, 128);
    705        EXPECT_LE(p0 + p3 + p1, 128);
    706        EXPECT_LE(p0 + p3 + p1 + p2, 128);
    707        EXPECT_EQ(p0 + p1 + p2 + p3, 128);
    708      }
    709    }
    710  }
    711 }
    712 
// Not parameterized: the kernel tables are checked once, independently of the
// convolve implementation under test.
TEST(LowbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  FiltersWontSaturateWhenAddedPairwise();
}

// The parameterized tests below forward to the fixture method of the same
// name.
TEST_P(LowbdConvolveTest, MatchesReferenceSubpixelFilter) {
  MatchesReferenceSubpixelFilter();
}

TEST_P(LowbdConvolveTest, FilterExtremes) { FilterExtremes(); }

// The DISABLED_ prefix keeps the benchmark out of normal runs.
TEST_P(LowbdConvolveTest, DISABLED_Speed) { SpeedTest(); }
    724 
    725 using std::make_tuple;
    726 
// WRAP macro is only used for high bitdepth build.
//
// WRAP(func, bd) defines a static function named wrap_<func>_<bd> that takes
// the argument list below (no bit depth) and forwards every argument to
// aom_highbd_<func>, passing the literal `bd` as the extra trailing argument.
// The wrappers are used further down to build ConvolveFunctions entries for
// the high bitdepth kernels.
#if CONFIG_AV1_HIGHBITDEPTH
#define WRAP(func, bd)                                                       \
  static void wrap_##func##_##bd(                                            \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
  }
// One horizontal and one vertical wrapper per bit depth (8, 10, 12) for each
// implementation enabled in this build.
#if HAVE_SSE2 && AOM_ARCH_X86_64
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
#endif  // HAVE_SSE2 && AOM_ARCH_X86_64

// The C wrappers are generated whenever high bitdepth is enabled.
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_vert_c, 12)

#if HAVE_AVX2
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)

WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)

WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
#endif  // HAVE_AVX2

#if HAVE_NEON
WRAP(convolve8_horiz_neon, 8)
WRAP(convolve8_vert_neon, 8)

WRAP(convolve8_horiz_neon, 10)
WRAP(convolve8_vert_neon, 10)

WRAP(convolve8_horiz_neon, 12)
WRAP(convolve8_vert_neon, 12)
#endif  // HAVE_NEON

#if HAVE_SVE
WRAP(convolve8_horiz_sve, 8)
WRAP(convolve8_vert_sve, 8)

WRAP(convolve8_horiz_sve, 10)
WRAP(convolve8_vert_sve, 10)

WRAP(convolve8_horiz_sve, 12)
WRAP(convolve8_vert_sve, 12)
#endif  // HAVE_SVE
#endif  // CONFIG_AV1_HIGHBITDEPTH

// Placed outside the #if above; undefining a macro that was never defined is
// harmless.
#undef WRAP
    788 
#if CONFIG_AV1_HIGHBITDEPTH

// The high bitdepth suite is also ConvolveTestBase under another name, so it
// can be instantiated with its own parameter lists.
using HighbdConvolveTest = ConvolveTestBase;

TEST_P(HighbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); }

// Not parameterized: checks the kernel tables only.
TEST(HighbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  FiltersWontSaturateWhenAddedPairwise();
}

TEST_P(HighbdConvolveTest, MatchesReferenceSubpixelFilter) {
  MatchesReferenceSubpixelFilter();
}

TEST_P(HighbdConvolveTest, FilterExtremes) { FilterExtremes(); }

// The DISABLED_ prefix keeps the benchmark out of normal runs.
TEST_P(HighbdConvolveTest, DISABLED_Speed) { SpeedTest(); }

// C high bitdepth kernels, one ConvolveFunctions per bit depth. The last
// constructor argument matches the bit depth baked into the wrappers.
const ConvolveFunctions wrap_convolve8_c(wrap_convolve8_horiz_c_8,
                                         wrap_convolve8_vert_c_8, 8);
const ConvolveFunctions wrap_convolve10_c(wrap_convolve8_horiz_c_10,
                                          wrap_convolve8_vert_c_10, 10);
const ConvolveFunctions wrap_convolve12_c(wrap_convolve8_horiz_c_12,
                                          wrap_convolve8_vert_c_12, 12);
const ConvolveParam kArrayHighbdConvolve_c[] = { ALL_SIZES(wrap_convolve8_c),
                                                 ALL_SIZES(wrap_convolve10_c),
                                                 ALL_SIZES(wrap_convolve12_c) };

INSTANTIATE_TEST_SUITE_P(C, HighbdConvolveTest,
                         ::testing::ValuesIn(kArrayHighbdConvolve_c));
#endif  // CONFIG_AV1_HIGHBITDEPTH
    820 
// Low bitdepth C kernels. The last constructor argument is 0 here, whereas the
// high bitdepth entries pass 8/10/12.
// NOTE(review): presumably this is the value stored in use_highbd_ - confirm
// against the ConvolveFunctions definition earlier in the file.
const ConvolveFunctions convolve8_c(aom_convolve8_horiz_c, aom_convolve8_vert_c,
                                    0);
const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };

INSTANTIATE_TEST_SUITE_P(C, LowbdConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve_c));
    827 
// SSE2 (x86-64 only): high bitdepth kernels for 8, 10 and 12 bits.
#if HAVE_SSE2 && AOM_ARCH_X86_64
#if CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8,
                                            wrap_convolve8_vert_sse2_8, 8);
const ConvolveFunctions wrap_convolve10_sse2(wrap_convolve8_horiz_sse2_10,
                                             wrap_convolve8_vert_sse2_10, 10);
const ConvolveFunctions wrap_convolve12_sse2(wrap_convolve8_horiz_sse2_12,
                                             wrap_convolve8_vert_sse2_12, 12);
const ConvolveParam kArrayHighbdConvolve_sse2[] = {
  ALL_SIZES(wrap_convolve8_sse2), ALL_SIZES(wrap_convolve10_sse2),
  ALL_SIZES(wrap_convolve12_sse2)
};

INSTANTIATE_TEST_SUITE_P(SSE2, HighbdConvolveTest,
                         ::testing::ValuesIn(kArrayHighbdConvolve_sse2));
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // HAVE_SSE2 && AOM_ARCH_X86_64
    845 
// SSSE3: low bitdepth kernels only.
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(aom_convolve8_horiz_ssse3,
                                        aom_convolve8_vert_ssse3, 0);

const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };

INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif  // HAVE_SSSE3
    855 
// AVX2: high bitdepth kernels (when enabled) followed by the low bitdepth
// ones.
#if HAVE_AVX2
#if CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions wrap_convolve8_avx2(wrap_convolve8_horiz_avx2_8,
                                            wrap_convolve8_vert_avx2_8, 8);
const ConvolveFunctions wrap_convolve10_avx2(wrap_convolve8_horiz_avx2_10,
                                             wrap_convolve8_vert_avx2_10, 10);
const ConvolveFunctions wrap_convolve12_avx2(wrap_convolve8_horiz_avx2_12,
                                             wrap_convolve8_vert_avx2_12, 12);
// Note: this list uses ALL_SIZES_64 while the low bitdepth list below uses
// ALL_SIZES.
const ConvolveParam kArray_HighbdConvolve8_avx2[] = {
  ALL_SIZES_64(wrap_convolve8_avx2), ALL_SIZES_64(wrap_convolve10_avx2),
  ALL_SIZES_64(wrap_convolve12_avx2)
};

INSTANTIATE_TEST_SUITE_P(AVX2, HighbdConvolveTest,
                         ::testing::ValuesIn(kArray_HighbdConvolve8_avx2));
#endif  // CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions convolve8_avx2(aom_convolve8_horiz_avx2,
                                       aom_convolve8_vert_avx2, 0);
const ConvolveParam kArray_Convolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };

INSTANTIATE_TEST_SUITE_P(AVX2, LowbdConvolveTest,
                         ::testing::ValuesIn(kArray_Convolve8_avx2));
#endif  // HAVE_AVX2
    879 
// NEON: high bitdepth kernels (when enabled) followed by the low bitdepth
// ones.
#if HAVE_NEON
#if CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions wrap_convolve8_neon(wrap_convolve8_horiz_neon_8,
                                            wrap_convolve8_vert_neon_8, 8);
const ConvolveFunctions wrap_convolve10_neon(wrap_convolve8_horiz_neon_10,
                                             wrap_convolve8_vert_neon_10, 10);
const ConvolveFunctions wrap_convolve12_neon(wrap_convolve8_horiz_neon_12,
                                             wrap_convolve8_vert_neon_12, 12);
// Note: this list uses ALL_SIZES_64 while the low bitdepth list below uses
// ALL_SIZES.
const ConvolveParam kArray_HighbdConvolve8_neon[] = {
  ALL_SIZES_64(wrap_convolve8_neon), ALL_SIZES_64(wrap_convolve10_neon),
  ALL_SIZES_64(wrap_convolve12_neon)
};

INSTANTIATE_TEST_SUITE_P(NEON, HighbdConvolveTest,
                         ::testing::ValuesIn(kArray_HighbdConvolve8_neon));
#endif  // CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions convolve8_neon(aom_convolve8_horiz_neon,
                                       aom_convolve8_vert_neon, 0);
const ConvolveParam kArray_Convolve8_neon[] = { ALL_SIZES(convolve8_neon) };

INSTANTIATE_TEST_SUITE_P(NEON, LowbdConvolveTest,
                         ::testing::ValuesIn(kArray_Convolve8_neon));
#endif  // HAVE_NEON
    903 
// NEON dot-product extension: low bitdepth kernels only.
#if HAVE_NEON_DOTPROD
const ConvolveFunctions convolve8_neon_dotprod(aom_convolve8_horiz_neon_dotprod,
                                               aom_convolve8_vert_neon_dotprod,
                                               0);
const ConvolveParam kArray_Convolve8_neon_dotprod[] = { ALL_SIZES(
    convolve8_neon_dotprod) };

INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, LowbdConvolveTest,
                         ::testing::ValuesIn(kArray_Convolve8_neon_dotprod));
#endif  // HAVE_NEON_DOTPROD

// NEON I8MM extension: low bitdepth kernels only.
#if HAVE_NEON_I8MM
const ConvolveFunctions convolve8_neon_i8mm(aom_convolve8_horiz_neon_i8mm,
                                            aom_convolve8_vert_neon_i8mm, 0);
const ConvolveParam kArray_Convolve8_neon_i8mm[] = { ALL_SIZES(
    convolve8_neon_i8mm) };

INSTANTIATE_TEST_SUITE_P(NEON_I8MM, LowbdConvolveTest,
                         ::testing::ValuesIn(kArray_Convolve8_neon_i8mm));
#endif  // HAVE_NEON_I8MM
    924 
// SVE: only high bitdepth kernels are instantiated here.
#if HAVE_SVE
#if CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions wrap_convolve8_sve(wrap_convolve8_horiz_sve_8,
                                           wrap_convolve8_vert_sve_8, 8);
const ConvolveFunctions wrap_convolve10_sve(wrap_convolve8_horiz_sve_10,
                                            wrap_convolve8_vert_sve_10, 10);
const ConvolveFunctions wrap_convolve12_sve(wrap_convolve8_horiz_sve_12,
                                            wrap_convolve8_vert_sve_12, 12);
const ConvolveParam kArray_HighbdConvolve8_sve[] = {
  ALL_SIZES_64(wrap_convolve8_sve), ALL_SIZES_64(wrap_convolve10_sve),
  ALL_SIZES_64(wrap_convolve12_sve)
};

INSTANTIATE_TEST_SUITE_P(SVE, HighbdConvolveTest,
                         ::testing::ValuesIn(kArray_HighbdConvolve8_sve));
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // HAVE_SVE
    942 
// Signature shared by the scaled 2D convolution functions under test
// (aom_scaled_2d_c and its SIMD variants).
// NOTE(review): the _q4 suffix presumably means 1/16-sample units; the tests
// below pass offsets in 0..15 and steps in 1..32 - confirm against the
// aom_dsp documentation.
using ConvolveScale2DFunc = void (*)(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const InterpKernel *filter, int x0_q4,
                                     int x_step_q4, int y0_q4, int y_step_q4,
                                     int w, int h);

// Test parameter: (block width, block height, function under test).
using ConvolveScale2DParam = std::tuple<int, int, ConvolveScale2DFunc>;
    950 
    951 class ConvolveScale2DTest
    952    : public ::testing::TestWithParam<ConvolveScale2DParam> {
    953 public:
    954  int Width() const { return GET_PARAM(0); }
    955  int Height() const { return GET_PARAM(1); }
    956  int BorderLeft() const {
    957    const int center = (kOuterBlockSize - Width()) / 2;
    958    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
    959  }
    960  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
    961 
    962  bool IsIndexInBorder(int i) {
    963    return (i < BorderTop() * kOuterBlockSize ||
    964            i >= (BorderTop() + Height()) * kOuterBlockSize ||
    965            i % kOuterBlockSize < BorderLeft() ||
    966            i % kOuterBlockSize >= (BorderLeft() + Width()));
    967  }
    968 
    969  void SetUp() override {
    970    // Force input_ to be unaligned, output to be 16 byte aligned.
    971    input_ = reinterpret_cast<uint8_t *>(
    972                 aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
    973             1;
    974    output_ = reinterpret_cast<uint8_t *>(
    975        aom_memalign(kDataAlignment, kOutputBufferSize));
    976    output_ref_ = reinterpret_cast<uint8_t *>(
    977        aom_memalign(kDataAlignment, kOutputBufferSize));
    978 
    979    ASSERT_NE(input_, nullptr);
    980    ASSERT_NE(output_, nullptr);
    981    ASSERT_NE(output_ref_, nullptr);
    982 
    983    test_func_ = GET_PARAM(2);
    984    /* Set up guard blocks for an inner block centered in the outer block */
    985    for (int i = 0; i < kOutputBufferSize; ++i) {
    986      if (IsIndexInBorder(i)) {
    987        output_[i] = 255;
    988      } else {
    989        output_[i] = 0;
    990      }
    991    }
    992 
    993    ::libaom_test::ACMRandom prng;
    994    for (int i = 0; i < kInputBufferSize; ++i) {
    995      if (i & 1) {
    996        input_[i] = 255;
    997      } else {
    998        input_[i] = prng.Rand8Extremes();
    999      }
   1000    }
   1001  }
   1002 
   1003  void TearDown() override {
   1004    aom_free(input_ - 1);
   1005    input_ = nullptr;
   1006    aom_free(output_);
   1007    output_ = nullptr;
   1008    aom_free(output_ref_);
   1009    output_ref_ = nullptr;
   1010  }
   1011 
   1012  void SetConstantInput(int value) { memset(input_, value, kInputBufferSize); }
   1013 
   1014  void CopyOutputToRef() { memcpy(output_ref_, output_, kOutputBufferSize); }
   1015 
   1016  void CheckGuardBlocks() {
   1017    for (int i = 0; i < kOutputBufferSize; ++i) {
   1018      if (IsIndexInBorder(i)) {
   1019        EXPECT_EQ(255, output_[i]);
   1020      }
   1021    }
   1022  }
   1023 
   1024  uint8_t *input() const {
   1025    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
   1026    return input_ + offset;
   1027  }
   1028 
   1029  uint8_t *output() const {
   1030    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
   1031    return output_ + offset;
   1032  }
   1033 
   1034  uint8_t *output_ref() const {
   1035    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
   1036    return output_ref_ + offset;
   1037  }
   1038 
   1039  uint16_t lookup(uint8_t *list, int index) const { return list[index]; }
   1040 
   1041  void assign_val(uint8_t *list, int index, uint16_t val) const {
   1042    list[index] = (uint8_t)val;
   1043  }
   1044 
   1045  ConvolveScale2DFunc test_func_;
   1046  uint8_t *input_;
   1047  uint8_t *output_;
   1048  uint8_t *output_ref_;
   1049 };
   1050 
   1051 TEST_P(ConvolveScale2DTest, DISABLED_Speed) {
   1052  const uint8_t *const in = input();
   1053  uint8_t *const out = output();
   1054  const InterpKernel *const filter =
   1055      (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR,
   1056                                                         USE_8_TAPS);
   1057  const int kNumTests = 10000;
   1058  const int width = Width();
   1059  const int height = Height();
   1060  const int frac = 8;
   1061  const int step = 16;
   1062  aom_usec_timer timer;
   1063 
   1064  aom_usec_timer_start(&timer);
   1065  for (int n = 0; n < kNumTests; ++n) {
   1066    test_func_(in, kInputStride, out, kOutputStride, filter, frac, step, frac,
   1067               step, width, height);
   1068  }
   1069  aom_usec_timer_mark(&timer);
   1070 
   1071  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
   1072  printf("convolve_scale_2d_%dx%d_%d: %d us\n", width, height, 8, elapsed_time);
   1073 }
   1074 
   1075 TEST_P(ConvolveScale2DTest, Correctness) {
   1076  uint8_t *const in = input();
   1077  uint8_t *const out = output();
   1078  uint8_t ref[kOutputStride * kMaxDimension];
   1079 
   1080  ::libaom_test::ACMRandom prng;
   1081  for (int y = 0; y < Height(); ++y) {
   1082    for (int x = 0; x < Width(); ++x) {
   1083      const uint16_t r = prng.Rand8Extremes();
   1084      assign_val(in, y * kInputStride + x, r);
   1085    }
   1086  }
   1087 
   1088  for (int subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS;
   1089       ++subpel_search) {
   1090    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
   1091      const InterpFilter filter = static_cast<InterpFilter>(filter_bank);
   1092      const InterpKernel *filters =
   1093          (const InterpKernel *)av1_get_interp_filter_kernel(filter,
   1094                                                             subpel_search);
   1095      for (int frac = 0; frac < 16; ++frac) {
   1096        for (int step = 1; step <= 32; ++step) {
   1097          aom_scaled_2d_c(in, kInputStride, ref, kOutputStride, filters, frac,
   1098                          step, frac, step, Width(), Height());
   1099          API_REGISTER_STATE_CHECK(
   1100              test_func_(in, kInputStride, out, kOutputStride, filters, frac,
   1101                         step, frac, step, Width(), Height()));
   1102 
   1103          CheckGuardBlocks();
   1104 
   1105          for (int y = 0; y < Height(); ++y) {
   1106            for (int x = 0; x < Width(); ++x) {
   1107              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
   1108                        lookup(out, y * kOutputStride + x))
   1109                  << "x == " << x << ", y == " << y << ", frac == " << frac
   1110                  << ", step == " << step;
   1111            }
   1112          }
   1113        }
   1114      }
   1115    }
   1116  }
   1117 }
   1118 
// Instantiations of ConvolveScale2DTest. The C function is always tested (it
// is also the reference the Correctness test compares against); each SIMD
// variant is added only when the matching HAVE_* macro is set.
INSTANTIATE_TEST_SUITE_P(C, ConvolveScale2DTest,
                         ::testing::Values(ALL_SIZES_64(aom_scaled_2d_c)));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScale2DTest,
                         ::testing::Values(ALL_SIZES_64(aom_scaled_2d_neon)));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, ConvolveScale2DTest,
    ::testing::Values(ALL_SIZES_64(aom_scaled_2d_neon_dotprod)));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, ConvolveScale2DTest,
    ::testing::Values(ALL_SIZES_64(aom_scaled_2d_neon_i8mm)));
#endif  // HAVE_NEON_I8MM

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, ConvolveScale2DTest,
                         ::testing::Values(ALL_SIZES_64(aom_scaled_2d_ssse3)));
#endif  // HAVE_SSSE3
   1143 
   1144 }  // namespace