convolve_test.cc (42850B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <string.h> 13 #include <tuple> 14 15 #include "gtest/gtest.h" 16 17 #include "config/aom_config.h" 18 #include "config/aom_dsp_rtcd.h" 19 20 #include "aom_dsp/aom_dsp_common.h" 21 #include "aom_dsp/aom_filter.h" 22 #include "aom_mem/aom_mem.h" 23 #include "aom_ports/aom_timer.h" 24 #include "aom_ports/mem.h" 25 #include "av1/common/filter.h" 26 #include "test/acm_random.h" 27 #include "test/register_state_check.h" 28 #include "test/util.h" 29 30 namespace { 31 32 static const unsigned int kMaxDimension = MAX_SB_SIZE; 33 static const int kDataAlignment = 16; 34 static const int kOuterBlockSize = 4 * kMaxDimension; 35 static const int kInputStride = kOuterBlockSize; 36 static const int kOutputStride = kOuterBlockSize; 37 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; 38 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; 39 static const int16_t kInvalidFilter[8] = {}; 40 static const int kNumFilterBanks = SWITCHABLE_FILTERS; 41 static const int kNumFilters = 16; 42 43 using ConvolveFunc = void (*)(const uint8_t *src, ptrdiff_t src_stride, 44 uint8_t *dst, ptrdiff_t dst_stride, 45 const int16_t *filter_x, int filter_x_stride, 46 const int16_t *filter_y, int filter_y_stride, 47 int w, int h); 48 49 struct ConvolveFunctions { 50 ConvolveFunctions(ConvolveFunc h8, ConvolveFunc v8, int bd) 51 : h8_(h8), v8_(v8), use_highbd_(bd) {} 52 53 ConvolveFunc h8_; 54 ConvolveFunc v8_; 55 int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth. 56 }; 57 58 using ConvolveParam = std::tuple<int, int, const ConvolveFunctions *>; 59 60 #define ALL_SIZES_64(convolve_fn) \ 61 make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \ 62 make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn), \ 63 make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn), \ 64 make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \ 65 make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \ 66 make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \ 67 make_tuple(64, 64, &convolve_fn) 68 69 #define ALL_SIZES(convolve_fn) \ 70 make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \ 71 make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn) 72 73 // Reference 8-tap subpixel filter, slightly modified to fit into this test. 74 #define AV1_FILTER_WEIGHT 128 75 #define AV1_FILTER_SHIFT 7 76 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; } 77 78 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride, 79 const int16_t *HFilter, const int16_t *VFilter, 80 uint8_t *dst_ptr, unsigned int dst_stride, 81 unsigned int output_width, unsigned int output_height) { 82 // Between passes, we use an intermediate buffer whose height is extended to 83 // have enough horizontally filtered values as input for the vertical pass. 84 // This buffer is allocated to be big enough for the largest block type we 85 // support. 86 const int kInterp_Extend = 4; 87 const unsigned int intermediate_height = 88 (kInterp_Extend - 1) + output_height + kInterp_Extend; 89 unsigned int i, j; 90 91 assert(intermediate_height > 7); 92 93 // Size of intermediate_buffer is max_intermediate_height * filter_max_width, 94 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height 95 // + kInterp_Extend 96 // = 3 + 16 + 4 97 // = 23 98 // and filter_max_width = 16 99 // 100 uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension]; 101 const int intermediate_next_stride = 102 1 - static_cast<int>(intermediate_height * output_width); 103 104 // Horizontal pass (src -> transposed intermediate). 105 uint8_t *output_ptr = intermediate_buffer; 106 const int src_next_row_stride = src_stride - output_width; 107 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); 108 for (i = 0; i < intermediate_height; ++i) { 109 for (j = 0; j < output_width; ++j) { 110 // Apply filter... 111 const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) + 112 (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) + 113 (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) + 114 (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) + 115 (AV1_FILTER_WEIGHT >> 1); // Rounding 116 117 // Normalize back to 0-255... 118 *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT); 119 ++src_ptr; 120 output_ptr += intermediate_height; 121 } 122 src_ptr += src_next_row_stride; 123 output_ptr += intermediate_next_stride; 124 } 125 126 // Vertical pass (transposed intermediate -> dst). 127 src_ptr = intermediate_buffer; 128 const int dst_next_row_stride = dst_stride - output_width; 129 for (i = 0; i < output_height; ++i) { 130 for (j = 0; j < output_width; ++j) { 131 // Apply filter... 132 const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) + 133 (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) + 134 (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) + 135 (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) + 136 (AV1_FILTER_WEIGHT >> 1); // Rounding 137 138 // Normalize back to 0-255... 139 *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT); 140 src_ptr += intermediate_height; 141 } 142 src_ptr += intermediate_next_stride; 143 dst_ptr += dst_next_row_stride; 144 } 145 } 146 147 void block2d_average_c(uint8_t *src, unsigned int src_stride, 148 uint8_t *output_ptr, unsigned int output_stride, 149 unsigned int output_width, unsigned int output_height) { 150 unsigned int i, j; 151 for (i = 0; i < output_height; ++i) { 152 for (j = 0; j < output_width; ++j) { 153 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; 154 } 155 output_ptr += output_stride; 156 } 157 } 158 159 void filter_average_block2d_8_c(const uint8_t *src_ptr, 160 const unsigned int src_stride, 161 const int16_t *HFilter, const int16_t *VFilter, 162 uint8_t *dst_ptr, unsigned int dst_stride, 163 unsigned int output_width, 164 unsigned int output_height) { 165 uint8_t tmp[kMaxDimension * kMaxDimension]; 166 167 assert(output_width <= kMaxDimension); 168 assert(output_height <= kMaxDimension); 169 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension, 170 output_width, output_height); 171 block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width, 172 output_height); 173 } 174 175 void highbd_filter_block2d_8_c(const uint16_t *src_ptr, 176 const unsigned int src_stride, 177 const int16_t *HFilter, const int16_t *VFilter, 178 uint16_t *dst_ptr, unsigned int dst_stride, 179 unsigned int output_width, 180 unsigned int output_height, int bd) { 181 // Between passes, we use an intermediate buffer whose height is extended to 182 // have enough horizontally filtered values as input for the vertical pass. 183 // This buffer is allocated to be big enough for the largest block type we 184 // support. 185 const int kInterp_Extend = 4; 186 const unsigned int intermediate_height = 187 (kInterp_Extend - 1) + output_height + kInterp_Extend; 188 189 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, 190 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height 191 * + kInterp_Extend 192 * = 3 + 16 + 4 193 * = 23 194 * and filter_max_width = 16 195 */ 196 uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 }; 197 const int intermediate_next_stride = 198 1 - static_cast<int>(intermediate_height * output_width); 199 200 // Horizontal pass (src -> transposed intermediate). 201 { 202 uint16_t *output_ptr = intermediate_buffer; 203 const int src_next_row_stride = src_stride - output_width; 204 unsigned int i, j; 205 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); 206 for (i = 0; i < intermediate_height; ++i) { 207 for (j = 0; j < output_width; ++j) { 208 // Apply filter... 209 const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) + 210 (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) + 211 (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) + 212 (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) + 213 (AV1_FILTER_WEIGHT >> 1); // Rounding 214 215 // Normalize back to 0-255... 216 *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd); 217 ++src_ptr; 218 output_ptr += intermediate_height; 219 } 220 src_ptr += src_next_row_stride; 221 output_ptr += intermediate_next_stride; 222 } 223 } 224 225 // Vertical pass (transposed intermediate -> dst). 226 { 227 const uint16_t *interm_ptr = intermediate_buffer; 228 const int dst_next_row_stride = dst_stride - output_width; 229 unsigned int i, j; 230 for (i = 0; i < output_height; ++i) { 231 for (j = 0; j < output_width; ++j) { 232 // Apply filter... 233 const int temp = 234 (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) + 235 (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) + 236 (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) + 237 (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) + 238 (AV1_FILTER_WEIGHT >> 1); // Rounding 239 240 // Normalize back to 0-255... 241 *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd); 242 interm_ptr += intermediate_height; 243 } 244 interm_ptr += intermediate_next_stride; 245 dst_ptr += dst_next_row_stride; 246 } 247 } 248 } 249 250 void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride, 251 uint16_t *output_ptr, unsigned int output_stride, 252 unsigned int output_width, 253 unsigned int output_height) { 254 unsigned int i, j; 255 for (i = 0; i < output_height; ++i) { 256 for (j = 0; j < output_width; ++j) { 257 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; 258 } 259 output_ptr += output_stride; 260 } 261 } 262 263 void highbd_filter_average_block2d_8_c( 264 const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, 265 const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride, 266 unsigned int output_width, unsigned int output_height, int bd) { 267 uint16_t tmp[kMaxDimension * kMaxDimension]; 268 269 assert(output_width <= kMaxDimension); 270 assert(output_height <= kMaxDimension); 271 highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 272 kMaxDimension, output_width, output_height, bd); 273 highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, 274 output_width, output_height); 275 } 276 277 class ConvolveTestBase : public ::testing::TestWithParam<ConvolveParam> { 278 public: 279 static void SetUpTestSuite() { 280 // Force input_ to be unaligned, output to be 16 byte aligned. 281 input_ = reinterpret_cast<uint8_t *>( 282 aom_memalign(kDataAlignment, kInputBufferSize + 1)) + 283 1; 284 ASSERT_NE(input_, nullptr); 285 ref8_ = reinterpret_cast<uint8_t *>( 286 aom_memalign(kDataAlignment, kOutputStride * kMaxDimension)); 287 ASSERT_NE(ref8_, nullptr); 288 output_ = reinterpret_cast<uint8_t *>( 289 aom_memalign(kDataAlignment, kOutputBufferSize)); 290 ASSERT_NE(output_, nullptr); 291 output_ref_ = reinterpret_cast<uint8_t *>( 292 aom_memalign(kDataAlignment, kOutputBufferSize)); 293 ASSERT_NE(output_ref_, nullptr); 294 input16_ = reinterpret_cast<uint16_t *>(aom_memalign( 295 kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) + 296 1; 297 ASSERT_NE(input16_, nullptr); 298 ref16_ = reinterpret_cast<uint16_t *>(aom_memalign( 299 kDataAlignment, kOutputStride * kMaxDimension * sizeof(uint16_t))); 300 ASSERT_NE(ref16_, nullptr); 301 output16_ = reinterpret_cast<uint16_t *>( 302 aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); 303 ASSERT_NE(output16_, nullptr); 304 output16_ref_ = reinterpret_cast<uint16_t *>( 305 aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); 306 ASSERT_NE(output16_ref_, nullptr); 307 } 308 309 static void TearDownTestSuite() { 310 aom_free(input_ - 1); 311 input_ = nullptr; 312 aom_free(ref8_); 313 ref8_ = nullptr; 314 aom_free(output_); 315 output_ = nullptr; 316 aom_free(output_ref_); 317 output_ref_ = nullptr; 318 aom_free(input16_ - 1); 319 input16_ = nullptr; 320 aom_free(ref16_); 321 ref16_ = nullptr; 322 aom_free(output16_); 323 output16_ = nullptr; 324 aom_free(output16_ref_); 325 output16_ref_ = nullptr; 326 } 327 328 protected: 329 int Width() const { return GET_PARAM(0); } 330 int Height() const { return GET_PARAM(1); } 331 int BorderLeft() const { 332 const int center = (kOuterBlockSize - Width()) / 2; 333 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); 334 } 335 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } 336 337 bool IsIndexInBorder(int i) { 338 return (i < BorderTop() * kOuterBlockSize || 339 i >= (BorderTop() + Height()) * kOuterBlockSize || 340 i % kOuterBlockSize < BorderLeft() || 341 i % kOuterBlockSize >= (BorderLeft() + Width())); 342 } 343 344 void SetUp() override { 345 UUT_ = GET_PARAM(2); 346 if (UUT_->use_highbd_ != 0) 347 mask_ = (1 << UUT_->use_highbd_) - 1; 348 else 349 mask_ = 255; 350 /* Set up guard blocks for an inner block centered in the outer block */ 351 for (int i = 0; i < kOutputBufferSize; ++i) { 352 if (IsIndexInBorder(i)) { 353 output_[i] = 255; 354 output16_[i] = mask_; 355 } else { 356 output_[i] = 0; 357 output16_[i] = 0; 358 } 359 } 360 361 ::libaom_test::ACMRandom prng; 362 for (int i = 0; i < kInputBufferSize; ++i) { 363 if (i & 1) { 364 input_[i] = 255; 365 input16_[i] = mask_; 366 } else { 367 input_[i] = prng.Rand8Extremes(); 368 input16_[i] = prng.Rand16() & mask_; 369 } 370 } 371 } 372 373 void SetConstantInput(int value) { 374 memset(input_, value, kInputBufferSize); 375 aom_memset16(input16_, value, kInputBufferSize); 376 } 377 378 void CopyOutputToRef() { 379 memcpy(output_ref_, output_, kOutputBufferSize); 380 // Copy 16-bit pixels values. The effective number of bytes is double. 381 memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize); 382 } 383 384 void CheckGuardBlocks() { 385 for (int i = 0; i < kOutputBufferSize; ++i) { 386 if (IsIndexInBorder(i)) { 387 EXPECT_EQ(255, output_[i]); 388 } 389 } 390 } 391 392 uint8_t *input() const { 393 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 394 if (UUT_->use_highbd_ == 0) { 395 return input_ + offset; 396 } else { 397 return CONVERT_TO_BYTEPTR(input16_) + offset; 398 } 399 } 400 401 uint8_t *output() const { 402 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 403 if (UUT_->use_highbd_ == 0) { 404 return output_ + offset; 405 } else { 406 return CONVERT_TO_BYTEPTR(output16_) + offset; 407 } 408 } 409 410 uint8_t *output_ref() const { 411 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 412 if (UUT_->use_highbd_ == 0) { 413 return output_ref_ + offset; 414 } else { 415 return CONVERT_TO_BYTEPTR(output16_ref_) + offset; 416 } 417 } 418 419 uint16_t lookup(uint8_t *list, int index) const { 420 if (UUT_->use_highbd_ == 0) { 421 return list[index]; 422 } else { 423 return CONVERT_TO_SHORTPTR(list)[index]; 424 } 425 } 426 427 void assign_val(uint8_t *list, int index, uint16_t val) const { 428 if (UUT_->use_highbd_ == 0) { 429 list[index] = (uint8_t)val; 430 } else { 431 CONVERT_TO_SHORTPTR(list)[index] = val; 432 } 433 } 434 435 void wrapper_filter_average_block2d_8_c( 436 const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, 437 const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride, 438 unsigned int output_width, unsigned int output_height) { 439 if (UUT_->use_highbd_ == 0) { 440 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, 441 dst_stride, output_width, output_height); 442 } else { 443 highbd_filter_average_block2d_8_c( 444 CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter, 445 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height, 446 UUT_->use_highbd_); 447 } 448 } 449 450 void wrapper_filter_block2d_8_c( 451 const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter, 452 const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride, 453 unsigned int output_width, unsigned int output_height) { 454 if (UUT_->use_highbd_ == 0) { 455 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr, 456 dst_stride, output_width, output_height); 457 } else { 458 highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, 459 HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr), 460 dst_stride, output_width, output_height, 461 UUT_->use_highbd_); 462 } 463 } 464 465 void MatchesReferenceSubpixelFilter() { 466 uint8_t *const in = input(); 467 uint8_t *const out = output(); 468 uint8_t *ref; 469 if (UUT_->use_highbd_ == 0) { 470 ref = ref8_; 471 } else { 472 ref = CONVERT_TO_BYTEPTR(ref16_); 473 } 474 int subpel_search; 475 for (subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS; 476 ++subpel_search) { 477 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 478 const InterpFilter filter = (InterpFilter)filter_bank; 479 const InterpKernel *filters = 480 (const InterpKernel *)av1_get_interp_filter_kernel(filter, 481 subpel_search); 482 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 483 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 484 wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], 485 filters[filter_y], ref, kOutputStride, 486 Width(), Height()); 487 488 if (filter_x && filter_y) 489 continue; 490 else if (filter_y) 491 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter, 492 16, filters[filter_y], 16, Width(), Height()); 493 else if (filter_x) 494 API_REGISTER_STATE_CHECK(UUT_->h8_( 495 in, kInputStride, out, kOutputStride, filters[filter_x], 16, 496 kInvalidFilter, 16, Width(), Height())); 497 else 498 continue; 499 500 CheckGuardBlocks(); 501 502 for (int y = 0; y < Height(); ++y) 503 for (int x = 0; x < Width(); ++x) 504 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 505 lookup(out, y * kOutputStride + x)) 506 << "mismatch at (" << x << "," << y << "), " 507 << "filters (" << filter_bank << "," << filter_x << "," 508 << filter_y << ")"; 509 } 510 } 511 } 512 } 513 } 514 515 void FilterExtremes() { 516 uint8_t *const in = input(); 517 uint8_t *const out = output(); 518 uint8_t *ref; 519 if (UUT_->use_highbd_ == 0) { 520 ref = ref8_; 521 } else { 522 ref = CONVERT_TO_BYTEPTR(ref16_); 523 } 524 525 // Populate ref and out with some random data 526 ::libaom_test::ACMRandom prng; 527 for (int y = 0; y < Height(); ++y) { 528 for (int x = 0; x < Width(); ++x) { 529 uint16_t r; 530 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { 531 r = prng.Rand8Extremes(); 532 } else { 533 r = prng.Rand16() & mask_; 534 } 535 assign_val(out, y * kOutputStride + x, r); 536 assign_val(ref, y * kOutputStride + x, r); 537 } 538 } 539 540 for (int axis = 0; axis < 2; axis++) { 541 int seed_val = 0; 542 while (seed_val < 256) { 543 for (int y = 0; y < 8; ++y) { 544 for (int x = 0; x < 8; ++x) { 545 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, 546 ((seed_val >> (axis ? y : x)) & 1) * mask_); 547 if (axis) seed_val++; 548 } 549 if (axis) 550 seed_val -= 8; 551 else 552 seed_val++; 553 } 554 if (axis) seed_val += 8; 555 int subpel_search; 556 for (subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS; 557 ++subpel_search) { 558 for (int filter_bank = 0; filter_bank < kNumFilterBanks; 559 ++filter_bank) { 560 const InterpFilter filter = (InterpFilter)filter_bank; 561 const InterpKernel *filters = 562 (const InterpKernel *)av1_get_interp_filter_kernel( 563 filter, subpel_search); 564 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 565 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 566 wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], 567 filters[filter_y], ref, 568 kOutputStride, Width(), Height()); 569 if (filter_x && filter_y) 570 continue; 571 else if (filter_y) 572 API_REGISTER_STATE_CHECK(UUT_->v8_( 573 in, kInputStride, out, kOutputStride, kInvalidFilter, 16, 574 filters[filter_y], 16, Width(), Height())); 575 else if (filter_x) 576 API_REGISTER_STATE_CHECK(UUT_->h8_( 577 in, kInputStride, out, kOutputStride, filters[filter_x], 578 16, kInvalidFilter, 16, Width(), Height())); 579 else 580 continue; 581 582 for (int y = 0; y < Height(); ++y) 583 for (int x = 0; x < Width(); ++x) 584 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 585 lookup(out, y * kOutputStride + x)) 586 << "mismatch at (" << x << "," << y << "), " 587 << "filters (" << filter_bank << "," << filter_x << "," 588 << filter_y << ")"; 589 } 590 } 591 } 592 } 593 } 594 } 595 } 596 597 void SpeedTest() { 598 uint8_t *const in = input(); 599 uint8_t *const out = output(); 600 uint8_t *ref; 601 if (UUT_->use_highbd_ == 0) { 602 ref = ref8_; 603 } else { 604 ref = CONVERT_TO_BYTEPTR(ref16_); 605 } 606 607 // Populate ref and out with some random data 608 ::libaom_test::ACMRandom prng; 609 for (int y = 0; y < Height(); ++y) { 610 for (int x = 0; x < Width(); ++x) { 611 uint16_t r; 612 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { 613 r = prng.Rand8Extremes(); 614 } else { 615 r = prng.Rand16() & mask_; 616 } 617 assign_val(out, y * kOutputStride + x, r); 618 assign_val(ref, y * kOutputStride + x, r); 619 } 620 } 621 622 InterpFilter filter = (InterpFilter)1; 623 const InterpKernel *filters = 624 (const InterpKernel *)av1_get_interp_filter_kernel(filter, USE_8_TAPS); 625 wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1], 626 out, kOutputStride, Width(), Height()); 627 628 aom_usec_timer timer; 629 int tests_num = 1000; 630 631 aom_usec_timer_start(&timer); 632 while (tests_num > 0) { 633 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 634 filter = (InterpFilter)filter_bank; 635 filters = (const InterpKernel *)av1_get_interp_filter_kernel( 636 filter, USE_8_TAPS); 637 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { 638 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { 639 if (filter_x && filter_y) continue; 640 if (filter_y) 641 API_REGISTER_STATE_CHECK(UUT_->v8_( 642 in, kInputStride, out, kOutputStride, kInvalidFilter, 16, 643 filters[filter_y], 16, Width(), Height())); 644 else if (filter_x) 645 API_REGISTER_STATE_CHECK(UUT_->h8_( 646 in, kInputStride, out, kOutputStride, filters[filter_x], 16, 647 kInvalidFilter, 16, Width(), Height())); 648 } 649 } 650 } 651 tests_num--; 652 } 653 aom_usec_timer_mark(&timer); 654 655 const int elapsed_time = 656 static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000); 657 printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(), 658 UUT_->use_highbd_, elapsed_time); 659 } 660 661 const ConvolveFunctions *UUT_; 662 static uint8_t *input_; 663 static uint8_t *ref8_; 664 static uint8_t *output_; 665 static uint8_t *output_ref_; 666 static uint16_t *input16_; 667 static uint16_t *ref16_; 668 static uint16_t *output16_; 669 static uint16_t *output16_ref_; 670 int mask_; 671 }; 672 673 uint8_t *ConvolveTestBase::input_ = nullptr; 674 uint8_t *ConvolveTestBase::ref8_ = nullptr; 675 uint8_t *ConvolveTestBase::output_ = nullptr; 676 uint8_t *ConvolveTestBase::output_ref_ = nullptr; 677 uint16_t *ConvolveTestBase::input16_ = nullptr; 678 uint16_t *ConvolveTestBase::ref16_ = nullptr; 679 uint16_t *ConvolveTestBase::output16_ = nullptr; 680 uint16_t *ConvolveTestBase::output16_ref_ = nullptr; 681 682 using LowbdConvolveTest = ConvolveTestBase; 683 684 TEST_P(LowbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); } 685 686 void FiltersWontSaturateWhenAddedPairwise() { 687 int subpel_search; 688 for (subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS; 689 ++subpel_search) { 690 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 691 const InterpFilter filter = (InterpFilter)filter_bank; 692 const InterpKernel *filters = 693 (const InterpKernel *)av1_get_interp_filter_kernel(filter, 694 subpel_search); 695 for (int i = 0; i < kNumFilters; i++) { 696 const int p0 = filters[i][0] + filters[i][1]; 697 const int p1 = filters[i][2] + filters[i][3]; 698 const int p2 = filters[i][4] + filters[i][5]; 699 const int p3 = filters[i][6] + filters[i][7]; 700 EXPECT_LE(p0, 128); 701 EXPECT_LE(p1, 128); 702 EXPECT_LE(p2, 128); 703 EXPECT_LE(p3, 128); 704 EXPECT_LE(p0 + p3, 128); 705 EXPECT_LE(p0 + p3 + p1, 128); 706 EXPECT_LE(p0 + p3 + p1 + p2, 128); 707 EXPECT_EQ(p0 + p1 + p2 + p3, 128); 708 } 709 } 710 } 711 } 712 713 TEST(LowbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) { 714 FiltersWontSaturateWhenAddedPairwise(); 715 } 716 717 TEST_P(LowbdConvolveTest, MatchesReferenceSubpixelFilter) { 718 MatchesReferenceSubpixelFilter(); 719 } 720 721 TEST_P(LowbdConvolveTest, FilterExtremes) { FilterExtremes(); } 722 723 TEST_P(LowbdConvolveTest, DISABLED_Speed) { SpeedTest(); } 724 725 using std::make_tuple; 726 727 // WRAP macro is only used for high bitdepth build. 728 #if CONFIG_AV1_HIGHBITDEPTH 729 #define WRAP(func, bd) \ 730 static void wrap_##func##_##bd( \ 731 const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 732 ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \ 733 const int16_t *filter_y, int filter_y_stride, int w, int h) { \ 734 aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \ 735 filter_x_stride, filter_y, filter_y_stride, w, h, bd); \ 736 } 737 #if HAVE_SSE2 && AOM_ARCH_X86_64 738 WRAP(convolve8_horiz_sse2, 8) 739 WRAP(convolve8_vert_sse2, 8) 740 WRAP(convolve8_horiz_sse2, 10) 741 WRAP(convolve8_vert_sse2, 10) 742 WRAP(convolve8_horiz_sse2, 12) 743 WRAP(convolve8_vert_sse2, 12) 744 #endif // HAVE_SSE2 && AOM_ARCH_X86_64 745 746 WRAP(convolve8_horiz_c, 8) 747 WRAP(convolve8_vert_c, 8) 748 WRAP(convolve8_horiz_c, 10) 749 WRAP(convolve8_vert_c, 10) 750 WRAP(convolve8_horiz_c, 12) 751 WRAP(convolve8_vert_c, 12) 752 753 #if HAVE_AVX2 754 WRAP(convolve8_horiz_avx2, 8) 755 WRAP(convolve8_vert_avx2, 8) 756 757 WRAP(convolve8_horiz_avx2, 10) 758 WRAP(convolve8_vert_avx2, 10) 759 760 WRAP(convolve8_horiz_avx2, 12) 761 WRAP(convolve8_vert_avx2, 12) 762 #endif // HAVE_AVX2 763 764 #if HAVE_NEON 765 WRAP(convolve8_horiz_neon, 8) 766 WRAP(convolve8_vert_neon, 8) 767 768 WRAP(convolve8_horiz_neon, 10) 769 WRAP(convolve8_vert_neon, 10) 770 771 WRAP(convolve8_horiz_neon, 12) 772 WRAP(convolve8_vert_neon, 12) 773 #endif // HAVE_NEON 774 775 #if HAVE_SVE 776 WRAP(convolve8_horiz_sve, 8) 777 WRAP(convolve8_vert_sve, 8) 778 779 WRAP(convolve8_horiz_sve, 10) 780 WRAP(convolve8_vert_sve, 10) 781 782 WRAP(convolve8_horiz_sve, 12) 783 WRAP(convolve8_vert_sve, 12) 784 #endif // HAVE_SVE 785 #endif // CONFIG_AV1_HIGHBITDEPTH 786 787 #undef WRAP 788 789 #if CONFIG_AV1_HIGHBITDEPTH 790 791 using HighbdConvolveTest = ConvolveTestBase; 792 793 TEST_P(HighbdConvolveTest, GuardBlocks) { CheckGuardBlocks(); } 794 795 TEST(HighbdConvolveTest, FiltersWontSaturateWhenAddedPairwise) { 796 FiltersWontSaturateWhenAddedPairwise(); 797 } 798 799 TEST_P(HighbdConvolveTest, MatchesReferenceSubpixelFilter) { 800 MatchesReferenceSubpixelFilter(); 801 } 802 803 TEST_P(HighbdConvolveTest, FilterExtremes) { FilterExtremes(); } 804 805 TEST_P(HighbdConvolveTest, DISABLED_Speed) { SpeedTest(); } 806 807 const ConvolveFunctions wrap_convolve8_c(wrap_convolve8_horiz_c_8, 808 wrap_convolve8_vert_c_8, 8); 809 const ConvolveFunctions wrap_convolve10_c(wrap_convolve8_horiz_c_10, 810 wrap_convolve8_vert_c_10, 10); 811 const ConvolveFunctions wrap_convolve12_c(wrap_convolve8_horiz_c_12, 812 wrap_convolve8_vert_c_12, 12); 813 const ConvolveParam kArrayHighbdConvolve_c[] = { ALL_SIZES(wrap_convolve8_c), 814 ALL_SIZES(wrap_convolve10_c), 815 ALL_SIZES(wrap_convolve12_c) }; 816 817 INSTANTIATE_TEST_SUITE_P(C, HighbdConvolveTest, 818 ::testing::ValuesIn(kArrayHighbdConvolve_c)); 819 #endif // CONFIG_AV1_HIGHBITDEPTH 820 821 const ConvolveFunctions convolve8_c(aom_convolve8_horiz_c, aom_convolve8_vert_c, 822 0); 823 const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) }; 824 825 INSTANTIATE_TEST_SUITE_P(C, LowbdConvolveTest, 826 ::testing::ValuesIn(kArrayConvolve_c)); 827 828 #if HAVE_SSE2 && AOM_ARCH_X86_64 829 #if CONFIG_AV1_HIGHBITDEPTH 830 const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8, 831 wrap_convolve8_vert_sse2_8, 8); 832 const ConvolveFunctions wrap_convolve10_sse2(wrap_convolve8_horiz_sse2_10, 833 wrap_convolve8_vert_sse2_10, 10); 834 const ConvolveFunctions wrap_convolve12_sse2(wrap_convolve8_horiz_sse2_12, 835 wrap_convolve8_vert_sse2_12, 12); 836 const ConvolveParam kArrayHighbdConvolve_sse2[] = { 837 ALL_SIZES(wrap_convolve8_sse2), ALL_SIZES(wrap_convolve10_sse2), 838 ALL_SIZES(wrap_convolve12_sse2) 839 }; 840 841 INSTANTIATE_TEST_SUITE_P(SSE2, HighbdConvolveTest, 842 ::testing::ValuesIn(kArrayHighbdConvolve_sse2)); 843 #endif 844 #endif 845 846 #if HAVE_SSSE3 847 const ConvolveFunctions convolve8_ssse3(aom_convolve8_horiz_ssse3, 848 aom_convolve8_vert_ssse3, 0); 849 850 const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) }; 851 852 INSTANTIATE_TEST_SUITE_P(SSSE3, LowbdConvolveTest, 853 ::testing::ValuesIn(kArrayConvolve8_ssse3)); 854 #endif 855 856 #if HAVE_AVX2 857 #if CONFIG_AV1_HIGHBITDEPTH 858 const ConvolveFunctions wrap_convolve8_avx2(wrap_convolve8_horiz_avx2_8, 859 wrap_convolve8_vert_avx2_8, 8); 860 const ConvolveFunctions wrap_convolve10_avx2(wrap_convolve8_horiz_avx2_10, 861 wrap_convolve8_vert_avx2_10, 10); 862 const ConvolveFunctions wrap_convolve12_avx2(wrap_convolve8_horiz_avx2_12, 863 wrap_convolve8_vert_avx2_12, 12); 864 const ConvolveParam kArray_HighbdConvolve8_avx2[] = { 865 ALL_SIZES_64(wrap_convolve8_avx2), ALL_SIZES_64(wrap_convolve10_avx2), 866 ALL_SIZES_64(wrap_convolve12_avx2) 867 }; 868 869 INSTANTIATE_TEST_SUITE_P(AVX2, HighbdConvolveTest, 870 ::testing::ValuesIn(kArray_HighbdConvolve8_avx2)); 871 #endif 872 const ConvolveFunctions convolve8_avx2(aom_convolve8_horiz_avx2, 873 aom_convolve8_vert_avx2, 0); 874 const ConvolveParam kArray_Convolve8_avx2[] = { ALL_SIZES(convolve8_avx2) }; 875 876 INSTANTIATE_TEST_SUITE_P(AVX2, LowbdConvolveTest, 877 ::testing::ValuesIn(kArray_Convolve8_avx2)); 878 #endif // HAVE_AVX2 879 880 #if HAVE_NEON 881 #if CONFIG_AV1_HIGHBITDEPTH 882 const ConvolveFunctions wrap_convolve8_neon(wrap_convolve8_horiz_neon_8, 883 wrap_convolve8_vert_neon_8, 8); 884 const ConvolveFunctions wrap_convolve10_neon(wrap_convolve8_horiz_neon_10, 885 wrap_convolve8_vert_neon_10, 10); 886 const ConvolveFunctions wrap_convolve12_neon(wrap_convolve8_horiz_neon_12, 887 wrap_convolve8_vert_neon_12, 12); 888 const ConvolveParam kArray_HighbdConvolve8_neon[] = { 889 ALL_SIZES_64(wrap_convolve8_neon), ALL_SIZES_64(wrap_convolve10_neon), 890 ALL_SIZES_64(wrap_convolve12_neon) 891 }; 892 893 INSTANTIATE_TEST_SUITE_P(NEON, HighbdConvolveTest, 894 ::testing::ValuesIn(kArray_HighbdConvolve8_neon)); 895 #endif 896 const ConvolveFunctions convolve8_neon(aom_convolve8_horiz_neon, 897 aom_convolve8_vert_neon, 0); 898 const ConvolveParam kArray_Convolve8_neon[] = { ALL_SIZES(convolve8_neon) }; 899 900 INSTANTIATE_TEST_SUITE_P(NEON, LowbdConvolveTest, 901 ::testing::ValuesIn(kArray_Convolve8_neon)); 902 #endif // HAVE_NEON 903 904 #if HAVE_NEON_DOTPROD 905 const ConvolveFunctions convolve8_neon_dotprod(aom_convolve8_horiz_neon_dotprod, 906 aom_convolve8_vert_neon_dotprod, 907 0); 908 const ConvolveParam kArray_Convolve8_neon_dotprod[] = { ALL_SIZES( 909 convolve8_neon_dotprod) }; 910 911 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, LowbdConvolveTest, 912 ::testing::ValuesIn(kArray_Convolve8_neon_dotprod)); 913 #endif // HAVE_NEON_DOTPROD 914 915 #if HAVE_NEON_I8MM 916 const ConvolveFunctions convolve8_neon_i8mm(aom_convolve8_horiz_neon_i8mm, 917 aom_convolve8_vert_neon_i8mm, 0); 918 const ConvolveParam kArray_Convolve8_neon_i8mm[] = { ALL_SIZES( 919 convolve8_neon_i8mm) }; 920 921 INSTANTIATE_TEST_SUITE_P(NEON_I8MM, LowbdConvolveTest, 922 ::testing::ValuesIn(kArray_Convolve8_neon_i8mm)); 923 #endif // HAVE_NEON_I8MM 924 925 #if HAVE_SVE 926 #if CONFIG_AV1_HIGHBITDEPTH 927 const ConvolveFunctions wrap_convolve8_sve(wrap_convolve8_horiz_sve_8, 928 wrap_convolve8_vert_sve_8, 8); 929 const ConvolveFunctions wrap_convolve10_sve(wrap_convolve8_horiz_sve_10, 930 wrap_convolve8_vert_sve_10, 10); 931 const ConvolveFunctions wrap_convolve12_sve(wrap_convolve8_horiz_sve_12, 932 wrap_convolve8_vert_sve_12, 12); 933 const ConvolveParam kArray_HighbdConvolve8_sve[] = { 934 ALL_SIZES_64(wrap_convolve8_sve), ALL_SIZES_64(wrap_convolve10_sve), 935 ALL_SIZES_64(wrap_convolve12_sve) 936 }; 937 938 INSTANTIATE_TEST_SUITE_P(SVE, HighbdConvolveTest, 939 ::testing::ValuesIn(kArray_HighbdConvolve8_sve)); 940 #endif 941 #endif // HAVE_SVE 942 943 using ConvolveScale2DFunc = void (*)(const uint8_t *src, ptrdiff_t src_stride, 944 uint8_t *dst, ptrdiff_t dst_stride, 945 const InterpKernel *filter, int x0_q4, 946 int x_step_q4, int y0_q4, int y_step_q4, 947 int w, int h); 948 949 using ConvolveScale2DParam = std::tuple<int, int, ConvolveScale2DFunc>; 950 951 class ConvolveScale2DTest 952 : public ::testing::TestWithParam<ConvolveScale2DParam> { 953 public: 954 int Width() const { return GET_PARAM(0); } 955 int Height() const { return GET_PARAM(1); } 956 int BorderLeft() const { 957 const int center = (kOuterBlockSize - Width()) / 2; 958 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); 959 } 960 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } 961 962 bool IsIndexInBorder(int i) { 963 return (i < BorderTop() * kOuterBlockSize || 964 i >= (BorderTop() + Height()) * kOuterBlockSize || 965 i % kOuterBlockSize < BorderLeft() || 966 i % kOuterBlockSize >= (BorderLeft() + Width())); 967 } 968 969 void SetUp() override { 970 // Force input_ to be unaligned, output to be 16 byte aligned. 971 input_ = reinterpret_cast<uint8_t *>( 972 aom_memalign(kDataAlignment, kInputBufferSize + 1)) + 973 1; 974 output_ = reinterpret_cast<uint8_t *>( 975 aom_memalign(kDataAlignment, kOutputBufferSize)); 976 output_ref_ = reinterpret_cast<uint8_t *>( 977 aom_memalign(kDataAlignment, kOutputBufferSize)); 978 979 ASSERT_NE(input_, nullptr); 980 ASSERT_NE(output_, nullptr); 981 ASSERT_NE(output_ref_, nullptr); 982 983 test_func_ = GET_PARAM(2); 984 /* Set up guard blocks for an inner block centered in the outer block */ 985 for (int i = 0; i < kOutputBufferSize; ++i) { 986 if (IsIndexInBorder(i)) { 987 output_[i] = 255; 988 } else { 989 output_[i] = 0; 990 } 991 } 992 993 ::libaom_test::ACMRandom prng; 994 for (int i = 0; i < kInputBufferSize; ++i) { 995 if (i & 1) { 996 input_[i] = 255; 997 } else { 998 input_[i] = prng.Rand8Extremes(); 999 } 1000 } 1001 } 1002 1003 void TearDown() override { 1004 aom_free(input_ - 1); 1005 input_ = nullptr; 1006 aom_free(output_); 1007 output_ = nullptr; 1008 aom_free(output_ref_); 1009 output_ref_ = nullptr; 1010 } 1011 1012 void SetConstantInput(int value) { memset(input_, value, kInputBufferSize); } 1013 1014 void CopyOutputToRef() { memcpy(output_ref_, output_, kOutputBufferSize); } 1015 1016 void CheckGuardBlocks() { 1017 for (int i = 0; i < kOutputBufferSize; ++i) { 1018 if (IsIndexInBorder(i)) { 1019 EXPECT_EQ(255, output_[i]); 1020 } 1021 } 1022 } 1023 1024 uint8_t *input() const { 1025 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 1026 return input_ + offset; 1027 } 1028 1029 uint8_t *output() const { 1030 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 1031 return output_ + offset; 1032 } 1033 1034 uint8_t *output_ref() const { 1035 const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); 1036 return output_ref_ + offset; 1037 } 1038 1039 uint16_t lookup(uint8_t *list, int index) const { return list[index]; } 1040 1041 void assign_val(uint8_t *list, int index, uint16_t val) const { 1042 list[index] = (uint8_t)val; 1043 } 1044 1045 ConvolveScale2DFunc test_func_; 1046 uint8_t *input_; 1047 uint8_t *output_; 1048 uint8_t *output_ref_; 1049 }; 1050 1051 TEST_P(ConvolveScale2DTest, DISABLED_Speed) { 1052 const uint8_t *const in = input(); 1053 uint8_t *const out = output(); 1054 const InterpKernel *const filter = 1055 (const InterpKernel *)av1_get_interp_filter_kernel(EIGHTTAP_REGULAR, 1056 USE_8_TAPS); 1057 const int kNumTests = 10000; 1058 const int width = Width(); 1059 const int height = Height(); 1060 const int frac = 8; 1061 const int step = 16; 1062 aom_usec_timer timer; 1063 1064 aom_usec_timer_start(&timer); 1065 for (int n = 0; n < kNumTests; ++n) { 1066 test_func_(in, kInputStride, out, kOutputStride, filter, frac, step, frac, 1067 step, width, height); 1068 } 1069 aom_usec_timer_mark(&timer); 1070 1071 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 1072 printf("convolve_scale_2d_%dx%d_%d: %d us\n", width, height, 8, elapsed_time); 1073 } 1074 1075 TEST_P(ConvolveScale2DTest, Correctness) { 1076 uint8_t *const in = input(); 1077 uint8_t *const out = output(); 1078 uint8_t ref[kOutputStride * kMaxDimension]; 1079 1080 ::libaom_test::ACMRandom prng; 1081 for (int y = 0; y < Height(); ++y) { 1082 for (int x = 0; x < Width(); ++x) { 1083 const uint16_t r = prng.Rand8Extremes(); 1084 assign_val(in, y * kInputStride + x, r); 1085 } 1086 } 1087 1088 for (int subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS; 1089 ++subpel_search) { 1090 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { 1091 const InterpFilter filter = static_cast<InterpFilter>(filter_bank); 1092 const InterpKernel *filters = 1093 (const InterpKernel *)av1_get_interp_filter_kernel(filter, 1094 subpel_search); 1095 for (int frac = 0; frac < 16; ++frac) { 1096 for (int step = 1; step <= 32; ++step) { 1097 aom_scaled_2d_c(in, kInputStride, ref, kOutputStride, filters, frac, 1098 step, frac, step, Width(), Height()); 1099 API_REGISTER_STATE_CHECK( 1100 test_func_(in, kInputStride, out, kOutputStride, filters, frac, 1101 step, frac, step, Width(), Height())); 1102 1103 CheckGuardBlocks(); 1104 1105 for (int y = 0; y < Height(); ++y) { 1106 for (int x = 0; x < Width(); ++x) { 1107 ASSERT_EQ(lookup(ref, y * kOutputStride + x), 1108 lookup(out, y * kOutputStride + x)) 1109 << "x == " << x << ", y == " << y << ", frac == " << frac 1110 << ", step == " << step; 1111 } 1112 } 1113 } 1114 } 1115 } 1116 } 1117 } 1118 1119 INSTANTIATE_TEST_SUITE_P(C, ConvolveScale2DTest, 1120 ::testing::Values(ALL_SIZES_64(aom_scaled_2d_c))); 1121 1122 #if HAVE_NEON 1123 INSTANTIATE_TEST_SUITE_P(NEON, ConvolveScale2DTest, 1124 ::testing::Values(ALL_SIZES_64(aom_scaled_2d_neon))); 1125 #endif // HAVE_NEON 1126 1127 #if HAVE_NEON_DOTPROD 1128 INSTANTIATE_TEST_SUITE_P( 1129 NEON_DOTPROD, ConvolveScale2DTest, 1130 ::testing::Values(ALL_SIZES_64(aom_scaled_2d_neon_dotprod))); 1131 #endif // HAVE_NEON_DOTPROD 1132 1133 #if HAVE_NEON_I8MM 1134 INSTANTIATE_TEST_SUITE_P( 1135 NEON_I8MM, ConvolveScale2DTest, 1136 ::testing::Values(ALL_SIZES_64(aom_scaled_2d_neon_i8mm))); 1137 #endif // HAVE_NEON_I8MM 1138 1139 #if HAVE_SSSE3 1140 INSTANTIATE_TEST_SUITE_P(SSSE3, ConvolveScale2DTest, 1141 ::testing::Values(ALL_SIZES_64(aom_scaled_2d_ssse3))); 1142 #endif // HAVE_SSSE3 1143 1144 } // namespace