variance_test.cc (172704B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <cstdlib> 13 #include <new> 14 #include <ostream> 15 #include <tuple> 16 17 #include "gtest/gtest.h" 18 19 #include "config/aom_config.h" 20 #include "config/aom_dsp_rtcd.h" 21 22 #include "test/acm_random.h" 23 #include "test/register_state_check.h" 24 #include "aom/aom_codec.h" 25 #include "aom/aom_integer.h" 26 #include "aom_mem/aom_mem.h" 27 #include "aom_ports/aom_timer.h" 28 #include "aom_ports/mem.h" 29 #include "av1/common/cdef_block.h" 30 31 namespace { 32 33 using MseWxH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src, 34 int sstride, int w, int h); 35 using Mse16xH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src, 36 int w, int h); 37 using VarianceMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride, 38 const uint8_t *b, int b_stride, 39 unsigned int *sse); 40 using GetSseSum8x8QuadFunc = void (*)(const uint8_t *a, int a_stride, 41 const uint8_t *b, int b_stride, 42 uint32_t *sse8x8, int *sum8x8, 43 unsigned int *tot_sse, int *tot_sum, 44 uint32_t *var8x8); 45 using GetSseSum16x16DualFunc = void (*)(const uint8_t *a, int a_stride, 46 const uint8_t *b, int b_stride, 47 uint32_t *sse16x16, 48 unsigned int *tot_sse, int *tot_sum, 49 uint32_t *var16x16); 50 using SubpixVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride, 51 int xoffset, int yoffset, 52 const uint8_t *b, int b_stride, 53 unsigned int *sse); 54 using SubpixAvgVarMxNFunc = unsigned int (*)(const 
uint8_t *a, int a_stride, 55 int xoffset, int yoffset, 56 const uint8_t *b, int b_stride, 57 uint32_t *sse, 58 const uint8_t *second_pred); 59 using SumOfSquaresFunction = unsigned int (*)(const int16_t *src); 60 61 #if !CONFIG_REALTIME_ONLY 62 using ObmcSubpelVarFunc = uint32_t (*)(const uint8_t *pre, int pre_stride, 63 int xoffset, int yoffset, 64 const int32_t *wsrc, const int32_t *mask, 65 unsigned int *sse); 66 67 #endif 68 69 using libaom_test::ACMRandom; 70 71 // Truncate high bit depth results by downshifting (with rounding) by: 72 // 2 * (bit_depth - 8) for sse 73 // (bit_depth - 8) for se 74 static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) { 75 switch (bit_depth) { 76 case AOM_BITS_12: 77 *sse = (*sse + 128) >> 8; 78 *se = (*se + 8) >> 4; 79 break; 80 case AOM_BITS_10: 81 *sse = (*sse + 8) >> 4; 82 *se = (*se + 2) >> 2; 83 break; 84 case AOM_BITS_8: 85 default: break; 86 } 87 } 88 89 /* Note: 90 * Our codebase calculates the "diff" value in the variance algorithm by 91 * (src - ref). 
92 */ 93 static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w, 94 int l2h, int src_stride, int ref_stride, 95 uint32_t *sse_ptr, bool use_high_bit_depth_, 96 aom_bit_depth_t bit_depth) { 97 int64_t se = 0; 98 uint64_t sse = 0; 99 const int w = 1 << l2w; 100 const int h = 1 << l2h; 101 for (int y = 0; y < h; y++) { 102 for (int x = 0; x < w; x++) { 103 int diff; 104 if (!use_high_bit_depth_) { 105 diff = src[y * src_stride + x] - ref[y * ref_stride + x]; 106 se += diff; 107 sse += diff * diff; 108 } else { 109 diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] - 110 CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x]; 111 se += diff; 112 sse += diff * diff; 113 } 114 } 115 } 116 RoundHighBitDepth(bit_depth, &se, &sse); 117 *sse_ptr = static_cast<uint32_t>(sse); 118 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h))); 119 } 120 121 /* The subpel reference functions differ from the codec version in one aspect: 122 * they calculate the bilinear factors directly instead of using a lookup table 123 * and therefore upshift xoff and yoff by 1. Only every other calculated value 124 * is used so the codec version shrinks the table to save space. 125 */ 126 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src, 127 int l2w, int l2h, int xoff, int yoff, 128 uint32_t *sse_ptr, bool use_high_bit_depth_, 129 aom_bit_depth_t bit_depth) { 130 int64_t se = 0; 131 uint64_t sse = 0; 132 const int w = 1 << l2w; 133 const int h = 1 << l2h; 134 135 xoff <<= 1; 136 yoff <<= 1; 137 138 for (int y = 0; y < h; y++) { 139 for (int x = 0; x < w; x++) { 140 // Bilinear interpolation at a 16th pel step. 
141 if (!use_high_bit_depth_) { 142 const int a1 = ref[(w + 1) * (y + 0) + x + 0]; 143 const int a2 = ref[(w + 1) * (y + 0) + x + 1]; 144 const int b1 = ref[(w + 1) * (y + 1) + x + 0]; 145 const int b2 = ref[(w + 1) * (y + 1) + x + 1]; 146 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); 147 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); 148 const int r = a + (((b - a) * yoff + 8) >> 4); 149 const int diff = r - src[w * y + x]; 150 se += diff; 151 sse += diff * diff; 152 } else { 153 uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); 154 uint16_t *src16 = CONVERT_TO_SHORTPTR(src); 155 const int a1 = ref16[(w + 1) * (y + 0) + x + 0]; 156 const int a2 = ref16[(w + 1) * (y + 0) + x + 1]; 157 const int b1 = ref16[(w + 1) * (y + 1) + x + 0]; 158 const int b2 = ref16[(w + 1) * (y + 1) + x + 1]; 159 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); 160 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); 161 const int r = a + (((b - a) * yoff + 8) >> 4); 162 const int diff = r - src16[w * y + x]; 163 se += diff; 164 sse += diff * diff; 165 } 166 } 167 } 168 RoundHighBitDepth(bit_depth, &se, &sse); 169 *sse_ptr = static_cast<uint32_t>(sse); 170 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h))); 171 } 172 173 static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src, 174 const uint8_t *second_pred, int l2w, 175 int l2h, int xoff, int yoff, 176 uint32_t *sse_ptr, 177 bool use_high_bit_depth, 178 aom_bit_depth_t bit_depth) { 179 int64_t se = 0; 180 uint64_t sse = 0; 181 const int w = 1 << l2w; 182 const int h = 1 << l2h; 183 184 xoff <<= 1; 185 yoff <<= 1; 186 187 for (int y = 0; y < h; y++) { 188 for (int x = 0; x < w; x++) { 189 // bilinear interpolation at a 16th pel step 190 if (!use_high_bit_depth) { 191 const int a1 = ref[(w + 1) * (y + 0) + x + 0]; 192 const int a2 = ref[(w + 1) * (y + 0) + x + 1]; 193 const int b1 = ref[(w + 1) * (y + 1) + x + 0]; 194 const int b2 = ref[(w + 1) * (y + 1) + x + 1]; 195 const int a = a1 + (((a2 - a1) * xoff + 
8) >> 4); 196 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); 197 const int r = a + (((b - a) * yoff + 8) >> 4); 198 const int diff = 199 ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x]; 200 se += diff; 201 sse += diff * diff; 202 } else { 203 const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); 204 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); 205 const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred); 206 const int a1 = ref16[(w + 1) * (y + 0) + x + 0]; 207 const int a2 = ref16[(w + 1) * (y + 0) + x + 1]; 208 const int b1 = ref16[(w + 1) * (y + 1) + x + 0]; 209 const int b2 = ref16[(w + 1) * (y + 1) + x + 1]; 210 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); 211 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); 212 const int r = a + (((b - a) * yoff + 8) >> 4); 213 const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x]; 214 se += diff; 215 sse += diff * diff; 216 } 217 } 218 } 219 RoundHighBitDepth(bit_depth, &se, &sse); 220 *sse_ptr = static_cast<uint32_t>(sse); 221 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h))); 222 } 223 224 #if !CONFIG_REALTIME_ONLY 225 static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h, 226 int xoff, int yoff, 227 const int32_t *wsrc, 228 const int32_t *mask, uint32_t *sse_ptr, 229 bool use_high_bit_depth_, 230 aom_bit_depth_t bit_depth) { 231 int64_t se = 0; 232 uint64_t sse = 0; 233 const int w = 1 << l2w; 234 const int h = 1 << l2h; 235 236 xoff <<= 1; 237 yoff <<= 1; 238 239 for (int y = 0; y < h; y++) { 240 for (int x = 0; x < w; x++) { 241 // Bilinear interpolation at a 16th pel step. 
242 if (!use_high_bit_depth_) { 243 const int a1 = pre[(w + 1) * (y + 0) + x + 0]; 244 const int a2 = pre[(w + 1) * (y + 0) + x + 1]; 245 const int b1 = pre[(w + 1) * (y + 1) + x + 0]; 246 const int b2 = pre[(w + 1) * (y + 1) + x + 1]; 247 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); 248 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); 249 const int r = a + (((b - a) * yoff + 8) >> 4); 250 const int diff = ROUND_POWER_OF_TWO_SIGNED( 251 wsrc[w * y + x] - r * mask[w * y + x], 12); 252 se += diff; 253 sse += diff * diff; 254 } else { 255 uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre); 256 const int a1 = pre16[(w + 1) * (y + 0) + x + 0]; 257 const int a2 = pre16[(w + 1) * (y + 0) + x + 1]; 258 const int b1 = pre16[(w + 1) * (y + 1) + x + 0]; 259 const int b2 = pre16[(w + 1) * (y + 1) + x + 1]; 260 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); 261 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); 262 const int r = a + (((b - a) * yoff + 8) >> 4); 263 const int diff = ROUND_POWER_OF_TWO_SIGNED( 264 wsrc[w * y + x] - r * mask[w * y + x], 12); 265 se += diff; 266 sse += diff * diff; 267 } 268 } 269 } 270 RoundHighBitDepth(bit_depth, &se, &sse); 271 *sse_ptr = static_cast<uint32_t>(sse); 272 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h))); 273 } 274 #endif 275 276 //////////////////////////////////////////////////////////////////////////////// 277 278 #if !CONFIG_REALTIME_ONLY 279 class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> { 280 public: 281 SumOfSquaresTest() : func_(GetParam()) {} 282 283 ~SumOfSquaresTest() override = default; 284 285 protected: 286 void ConstTest(); 287 void RefTest(); 288 289 SumOfSquaresFunction func_; 290 ACMRandom rnd_; 291 }; 292 293 void SumOfSquaresTest::ConstTest() { 294 int16_t mem[256]; 295 unsigned int res; 296 for (int v = 0; v < 256; ++v) { 297 for (int i = 0; i < 256; ++i) { 298 mem[i] = v; 299 } 300 API_REGISTER_STATE_CHECK(res = func_(mem)); 301 EXPECT_EQ(256u * (v * v), res); 302 
} 303 } 304 305 unsigned int mb_ss_ref(const int16_t *src) { 306 unsigned int res = 0; 307 for (int i = 0; i < 256; ++i) { 308 res += src[i] * src[i]; 309 } 310 return res; 311 } 312 313 void SumOfSquaresTest::RefTest() { 314 int16_t mem[256]; 315 for (int i = 0; i < 100; ++i) { 316 for (int j = 0; j < 256; ++j) { 317 mem[j] = rnd_.Rand8() - rnd_.Rand8(); 318 } 319 320 const unsigned int expected = mb_ss_ref(mem); 321 unsigned int res; 322 API_REGISTER_STATE_CHECK(res = func_(mem)); 323 EXPECT_EQ(expected, res); 324 } 325 } 326 #endif // !CONFIG_REALTIME_ONLY 327 328 //////////////////////////////////////////////////////////////////////////////// 329 // Encapsulating struct to store the function to test along with 330 // some testing context. 331 // Can be used for MSE, SSE, Variance, etc. 332 333 template <typename Func> 334 struct TestParams { 335 TestParams(int log2w = 0, int log2h = 0, Func function = nullptr, 336 int bit_depth_value = 0) 337 : log2width(log2w), log2height(log2h), func(function) { 338 use_high_bit_depth = (bit_depth_value > 0); 339 if (use_high_bit_depth) { 340 bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value); 341 } else { 342 bit_depth = AOM_BITS_8; 343 } 344 width = 1 << log2width; 345 height = 1 << log2height; 346 block_size = width * height; 347 mask = (1u << bit_depth) - 1; 348 } 349 350 int log2width, log2height; 351 int width, height; 352 int block_size; 353 Func func; 354 aom_bit_depth_t bit_depth; 355 bool use_high_bit_depth; 356 uint32_t mask; 357 }; 358 359 template <typename Func> 360 std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) { 361 return os << "width/height:" << p.width << "/" << p.height 362 << " function:" << reinterpret_cast<const void *>(p.func) 363 << " bit-depth:" << p.bit_depth; 364 } 365 366 // Main class for testing a function type 367 template <typename FunctionType> 368 class MseWxHTestClass 369 : public ::testing::TestWithParam<TestParams<FunctionType> > { 370 public: 371 void 
SetUp() override { 372 params_ = this->GetParam(); 373 374 rnd_.Reset(ACMRandom::DeterministicSeed()); 375 src_ = reinterpret_cast<uint16_t *>( 376 aom_memalign(16, block_size() * sizeof(src_))); 377 dst_ = reinterpret_cast<uint8_t *>( 378 aom_memalign(16, block_size() * sizeof(dst_))); 379 ASSERT_NE(src_, nullptr); 380 ASSERT_NE(dst_, nullptr); 381 } 382 383 void TearDown() override { 384 aom_free(src_); 385 aom_free(dst_); 386 src_ = nullptr; 387 dst_ = nullptr; 388 } 389 390 protected: 391 void RefMatchTestMse(); 392 void SpeedTest(); 393 394 protected: 395 ACMRandom rnd_; 396 uint8_t *dst_; 397 uint16_t *src_; 398 TestParams<FunctionType> params_; 399 400 // some relay helpers 401 int block_size() const { return params_.block_size; } 402 int width() const { return params_.width; } 403 int height() const { return params_.height; } 404 int d_stride() const { return params_.width; } // stride is same as width 405 int s_stride() const { return params_.width; } // stride is same as width 406 }; 407 408 template <typename MseWxHFunctionType> 409 void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() { 410 aom_usec_timer ref_timer, test_timer; 411 double elapsed_time_c = 0; 412 double elapsed_time_simd = 0; 413 int run_time = 10000000; 414 int w = width(); 415 int h = height(); 416 int dstride = d_stride(); 417 int sstride = s_stride(); 418 419 for (int k = 0; k < block_size(); ++k) { 420 dst_[k] = rnd_.Rand8(); 421 src_[k] = rnd_.Rand8(); 422 } 423 aom_usec_timer_start(&ref_timer); 424 for (int i = 0; i < run_time; i++) { 425 aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h); 426 } 427 aom_usec_timer_mark(&ref_timer); 428 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); 429 430 aom_usec_timer_start(&test_timer); 431 for (int i = 0; i < run_time; i++) { 432 params_.func(dst_, dstride, src_, sstride, w, h); 433 } 434 aom_usec_timer_mark(&test_timer); 435 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer)); 436 437 
printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(), 438 elapsed_time_c, elapsed_time_simd, 439 (elapsed_time_c / elapsed_time_simd)); 440 } 441 442 template <typename MseWxHFunctionType> 443 void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() { 444 uint64_t mse_ref = 0; 445 uint64_t mse_mod = 0; 446 int w = width(); 447 int h = height(); 448 int dstride = d_stride(); 449 int sstride = s_stride(); 450 451 for (int i = 0; i < 10; i++) { 452 for (int k = 0; k < block_size(); ++k) { 453 dst_[k] = rnd_.Rand8(); 454 src_[k] = rnd_.Rand8(); 455 } 456 API_REGISTER_STATE_CHECK( 457 mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h)); 458 API_REGISTER_STATE_CHECK( 459 mse_mod = params_.func(dst_, dstride, src_, sstride, w, h)); 460 EXPECT_EQ(mse_ref, mse_mod) 461 << "ref mse: " << mse_ref << " mod mse: " << mse_mod; 462 } 463 } 464 465 template <typename FunctionType> 466 class Mse16xHTestClass 467 : public ::testing::TestWithParam<TestParams<FunctionType> > { 468 public: 469 // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for 470 // maximum width 16 and maximum height 8. 
471 int mem_size = 16 * 8; 472 void SetUp() override { 473 params_ = this->GetParam(); 474 rnd_.Reset(ACMRandom::DeterministicSeed()); 475 src_ = reinterpret_cast<uint16_t *>( 476 aom_memalign(16, mem_size * sizeof(*src_))); 477 dst_ = 478 reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_))); 479 ASSERT_NE(src_, nullptr); 480 ASSERT_NE(dst_, nullptr); 481 } 482 483 void TearDown() override { 484 aom_free(src_); 485 aom_free(dst_); 486 src_ = nullptr; 487 dst_ = nullptr; 488 } 489 490 uint8_t RandBool() { 491 const uint32_t value = rnd_.Rand8(); 492 return (value & 0x1); 493 } 494 495 protected: 496 void RefMatchExtremeTestMse(); 497 void RefMatchTestMse(); 498 void SpeedTest(); 499 500 protected: 501 ACMRandom rnd_; 502 uint8_t *dst_; 503 uint16_t *src_; 504 TestParams<FunctionType> params_; 505 506 // some relay helpers 507 int width() const { return params_.width; } 508 int height() const { return params_.height; } 509 int d_stride() const { return params_.width; } 510 }; 511 512 template <typename Mse16xHFunctionType> 513 void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() { 514 aom_usec_timer ref_timer, test_timer; 515 double elapsed_time_c = 0.0; 516 double elapsed_time_simd = 0.0; 517 const int loop_count = 10000000; 518 const int w = width(); 519 const int h = height(); 520 const int dstride = d_stride(); 521 522 for (int k = 0; k < mem_size; ++k) { 523 dst_[k] = rnd_.Rand8(); 524 // Right shift by 6 is done to generate more input in range of [0,255] than 525 // CDEF_VERY_LARGE 526 int rnd_i10 = rnd_.Rand16() >> 6; 527 src_[k] = (rnd_i10 < 256) ? 
rnd_i10 : CDEF_VERY_LARGE; 528 } 529 530 aom_usec_timer_start(&ref_timer); 531 for (int i = 0; i < loop_count; i++) { 532 aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h); 533 } 534 aom_usec_timer_mark(&ref_timer); 535 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); 536 537 aom_usec_timer_start(&test_timer); 538 for (int i = 0; i < loop_count; i++) { 539 params_.func(dst_, dstride, src_, w, h); 540 } 541 aom_usec_timer_mark(&test_timer); 542 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer)); 543 544 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(), 545 height(), elapsed_time_c, elapsed_time_simd, 546 (elapsed_time_c / elapsed_time_simd)); 547 } 548 549 template <typename Mse16xHFunctionType> 550 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() { 551 uint64_t mse_ref = 0; 552 uint64_t mse_mod = 0; 553 const int w = width(); 554 const int h = height(); 555 const int dstride = d_stride(); 556 557 for (int i = 0; i < 10; i++) { 558 for (int k = 0; k < mem_size; ++k) { 559 dst_[k] = rnd_.Rand8(); 560 // Right shift by 6 is done to generate more input in range of [0,255] 561 // than CDEF_VERY_LARGE 562 int rnd_i10 = rnd_.Rand16() >> 6; 563 src_[k] = (rnd_i10 < 256) ? 
rnd_i10 : CDEF_VERY_LARGE; 564 } 565 566 API_REGISTER_STATE_CHECK( 567 mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h)); 568 API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h)); 569 EXPECT_EQ(mse_ref, mse_mod) 570 << "ref mse: " << mse_ref << " mod mse: " << mse_mod; 571 } 572 } 573 574 template <typename Mse16xHFunctionType> 575 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() { 576 uint64_t mse_ref = 0; 577 uint64_t mse_mod = 0; 578 const int w = width(); 579 const int h = height(); 580 const int dstride = d_stride(); 581 const int iter = 10; 582 583 // Fill the buffers with extreme values 584 for (int i = 0; i < iter; i++) { 585 for (int k = 0; k < mem_size; ++k) { 586 dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255); 587 src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE); 588 } 589 590 API_REGISTER_STATE_CHECK( 591 mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h)); 592 API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h)); 593 EXPECT_EQ(mse_ref, mse_mod) 594 << "ref mse: " << mse_ref << " mod mse: " << mse_mod; 595 } 596 } 597 598 // Main class for testing a function type 599 template <typename FunctionType> 600 class MainTestClass 601 : public ::testing::TestWithParam<TestParams<FunctionType> > { 602 public: 603 void SetUp() override { 604 params_ = this->GetParam(); 605 606 rnd_.Reset(ACMRandom::DeterministicSeed()); 607 const size_t unit = 608 use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t); 609 src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit)); 610 ref_ = new uint8_t[block_size() * unit]; 611 ASSERT_NE(src_, nullptr); 612 ASSERT_NE(ref_, nullptr); 613 memset(src_, 0, block_size() * sizeof(src_[0])); 614 memset(ref_, 0, block_size() * sizeof(ref_[0])); 615 if (use_high_bit_depth()) { 616 // TODO(skal): remove! 
617 src_ = CONVERT_TO_BYTEPTR(src_); 618 ref_ = CONVERT_TO_BYTEPTR(ref_); 619 } 620 } 621 622 void TearDown() override { 623 if (use_high_bit_depth()) { 624 // TODO(skal): remove! 625 src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_)); 626 ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_)); 627 } 628 629 aom_free(src_); 630 delete[] ref_; 631 src_ = nullptr; 632 ref_ = nullptr; 633 } 634 635 protected: 636 // We could sub-class MainTestClass into dedicated class for Variance 637 // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing 638 // to access top class fields xxx. That's cumbersome, so for now we'll just 639 // implement the testing methods here: 640 641 // Variance tests 642 void ZeroTest(); 643 void RefTest(); 644 void RefStrideTest(); 645 void OneQuarterTest(); 646 void SpeedTest(); 647 648 // SSE&SUM tests 649 void RefTestSseSum(); 650 void MinTestSseSum(); 651 void MaxTestSseSum(); 652 void SseSum_SpeedTest(); 653 654 // SSE&SUM dual tests 655 void RefTestSseSumDual(); 656 void MinTestSseSumDual(); 657 void MaxTestSseSumDual(); 658 void SseSum_SpeedTestDual(); 659 660 // MSE/SSE tests 661 void RefTestMse(); 662 void RefTestSse(); 663 void MaxTestMse(); 664 void MaxTestSse(); 665 666 protected: 667 ACMRandom rnd_; 668 uint8_t *src_; 669 uint8_t *ref_; 670 TestParams<FunctionType> params_; 671 672 // some relay helpers 673 bool use_high_bit_depth() const { return params_.use_high_bit_depth; } 674 int byte_shift() const { return params_.bit_depth - 8; } 675 int block_size() const { return params_.block_size; } 676 int width() const { return params_.width; } 677 int height() const { return params_.height; } 678 uint32_t mask() const { return params_.mask; } 679 }; 680 681 //////////////////////////////////////////////////////////////////////////////// 682 // Tests related to variance. 
683 684 template <typename VarianceFunctionType> 685 void MainTestClass<VarianceFunctionType>::ZeroTest() { 686 for (int i = 0; i <= 255; ++i) { 687 if (!use_high_bit_depth()) { 688 memset(src_, i, block_size()); 689 } else { 690 uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_); 691 for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift(); 692 } 693 for (int j = 0; j <= 255; ++j) { 694 if (!use_high_bit_depth()) { 695 memset(ref_, j, block_size()); 696 } else { 697 uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_); 698 for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift(); 699 } 700 unsigned int sse, var; 701 API_REGISTER_STATE_CHECK( 702 var = params_.func(src_, width(), ref_, width(), &sse)); 703 EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j; 704 } 705 } 706 } 707 708 template <typename VarianceFunctionType> 709 void MainTestClass<VarianceFunctionType>::RefTest() { 710 for (int i = 0; i < 10; ++i) { 711 for (int j = 0; j < block_size(); j++) { 712 if (!use_high_bit_depth()) { 713 src_[j] = rnd_.Rand8(); 714 ref_[j] = rnd_.Rand8(); 715 } else { 716 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); 717 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); 718 } 719 } 720 unsigned int sse1, sse2, var1, var2; 721 const int stride = width(); 722 API_REGISTER_STATE_CHECK( 723 var1 = params_.func(src_, stride, ref_, stride, &sse1)); 724 var2 = 725 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, 726 stride, &sse2, use_high_bit_depth(), params_.bit_depth); 727 EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; 728 EXPECT_EQ(var1, var2) << "Error at test index: " << i; 729 } 730 } 731 732 template <typename VarianceFunctionType> 733 void MainTestClass<VarianceFunctionType>::RefStrideTest() { 734 for (int i = 0; i < 10; ++i) { 735 const int ref_stride = (i & 1) * width(); 736 const int src_stride = ((i >> 1) & 1) * width(); 737 for (int j = 0; j < block_size(); j++) { 738 const int ref_ind = (j 
/ width()) * ref_stride + j % width(); 739 const int src_ind = (j / width()) * src_stride + j % width(); 740 if (!use_high_bit_depth()) { 741 src_[src_ind] = rnd_.Rand8(); 742 ref_[ref_ind] = rnd_.Rand8(); 743 } else { 744 CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask(); 745 CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask(); 746 } 747 } 748 unsigned int sse1, sse2; 749 unsigned int var1, var2; 750 751 API_REGISTER_STATE_CHECK( 752 var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1)); 753 var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, 754 src_stride, ref_stride, &sse2, use_high_bit_depth(), 755 params_.bit_depth); 756 EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; 757 EXPECT_EQ(var1, var2) << "Error at test index: " << i; 758 } 759 } 760 761 template <typename VarianceFunctionType> 762 void MainTestClass<VarianceFunctionType>::OneQuarterTest() { 763 const int half = block_size() / 2; 764 if (!use_high_bit_depth()) { 765 memset(src_, 255, block_size()); 766 memset(ref_, 255, half); 767 memset(ref_ + half, 0, half); 768 } else { 769 aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size()); 770 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half); 771 aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half); 772 } 773 unsigned int sse, var, expected; 774 API_REGISTER_STATE_CHECK( 775 var = params_.func(src_, width(), ref_, width(), &sse)); 776 expected = block_size() * 255 * 255 / 4; 777 EXPECT_EQ(expected, var); 778 } 779 780 template <typename VarianceFunctionType> 781 void MainTestClass<VarianceFunctionType>::SpeedTest() { 782 for (int j = 0; j < block_size(); j++) { 783 if (!use_high_bit_depth()) { 784 src_[j] = rnd_.Rand8(); 785 ref_[j] = rnd_.Rand8(); 786 #if CONFIG_AV1_HIGHBITDEPTH 787 } else { 788 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); 789 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); 790 #endif // CONFIG_AV1_HIGHBITDEPTH 791 } 792 } 793 
unsigned int sse; 794 const int stride = width(); 795 int run_time = 1000000000 / block_size(); 796 aom_usec_timer timer; 797 aom_usec_timer_start(&timer); 798 for (int i = 0; i < run_time; ++i) { 799 params_.func(src_, stride, ref_, stride, &sse); 800 } 801 802 aom_usec_timer_mark(&timer); 803 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 804 printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time); 805 } 806 807 template <typename GetSseSum8x8QuadFuncType> 808 void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() { 809 for (int i = 0; i < 10; ++i) { 810 for (int j = 0; j < block_size(); ++j) { 811 src_[j] = rnd_.Rand8(); 812 ref_[j] = rnd_.Rand8(); 813 } 814 unsigned int sse1[256] = { 0 }; 815 unsigned int sse2[256] = { 0 }; 816 unsigned int var1[256] = { 0 }; 817 unsigned int var2[256] = { 0 }; 818 int sum1[256] = { 0 }; 819 int sum2[256] = { 0 }; 820 unsigned int sse_tot_c = 0; 821 unsigned int sse_tot_simd = 0; 822 int sum_tot_c = 0; 823 int sum_tot_simd = 0; 824 const int stride = width(); 825 int k = 0; 826 827 for (int row = 0; row < height(); row += 8) { 828 for (int col = 0; col < width(); col += 32) { 829 API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride, 830 ref_ + stride * row + col, stride, 831 &sse1[k], &sum1[k], &sse_tot_simd, 832 &sum_tot_simd, &var1[k])); 833 aom_get_var_sse_sum_8x8_quad_c( 834 src_ + stride * row + col, stride, ref_ + stride * row + col, 835 stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]); 836 k += 4; 837 } 838 } 839 EXPECT_EQ(sse_tot_c, sse_tot_simd); 840 EXPECT_EQ(sum_tot_c, sum_tot_simd); 841 for (int p = 0; p < 256; p++) { 842 EXPECT_EQ(sse1[p], sse2[p]); 843 EXPECT_EQ(sum1[p], sum2[p]); 844 EXPECT_EQ(var1[p], var2[p]); 845 } 846 } 847 } 848 849 template <typename GetSseSum8x8QuadFuncType> 850 void MainTestClass<GetSseSum8x8QuadFuncType>::MinTestSseSum() { 851 memset(src_, 0, block_size()); 852 memset(ref_, 255, block_size()); 853 unsigned int 
sse1[256] = { 0 }; 854 unsigned int sse2[256] = { 0 }; 855 unsigned int var1[256] = { 0 }; 856 unsigned int var2[256] = { 0 }; 857 int sum1[256] = { 0 }; 858 int sum2[256] = { 0 }; 859 unsigned int sse_tot_c = 0; 860 unsigned int sse_tot_simd = 0; 861 int sum_tot_c = 0; 862 int sum_tot_simd = 0; 863 const int stride = width(); 864 int k = 0; 865 866 for (int i = 0; i < height(); i += 8) { 867 for (int j = 0; j < width(); j += 32) { 868 API_REGISTER_STATE_CHECK(params_.func( 869 src_ + stride * i + j, stride, ref_ + stride * i + j, stride, 870 &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); 871 aom_get_var_sse_sum_8x8_quad_c( 872 src_ + stride * i + j, stride, ref_ + stride * i + j, stride, 873 &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]); 874 k += 4; 875 } 876 } 877 EXPECT_EQ(sse_tot_simd, sse_tot_c); 878 EXPECT_EQ(sum_tot_simd, sum_tot_c); 879 for (int p = 0; p < 256; p++) { 880 EXPECT_EQ(sse1[p], sse2[p]); 881 EXPECT_EQ(sum1[p], sum2[p]); 882 EXPECT_EQ(var1[p], var2[p]); 883 } 884 } 885 886 template <typename GetSseSum8x8QuadFuncType> 887 void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() { 888 memset(src_, 255, block_size()); 889 memset(ref_, 0, block_size()); 890 unsigned int sse1[256] = { 0 }; 891 unsigned int sse2[256] = { 0 }; 892 unsigned int var1[256] = { 0 }; 893 unsigned int var2[256] = { 0 }; 894 int sum1[256] = { 0 }; 895 int sum2[256] = { 0 }; 896 unsigned int sse_tot_c = 0; 897 unsigned int sse_tot_simd = 0; 898 int sum_tot_c = 0; 899 int sum_tot_simd = 0; 900 const int stride = width(); 901 int k = 0; 902 903 for (int i = 0; i < height(); i += 8) { 904 for (int j = 0; j < width(); j += 32) { 905 API_REGISTER_STATE_CHECK(params_.func( 906 src_ + stride * i + j, stride, ref_ + stride * i + j, stride, 907 &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); 908 aom_get_var_sse_sum_8x8_quad_c( 909 src_ + stride * i + j, stride, ref_ + stride * i + j, stride, 910 &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, 
&var2[k]); 911 k += 4; 912 } 913 } 914 EXPECT_EQ(sse_tot_c, sse_tot_simd); 915 EXPECT_EQ(sum_tot_c, sum_tot_simd); 916 917 for (int p = 0; p < 256; p++) { 918 EXPECT_EQ(sse1[p], sse2[p]); 919 EXPECT_EQ(sum1[p], sum2[p]); 920 EXPECT_EQ(var1[p], var2[p]); 921 } 922 } 923 924 template <typename GetSseSum8x8QuadFuncType> 925 void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() { 926 const int loop_count = 1000000000 / block_size(); 927 for (int j = 0; j < block_size(); ++j) { 928 src_[j] = rnd_.Rand8(); 929 ref_[j] = rnd_.Rand8(); 930 } 931 932 unsigned int sse1[4] = { 0 }; 933 unsigned int sse2[4] = { 0 }; 934 unsigned int var1[4] = { 0 }; 935 unsigned int var2[4] = { 0 }; 936 int sum1[4] = { 0 }; 937 int sum2[4] = { 0 }; 938 unsigned int sse_tot_c = 0; 939 unsigned int sse_tot_simd = 0; 940 int sum_tot_c = 0; 941 int sum_tot_simd = 0; 942 const int stride = width(); 943 944 aom_usec_timer timer; 945 aom_usec_timer_start(&timer); 946 for (int r = 0; r < loop_count; ++r) { 947 for (int i = 0; i < height(); i += 8) { 948 for (int j = 0; j < width(); j += 32) { 949 aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride, 950 ref_ + stride * i + j, stride, sse2, 951 sum2, &sse_tot_c, &sum_tot_c, var2); 952 } 953 } 954 } 955 aom_usec_timer_mark(&timer); 956 const double elapsed_time_ref = 957 static_cast<double>(aom_usec_timer_elapsed(&timer)); 958 959 aom_usec_timer_start(&timer); 960 for (int r = 0; r < loop_count; ++r) { 961 for (int i = 0; i < height(); i += 8) { 962 for (int j = 0; j < width(); j += 32) { 963 params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j, 964 stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1); 965 } 966 } 967 } 968 aom_usec_timer_mark(&timer); 969 const double elapsed_time_simd = 970 static_cast<double>(aom_usec_timer_elapsed(&timer)); 971 972 printf( 973 "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t " 974 "gain=%lf \n", 975 width(), height(), elapsed_time_ref, elapsed_time_simd, 976 
elapsed_time_ref / elapsed_time_simd); 977 } 978 979 template <typename GetSseSum16x16DualFuncType> 980 void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() { 981 for (int iter = 0; iter < 10; ++iter) { 982 for (int idx = 0; idx < block_size(); ++idx) { 983 src_[idx] = rnd_.Rand8(); 984 ref_[idx] = rnd_.Rand8(); 985 } 986 unsigned int sse1[64] = { 0 }; 987 unsigned int sse2[64] = { 0 }; 988 unsigned int var1[64] = { 0 }; 989 unsigned int var2[64] = { 0 }; 990 unsigned int sse_tot_c = 0; 991 unsigned int sse_tot_simd = 0; 992 int sum_tot_c = 0; 993 int sum_tot_simd = 0; 994 const int stride = width(); 995 int k = 0; 996 997 for (int row = 0; row < height(); row += 16) { 998 for (int col = 0; col < width(); col += 32) { 999 API_REGISTER_STATE_CHECK(params_.func( 1000 src_ + stride * row + col, stride, ref_ + stride * row + col, 1001 stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); 1002 aom_get_var_sse_sum_16x16_dual_c( 1003 src_ + stride * row + col, stride, ref_ + stride * row + col, 1004 stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); 1005 k += 2; 1006 } 1007 } 1008 EXPECT_EQ(sse_tot_c, sse_tot_simd); 1009 EXPECT_EQ(sum_tot_c, sum_tot_simd); 1010 for (int p = 0; p < 64; p++) { 1011 EXPECT_EQ(sse1[p], sse2[p]); 1012 EXPECT_EQ(sse_tot_simd, sse_tot_c); 1013 EXPECT_EQ(sum_tot_simd, sum_tot_c); 1014 EXPECT_EQ(var1[p], var2[p]); 1015 } 1016 } 1017 } 1018 1019 template <typename GetSseSum16x16DualFuncType> 1020 void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() { 1021 memset(src_, 0, block_size()); 1022 memset(ref_, 255, block_size()); 1023 unsigned int sse1[64] = { 0 }; 1024 unsigned int sse2[64] = { 0 }; 1025 unsigned int var1[64] = { 0 }; 1026 unsigned int var2[64] = { 0 }; 1027 unsigned int sse_tot_c = 0; 1028 unsigned int sse_tot_simd = 0; 1029 int sum_tot_c = 0; 1030 int sum_tot_simd = 0; 1031 const int stride = width(); 1032 int k = 0; 1033 1034 for (int row = 0; row < height(); row += 16) { 1035 for (int col = 0; 
col < width(); col += 32) { 1036 API_REGISTER_STATE_CHECK(params_.func( 1037 src_ + stride * row + col, stride, ref_ + stride * row + col, stride, 1038 &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); 1039 aom_get_var_sse_sum_16x16_dual_c( 1040 src_ + stride * row + col, stride, ref_ + stride * row + col, stride, 1041 &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); 1042 k += 2; 1043 } 1044 } 1045 EXPECT_EQ(sse_tot_simd, sse_tot_c); 1046 EXPECT_EQ(sum_tot_simd, sum_tot_c); 1047 for (int p = 0; p < 64; p++) { 1048 EXPECT_EQ(sse1[p], sse2[p]); 1049 EXPECT_EQ(var1[p], var2[p]); 1050 } 1051 } 1052 1053 template <typename GetSseSum16x16DualFuncType> 1054 void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() { 1055 memset(src_, 255, block_size()); 1056 memset(ref_, 0, block_size()); 1057 unsigned int sse1[64] = { 0 }; 1058 unsigned int sse2[64] = { 0 }; 1059 unsigned int var1[64] = { 0 }; 1060 unsigned int var2[64] = { 0 }; 1061 unsigned int sse_tot_c = 0; 1062 unsigned int sse_tot_simd = 0; 1063 int sum_tot_c = 0; 1064 int sum_tot_simd = 0; 1065 const int stride = width(); 1066 int k = 0; 1067 1068 for (int row = 0; row < height(); row += 16) { 1069 for (int col = 0; col < width(); col += 32) { 1070 API_REGISTER_STATE_CHECK(params_.func( 1071 src_ + stride * row + col, stride, ref_ + stride * row + col, stride, 1072 &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k])); 1073 aom_get_var_sse_sum_16x16_dual_c( 1074 src_ + stride * row + col, stride, ref_ + stride * row + col, stride, 1075 &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]); 1076 k += 2; 1077 } 1078 } 1079 EXPECT_EQ(sse_tot_c, sse_tot_simd); 1080 EXPECT_EQ(sum_tot_c, sum_tot_simd); 1081 1082 for (int p = 0; p < 64; p++) { 1083 EXPECT_EQ(sse1[p], sse2[p]); 1084 EXPECT_EQ(var1[p], var2[p]); 1085 } 1086 } 1087 1088 template <typename GetSseSum16x16DualFuncType> 1089 void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() { 1090 const int loop_count = 1000000000 / block_size(); 1091 for (int 
idx = 0; idx < block_size(); ++idx) { 1092 src_[idx] = rnd_.Rand8(); 1093 ref_[idx] = rnd_.Rand8(); 1094 } 1095 1096 unsigned int sse1[2] = { 0 }; 1097 unsigned int sse2[2] = { 0 }; 1098 unsigned int var1[2] = { 0 }; 1099 unsigned int var2[2] = { 0 }; 1100 unsigned int sse_tot_c = 0; 1101 unsigned int sse_tot_simd = 0; 1102 int sum_tot_c = 0; 1103 int sum_tot_simd = 0; 1104 const int stride = width(); 1105 1106 aom_usec_timer timer; 1107 aom_usec_timer_start(&timer); 1108 for (int r = 0; r < loop_count; ++r) { 1109 for (int row = 0; row < height(); row += 16) { 1110 for (int col = 0; col < width(); col += 32) { 1111 aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride, 1112 ref_ + stride * row + col, stride, 1113 sse2, &sse_tot_c, &sum_tot_c, var2); 1114 } 1115 } 1116 } 1117 aom_usec_timer_mark(&timer); 1118 const double elapsed_time_ref = 1119 static_cast<double>(aom_usec_timer_elapsed(&timer)); 1120 1121 aom_usec_timer_start(&timer); 1122 for (int r = 0; r < loop_count; ++r) { 1123 for (int row = 0; row < height(); row += 16) { 1124 for (int col = 0; col < width(); col += 32) { 1125 params_.func(src_ + stride * row + col, stride, 1126 ref_ + stride * row + col, stride, sse1, &sse_tot_simd, 1127 &sum_tot_simd, var1); 1128 } 1129 } 1130 } 1131 aom_usec_timer_mark(&timer); 1132 const double elapsed_time_simd = 1133 static_cast<double>(aom_usec_timer_elapsed(&timer)); 1134 1135 printf( 1136 "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf " 1137 "\t " 1138 "gain=%lf \n", 1139 width(), height(), elapsed_time_ref, elapsed_time_simd, 1140 elapsed_time_ref / elapsed_time_simd); 1141 } 1142 1143 //////////////////////////////////////////////////////////////////////////////// 1144 // Tests related to MSE / SSE. 
1145 1146 template <typename FunctionType> 1147 void MainTestClass<FunctionType>::RefTestMse() { 1148 for (int i = 0; i < 10; ++i) { 1149 for (int j = 0; j < block_size(); ++j) { 1150 if (!use_high_bit_depth()) { 1151 src_[j] = rnd_.Rand8(); 1152 ref_[j] = rnd_.Rand8(); 1153 #if CONFIG_AV1_HIGHBITDEPTH 1154 } else { 1155 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); 1156 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); 1157 #endif // CONFIG_AV1_HIGHBITDEPTH 1158 } 1159 } 1160 unsigned int sse1, sse2; 1161 const int stride = width(); 1162 API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1)); 1163 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, 1164 stride, &sse2, use_high_bit_depth(), params_.bit_depth); 1165 EXPECT_EQ(sse1, sse2); 1166 } 1167 } 1168 1169 template <typename FunctionType> 1170 void MainTestClass<FunctionType>::RefTestSse() { 1171 for (int i = 0; i < 10; ++i) { 1172 for (int j = 0; j < block_size(); ++j) { 1173 src_[j] = rnd_.Rand8(); 1174 ref_[j] = rnd_.Rand8(); 1175 } 1176 unsigned int sse2; 1177 unsigned int var1; 1178 const int stride = width(); 1179 API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride)); 1180 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, 1181 stride, &sse2, false, AOM_BITS_8); 1182 EXPECT_EQ(var1, sse2); 1183 } 1184 } 1185 1186 template <typename FunctionType> 1187 void MainTestClass<FunctionType>::MaxTestMse() { 1188 int max_value = (1 << params_.bit_depth) - 1; 1189 if (!use_high_bit_depth()) { 1190 memset(src_, max_value, block_size()); 1191 memset(ref_, 0, block_size()); 1192 #if CONFIG_AV1_HIGHBITDEPTH 1193 } else { 1194 aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size()); 1195 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size()); 1196 #endif // CONFIG_AV1_HIGHBITDEPTH 1197 } 1198 unsigned int sse; 1199 API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse)); 1200 unsigned int expected 
= (unsigned int)block_size() * max_value * max_value; 1201 switch (params_.bit_depth) { 1202 case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break; 1203 case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break; 1204 case AOM_BITS_8: 1205 default: break; 1206 } 1207 EXPECT_EQ(expected, sse); 1208 } 1209 1210 template <typename FunctionType> 1211 void MainTestClass<FunctionType>::MaxTestSse() { 1212 memset(src_, 255, block_size()); 1213 memset(ref_, 0, block_size()); 1214 unsigned int var; 1215 API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width())); 1216 const unsigned int expected = block_size() * 255 * 255; 1217 EXPECT_EQ(expected, var); 1218 } 1219 1220 //////////////////////////////////////////////////////////////////////////////// 1221 1222 using std::get; 1223 using std::make_tuple; 1224 using std::tuple; 1225 1226 template <typename FunctionType> 1227 class SubpelVarianceTest 1228 : public ::testing::TestWithParam<TestParams<FunctionType> > { 1229 public: 1230 void SetUp() override { 1231 params_ = this->GetParam(); 1232 1233 rnd_.Reset(ACMRandom::DeterministicSeed()); 1234 if (!use_high_bit_depth()) { 1235 src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size())); 1236 sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size())); 1237 ref_ = reinterpret_cast<uint8_t *>( 1238 aom_memalign(32, block_size() + width() + height() + 1)); 1239 } else { 1240 src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( 1241 aom_memalign(32, block_size() * sizeof(uint16_t)))); 1242 sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>( 1243 aom_memalign(32, block_size() * sizeof(uint16_t)))); 1244 ref_ = CONVERT_TO_BYTEPTR(aom_memalign( 1245 32, (block_size() + width() + height() + 1) * sizeof(uint16_t))); 1246 } 1247 ASSERT_NE(src_, nullptr); 1248 ASSERT_NE(sec_, nullptr); 1249 ASSERT_NE(ref_, nullptr); 1250 } 1251 1252 void TearDown() override { 1253 if (!use_high_bit_depth()) { 1254 aom_free(src_); 1255 
aom_free(ref_); 1256 aom_free(sec_); 1257 } else { 1258 aom_free(CONVERT_TO_SHORTPTR(src_)); 1259 aom_free(CONVERT_TO_SHORTPTR(ref_)); 1260 aom_free(CONVERT_TO_SHORTPTR(sec_)); 1261 } 1262 } 1263 1264 protected: 1265 void RefTest(); 1266 void ExtremeRefTest(); 1267 void SpeedTest(); 1268 1269 ACMRandom rnd_; 1270 uint8_t *src_; 1271 uint8_t *ref_; 1272 uint8_t *sec_; 1273 TestParams<FunctionType> params_; 1274 DIST_WTD_COMP_PARAMS jcp_param_; 1275 1276 // some relay helpers 1277 bool use_high_bit_depth() const { return params_.use_high_bit_depth; } 1278 int byte_shift() const { return params_.bit_depth - 8; } 1279 int block_size() const { return params_.block_size; } 1280 int width() const { return params_.width; } 1281 int height() const { return params_.height; } 1282 uint32_t mask() const { return params_.mask; } 1283 }; 1284 1285 template <typename SubpelVarianceFunctionType> 1286 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() { 1287 for (int x = 0; x < 8; ++x) { 1288 for (int y = 0; y < 8; ++y) { 1289 if (!use_high_bit_depth()) { 1290 for (int j = 0; j < block_size(); j++) { 1291 src_[j] = rnd_.Rand8(); 1292 } 1293 for (int j = 0; j < block_size() + width() + height() + 1; j++) { 1294 ref_[j] = rnd_.Rand8(); 1295 } 1296 } else { 1297 for (int j = 0; j < block_size(); j++) { 1298 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); 1299 } 1300 for (int j = 0; j < block_size() + width() + height() + 1; j++) { 1301 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); 1302 } 1303 } 1304 unsigned int sse1, sse2; 1305 unsigned int var1; 1306 API_REGISTER_STATE_CHECK( 1307 var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); 1308 const unsigned int var2 = subpel_variance_ref( 1309 ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, 1310 use_high_bit_depth(), params_.bit_depth); 1311 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; 1312 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; 1313 } 1314 } 
1315 } 1316 1317 template <typename SubpelVarianceFunctionType> 1318 void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() { 1319 // Compare against reference. 1320 // Src: Set the first half of values to 0, the second half to the maximum. 1321 // Ref: Set the first half of values to the maximum, the second half to 0. 1322 for (int x = 0; x < 8; ++x) { 1323 for (int y = 0; y < 8; ++y) { 1324 const int half = block_size() / 2; 1325 if (!use_high_bit_depth()) { 1326 memset(src_, 0, half); 1327 memset(src_ + half, 255, half); 1328 memset(ref_, 255, half); 1329 memset(ref_ + half, 0, half + width() + height() + 1); 1330 } else { 1331 aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half); 1332 aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half); 1333 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half); 1334 aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(), 1335 half + width() + height() + 1); 1336 } 1337 unsigned int sse1, sse2; 1338 unsigned int var1; 1339 API_REGISTER_STATE_CHECK( 1340 var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); 1341 const unsigned int var2 = subpel_variance_ref( 1342 ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, 1343 use_high_bit_depth(), params_.bit_depth); 1344 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; 1345 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; 1346 } 1347 } 1348 } 1349 1350 template <typename SubpelVarianceFunctionType> 1351 void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() { 1352 if (!use_high_bit_depth()) { 1353 for (int j = 0; j < block_size(); j++) { 1354 src_[j] = rnd_.Rand8(); 1355 } 1356 for (int j = 0; j < block_size() + width() + height() + 1; j++) { 1357 ref_[j] = rnd_.Rand8(); 1358 } 1359 } else { 1360 for (int j = 0; j < block_size(); j++) { 1361 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); 1362 } 1363 for (int j = 0; j < block_size() + width() + height() + 1; j++) { 1364 
CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); 1365 } 1366 } 1367 1368 unsigned int sse1, sse2; 1369 int run_time = 1000000000 / block_size(); 1370 aom_usec_timer timer; 1371 1372 aom_usec_timer_start(&timer); 1373 for (int i = 0; i < run_time; ++i) { 1374 int x = rnd_(8); 1375 int y = rnd_(8); 1376 params_.func(ref_, width() + 1, x, y, src_, width(), &sse1); 1377 } 1378 aom_usec_timer_mark(&timer); 1379 1380 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 1381 1382 aom_usec_timer timer_c; 1383 1384 aom_usec_timer_start(&timer_c); 1385 for (int i = 0; i < run_time; ++i) { 1386 int x = rnd_(8); 1387 int y = rnd_(8); 1388 subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y, 1389 &sse2, use_high_bit_depth(), params_.bit_depth); 1390 } 1391 aom_usec_timer_mark(&timer_c); 1392 1393 const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c)); 1394 1395 printf( 1396 "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n", 1397 width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time, 1398 elapsed_time_c / elapsed_time); 1399 } 1400 1401 template <> 1402 void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() { 1403 for (int x = 0; x < 8; ++x) { 1404 for (int y = 0; y < 8; ++y) { 1405 if (!use_high_bit_depth()) { 1406 for (int j = 0; j < block_size(); j++) { 1407 src_[j] = rnd_.Rand8(); 1408 sec_[j] = rnd_.Rand8(); 1409 } 1410 for (int j = 0; j < block_size() + width() + height() + 1; j++) { 1411 ref_[j] = rnd_.Rand8(); 1412 } 1413 } else { 1414 for (int j = 0; j < block_size(); j++) { 1415 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); 1416 CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask(); 1417 } 1418 for (int j = 0; j < block_size() + width() + height() + 1; j++) { 1419 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); 1420 } 1421 } 1422 uint32_t sse1, sse2; 1423 uint32_t var1, var2; 1424 API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, 
x, y, 1425 src_, width(), &sse1, sec_)); 1426 var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width, 1427 params_.log2height, x, y, &sse2, 1428 use_high_bit_depth(), params_.bit_depth); 1429 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; 1430 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; 1431 } 1432 } 1433 } 1434 1435 //////////////////////////////////////////////////////////////////////////////// 1436 1437 #if !CONFIG_REALTIME_ONLY 1438 1439 static const int kMaskMax = 64; 1440 1441 using ObmcSubpelVarianceParams = TestParams<ObmcSubpelVarFunc>; 1442 1443 template <typename FunctionType> 1444 class ObmcVarianceTest 1445 : public ::testing::TestWithParam<TestParams<FunctionType> > { 1446 public: 1447 void SetUp() override { 1448 params_ = this->GetParam(); 1449 1450 rnd_.Reset(ACMRandom::DeterministicSeed()); 1451 if (!use_high_bit_depth()) { 1452 pre_ = reinterpret_cast<uint8_t *>( 1453 aom_memalign(32, block_size() + width() + height() + 1)); 1454 } else { 1455 pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign( 1456 32, (block_size() + width() + height() + 1) * sizeof(uint16_t)))); 1457 } 1458 wsrc_ = reinterpret_cast<int32_t *>( 1459 aom_memalign(32, block_size() * sizeof(uint32_t))); 1460 mask_ = reinterpret_cast<int32_t *>( 1461 aom_memalign(32, block_size() * sizeof(uint32_t))); 1462 ASSERT_NE(pre_, nullptr); 1463 ASSERT_NE(wsrc_, nullptr); 1464 ASSERT_NE(mask_, nullptr); 1465 } 1466 1467 void TearDown() override { 1468 if (!use_high_bit_depth()) { 1469 aom_free(pre_); 1470 } else { 1471 aom_free(CONVERT_TO_SHORTPTR(pre_)); 1472 } 1473 aom_free(wsrc_); 1474 aom_free(mask_); 1475 } 1476 1477 protected: 1478 void RefTest(); 1479 void ExtremeRefTest(); 1480 void SpeedTest(); 1481 1482 ACMRandom rnd_; 1483 uint8_t *pre_; 1484 int32_t *wsrc_; 1485 int32_t *mask_; 1486 TestParams<FunctionType> params_; 1487 1488 // some relay helpers 1489 bool use_high_bit_depth() const { return params_.use_high_bit_depth; } 
1490 int byte_shift() const { return params_.bit_depth - 8; } 1491 int block_size() const { return params_.block_size; } 1492 int width() const { return params_.width; } 1493 int height() const { return params_.height; } 1494 uint32_t bd_mask() const { return params_.mask; } 1495 }; 1496 1497 template <> 1498 void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() { 1499 for (int x = 0; x < 8; ++x) { 1500 for (int y = 0; y < 8; ++y) { 1501 if (!use_high_bit_depth()) 1502 for (int j = 0; j < block_size() + width() + height() + 1; j++) 1503 pre_[j] = rnd_.Rand8(); 1504 else 1505 for (int j = 0; j < block_size() + width() + height() + 1; j++) 1506 CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask(); 1507 for (int j = 0; j < block_size(); j++) { 1508 wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1); 1509 mask_[j] = rnd_(kMaskMax * kMaskMax + 1); 1510 } 1511 1512 uint32_t sse1, sse2; 1513 uint32_t var1, var2; 1514 API_REGISTER_STATE_CHECK( 1515 var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1)); 1516 var2 = obmc_subpel_variance_ref( 1517 pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_, 1518 &sse2, use_high_bit_depth(), params_.bit_depth); 1519 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; 1520 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; 1521 } 1522 } 1523 } 1524 1525 template <> 1526 void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() { 1527 // Pre: Set the first half of values to the maximum, the second half to 0. 1528 // Mask: same as above 1529 // WSrc: Set the first half of values to 0, the second half to the maximum. 
1530 for (int x = 0; x < 8; ++x) { 1531 for (int y = 0; y < 8; ++y) { 1532 const int half = block_size() / 2; 1533 if (!use_high_bit_depth()) { 1534 memset(pre_, 255, half); 1535 memset(pre_ + half, 0, half + width() + height() + 1); 1536 } else { 1537 aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half); 1538 aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0, 1539 half + width() + height() + 1); 1540 } 1541 for (int j = 0; j < half; j++) { 1542 wsrc_[j] = bd_mask() * kMaskMax * kMaskMax; 1543 mask_[j] = 0; 1544 } 1545 for (int j = half; j < block_size(); j++) { 1546 wsrc_[j] = 0; 1547 mask_[j] = kMaskMax * kMaskMax; 1548 } 1549 1550 uint32_t sse1, sse2; 1551 uint32_t var1, var2; 1552 API_REGISTER_STATE_CHECK( 1553 var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1)); 1554 var2 = obmc_subpel_variance_ref( 1555 pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_, 1556 &sse2, use_high_bit_depth(), params_.bit_depth); 1557 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; 1558 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; 1559 } 1560 } 1561 } 1562 1563 template <> 1564 void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() { 1565 if (!use_high_bit_depth()) 1566 for (int j = 0; j < block_size() + width() + height() + 1; j++) 1567 pre_[j] = rnd_.Rand8(); 1568 else 1569 for (int j = 0; j < block_size() + width() + height() + 1; j++) 1570 CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask(); 1571 for (int j = 0; j < block_size(); j++) { 1572 wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1); 1573 mask_[j] = rnd_(kMaskMax * kMaskMax + 1); 1574 } 1575 unsigned int sse1; 1576 const int stride = width() + 1; 1577 int run_time = 1000000000 / block_size(); 1578 aom_usec_timer timer; 1579 1580 aom_usec_timer_start(&timer); 1581 for (int i = 0; i < run_time; ++i) { 1582 int x = rnd_(8); 1583 int y = rnd_(8); 1584 API_REGISTER_STATE_CHECK( 1585 params_.func(pre_, stride, x, y, wsrc_, 
mask_, &sse1)); 1586 } 1587 aom_usec_timer_mark(&timer); 1588 1589 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 1590 printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(), 1591 params_.bit_depth, elapsed_time); 1592 } 1593 1594 #endif // !CONFIG_REALTIME_ONLY 1595 1596 using MseWxHTest = MseWxHTestClass<MseWxH16bitFunc>; 1597 using Mse16xHTest = Mse16xHTestClass<Mse16xH16bitFunc>; 1598 using AvxMseTest = MainTestClass<VarianceMxNFunc>; 1599 using AvxVarianceTest = MainTestClass<VarianceMxNFunc>; 1600 using GetSseSum8x8QuadTest = MainTestClass<GetSseSum8x8QuadFunc>; 1601 using GetSseSum16x16DualTest = MainTestClass<GetSseSum16x16DualFunc>; 1602 using AvxSubpelVarianceTest = SubpelVarianceTest<SubpixVarMxNFunc>; 1603 using AvxSubpelAvgVarianceTest = SubpelVarianceTest<SubpixAvgVarMxNFunc>; 1604 #if !CONFIG_REALTIME_ONLY 1605 using AvxObmcSubpelVarianceTest = ObmcVarianceTest<ObmcSubpelVarFunc>; 1606 #endif 1607 using MseWxHParams = TestParams<MseWxH16bitFunc>; 1608 using Mse16xHParams = TestParams<Mse16xH16bitFunc>; 1609 1610 TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); } 1611 TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); } 1612 TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); } 1613 TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); } 1614 TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); } 1615 TEST_P(AvxMseTest, RefMse) { RefTestMse(); } 1616 TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); } 1617 TEST_P(AvxVarianceTest, Zero) { ZeroTest(); } 1618 TEST_P(AvxVarianceTest, Ref) { RefTest(); } 1619 TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); } 1620 TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); } 1621 TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); } 1622 TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); } 1623 TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); } 1624 TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); } 1625 
TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); } 1626 TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); } 1627 TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); } 1628 TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); } 1629 TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); } 1630 #if !CONFIG_REALTIME_ONLY 1631 TEST_P(SumOfSquaresTest, Const) { ConstTest(); } 1632 TEST_P(SumOfSquaresTest, Ref) { RefTest(); } 1633 #endif // !CONFIG_REALTIME_ONLY 1634 TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); } 1635 TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } 1636 TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } 1637 TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); } 1638 #if !CONFIG_REALTIME_ONLY 1639 TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); } 1640 TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } 1641 TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } 1642 #endif 1643 1644 INSTANTIATE_TEST_SUITE_P( 1645 C, MseWxHTest, 1646 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8), 1647 MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8), 1648 MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8), 1649 MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8))); 1650 1651 INSTANTIATE_TEST_SUITE_P( 1652 C, Mse16xHTest, 1653 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8), 1654 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8), 1655 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8), 1656 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8))); 1657 1658 #if !CONFIG_REALTIME_ONLY 1659 INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest, 1660 ::testing::Values(aom_get_mb_ss_c)); 1661 #endif // !CONFIG_REALTIME_ONLY 1662 1663 using MseParams = TestParams<VarianceMxNFunc>; 1664 INSTANTIATE_TEST_SUITE_P(C, AvxMseTest, 1665 ::testing::Values(MseParams(4, 4, &aom_mse16x16_c), 1666 MseParams(4, 3, &aom_mse16x8_c), 1667 MseParams(3, 4, &aom_mse8x16_c), 
1668 MseParams(3, 3, &aom_mse8x8_c))); 1669 1670 using VarianceParams = TestParams<VarianceMxNFunc>; 1671 const VarianceParams kArrayVariance_c[] = { 1672 VarianceParams(7, 7, &aom_variance128x128_c), 1673 VarianceParams(7, 6, &aom_variance128x64_c), 1674 VarianceParams(6, 7, &aom_variance64x128_c), 1675 VarianceParams(6, 6, &aom_variance64x64_c), 1676 VarianceParams(6, 5, &aom_variance64x32_c), 1677 VarianceParams(5, 6, &aom_variance32x64_c), 1678 VarianceParams(5, 5, &aom_variance32x32_c), 1679 VarianceParams(5, 4, &aom_variance32x16_c), 1680 VarianceParams(4, 5, &aom_variance16x32_c), 1681 VarianceParams(4, 4, &aom_variance16x16_c), 1682 VarianceParams(4, 3, &aom_variance16x8_c), 1683 VarianceParams(3, 4, &aom_variance8x16_c), 1684 VarianceParams(3, 3, &aom_variance8x8_c), 1685 VarianceParams(3, 2, &aom_variance8x4_c), 1686 VarianceParams(2, 3, &aom_variance4x8_c), 1687 VarianceParams(2, 2, &aom_variance4x4_c), 1688 #if !CONFIG_REALTIME_ONLY 1689 VarianceParams(6, 4, &aom_variance64x16_c), 1690 VarianceParams(4, 6, &aom_variance16x64_c), 1691 VarianceParams(5, 3, &aom_variance32x8_c), 1692 VarianceParams(3, 5, &aom_variance8x32_c), 1693 VarianceParams(4, 2, &aom_variance16x4_c), 1694 VarianceParams(2, 4, &aom_variance4x16_c), 1695 #endif 1696 }; 1697 INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest, 1698 ::testing::ValuesIn(kArrayVariance_c)); 1699 1700 using GetSseSumParams = TestParams<GetSseSum8x8QuadFunc>; 1701 const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = { 1702 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0), 1703 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0), 1704 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0), 1705 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0) 1706 }; 1707 INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest, 1708 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c)); 1709 1710 using GetSseSumParamsDual = TestParams<GetSseSum16x16DualFunc>; 1711 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = { 
1712 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0), 1713 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0), 1714 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0), 1715 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0) 1716 }; 1717 1718 INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest, 1719 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c)); 1720 1721 using SubpelVarianceParams = TestParams<SubpixVarMxNFunc>; 1722 const SubpelVarianceParams kArraySubpelVariance_c[] = { 1723 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0), 1724 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0), 1725 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0), 1726 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0), 1727 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0), 1728 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0), 1729 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0), 1730 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0), 1731 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0), 1732 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0), 1733 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0), 1734 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0), 1735 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0), 1736 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0), 1737 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0), 1738 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0), 1739 #if !CONFIG_REALTIME_ONLY 1740 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0), 1741 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0), 1742 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0), 1743 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0), 1744 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0), 1745 
SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0), 1746 #endif 1747 }; 1748 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest, 1749 ::testing::ValuesIn(kArraySubpelVariance_c)); 1750 1751 using SubpelAvgVarianceParams = TestParams<SubpixAvgVarMxNFunc>; 1752 const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = { 1753 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0), 1754 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0), 1755 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0), 1756 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0), 1757 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0), 1758 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0), 1759 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0), 1760 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0), 1761 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0), 1762 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0), 1763 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0), 1764 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0), 1765 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0), 1766 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0), 1767 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0), 1768 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0), 1769 #if !CONFIG_REALTIME_ONLY 1770 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0), 1771 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0), 1772 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0), 1773 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0), 1774 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0), 1775 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 
0), 1776 #endif 1777 }; 1778 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest, 1779 ::testing::ValuesIn(kArraySubpelAvgVariance_c)); 1780 1781 #if !CONFIG_REALTIME_ONLY 1782 INSTANTIATE_TEST_SUITE_P( 1783 C, AvxObmcSubpelVarianceTest, 1784 ::testing::Values( 1785 ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c, 1786 0), 1787 ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0), 1788 ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0), 1789 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0), 1790 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0), 1791 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0), 1792 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0), 1793 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0), 1794 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0), 1795 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0), 1796 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0), 1797 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0), 1798 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0), 1799 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0), 1800 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0), 1801 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0), 1802 1803 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0), 1804 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0), 1805 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0), 1806 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0), 1807 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0), 1808 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0))); 1809 #endif 1810 1811 #if 
CONFIG_AV1_HIGHBITDEPTH 1812 using MseHBDWxH16bitFunc = uint64_t (*)(uint16_t *, int, uint16_t *, int, int, 1813 int); 1814 1815 template <typename FunctionType> 1816 class MseHBDWxHTestClass 1817 : public ::testing::TestWithParam<TestParams<FunctionType> > { 1818 public: 1819 void SetUp() override { 1820 params_ = this->GetParam(); 1821 1822 rnd_.Reset(ACMRandom::DeterministicSeed()); 1823 src_ = reinterpret_cast<uint16_t *>( 1824 aom_memalign(16, block_size() * sizeof(src_))); 1825 dst_ = reinterpret_cast<uint16_t *>( 1826 aom_memalign(16, block_size() * sizeof(dst_))); 1827 ASSERT_NE(src_, nullptr); 1828 ASSERT_NE(dst_, nullptr); 1829 } 1830 1831 void TearDown() override { 1832 aom_free(src_); 1833 aom_free(dst_); 1834 src_ = nullptr; 1835 dst_ = nullptr; 1836 } 1837 1838 protected: 1839 void RefMatchTestMse(); 1840 void SpeedTest(); 1841 1842 protected: 1843 ACMRandom rnd_; 1844 uint16_t *dst_; 1845 uint16_t *src_; 1846 TestParams<FunctionType> params_; 1847 1848 // some relay helpers 1849 int block_size() const { return params_.block_size; } 1850 int width() const { return params_.width; } 1851 int d_stride() const { return params_.width; } // stride is same as width 1852 int s_stride() const { return params_.width; } // stride is same as width 1853 int height() const { return params_.height; } 1854 int mask() const { return params_.mask; } 1855 }; 1856 1857 template <typename MseHBDWxHFunctionType> 1858 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() { 1859 aom_usec_timer ref_timer, test_timer; 1860 double elapsed_time_c = 0; 1861 double elapsed_time_simd = 0; 1862 int run_time = 10000000; 1863 int w = width(); 1864 int h = height(); 1865 int dstride = d_stride(); 1866 int sstride = s_stride(); 1867 for (int k = 0; k < block_size(); ++k) { 1868 dst_[k] = rnd_.Rand16() & mask(); 1869 src_[k] = rnd_.Rand16() & mask(); 1870 } 1871 aom_usec_timer_start(&ref_timer); 1872 for (int i = 0; i < run_time; i++) { 1873 aom_mse_wxh_16bit_highbd_c(dst_, 
dstride, src_, sstride, w, h); 1874 } 1875 aom_usec_timer_mark(&ref_timer); 1876 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer)); 1877 1878 aom_usec_timer_start(&test_timer); 1879 for (int i = 0; i < run_time; i++) { 1880 params_.func(dst_, dstride, src_, sstride, w, h); 1881 } 1882 aom_usec_timer_mark(&test_timer); 1883 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer)); 1884 1885 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(), 1886 elapsed_time_c, elapsed_time_simd, 1887 (elapsed_time_c / elapsed_time_simd)); 1888 } 1889 1890 template <typename MseHBDWxHFunctionType> 1891 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() { 1892 uint64_t mse_ref = 0; 1893 uint64_t mse_mod = 0; 1894 int w = width(); 1895 int h = height(); 1896 int dstride = d_stride(); 1897 int sstride = s_stride(); 1898 for (int i = 0; i < 10; i++) { 1899 for (int k = 0; k < block_size(); ++k) { 1900 dst_[k] = rnd_.Rand16() & mask(); 1901 src_[k] = rnd_.Rand16() & mask(); 1902 } 1903 API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c( 1904 dst_, dstride, src_, sstride, w, h)); 1905 API_REGISTER_STATE_CHECK( 1906 mse_mod = params_.func(dst_, dstride, src_, sstride, w, h)); 1907 EXPECT_EQ(mse_ref, mse_mod) 1908 << "ref mse: " << mse_ref << " mod mse: " << mse_mod; 1909 } 1910 } 1911 1912 using MseHBDWxHParams = TestParams<MseHBDWxH16bitFunc>; 1913 using MseHBDWxHTest = MseHBDWxHTestClass<MseHBDWxH16bitFunc>; 1914 using AvxHBDMseTest = MainTestClass<VarianceMxNFunc>; 1915 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest); 1916 using AvxHBDVarianceTest = MainTestClass<VarianceMxNFunc>; 1917 using AvxHBDSubpelVarianceTest = SubpelVarianceTest<SubpixVarMxNFunc>; 1918 using AvxHBDSubpelAvgVarianceTest = SubpelVarianceTest<SubpixAvgVarMxNFunc>; 1919 #if !CONFIG_REALTIME_ONLY 1920 using AvxHBDObmcSubpelVarianceTest = ObmcVarianceTest<ObmcSubpelVarFunc>; 1921 #endif 1922 
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest); 1923 1924 TEST_P(MseHBDWxHTest, RefMse) { RefMatchTestMse(); } 1925 TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) { SpeedTest(); } 1926 TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); } 1927 TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); } 1928 TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) { SpeedTest(); } 1929 TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); } 1930 TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); } 1931 TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); } 1932 TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); } 1933 TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); } 1934 TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); } 1935 TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } 1936 TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } 1937 TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); } 1938 #if !CONFIG_REALTIME_ONLY 1939 TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) { RefTest(); } 1940 TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } 1941 TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); } 1942 #endif 1943 1944 INSTANTIATE_TEST_SUITE_P( 1945 C, MseHBDWxHTest, 1946 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10), 1947 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10), 1948 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10), 1949 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10))); 1950 1951 INSTANTIATE_TEST_SUITE_P( 1952 C, AvxHBDMseTest, 1953 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12), 1954 MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12), 1955 MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12), 1956 MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12), 1957 MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10), 1958 MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10), 1959 MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10), 1960 MseParams(3, 3, 
&aom_highbd_10_mse8x8_c, 10), 1961 MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8), 1962 MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8), 1963 MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8), 1964 MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8))); 1965 1966 #if HAVE_NEON 1967 INSTANTIATE_TEST_SUITE_P( 1968 NEON, MseHBDWxHTest, 1969 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_neon, 10), 1970 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_neon, 10), 1971 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_neon, 10), 1972 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_neon, 1973 10))); 1974 1975 INSTANTIATE_TEST_SUITE_P( 1976 NEON, AvxHBDMseTest, 1977 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_neon, 12), 1978 MseParams(4, 3, &aom_highbd_12_mse16x8_neon, 12), 1979 MseParams(3, 4, &aom_highbd_12_mse8x16_neon, 12), 1980 MseParams(3, 3, &aom_highbd_12_mse8x8_neon, 12), 1981 MseParams(4, 4, &aom_highbd_10_mse16x16_neon, 10), 1982 MseParams(4, 3, &aom_highbd_10_mse16x8_neon, 10), 1983 MseParams(3, 4, &aom_highbd_10_mse8x16_neon, 10), 1984 MseParams(3, 3, &aom_highbd_10_mse8x8_neon, 10), 1985 MseParams(4, 4, &aom_highbd_8_mse16x16_neon, 8), 1986 MseParams(4, 3, &aom_highbd_8_mse16x8_neon, 8), 1987 MseParams(3, 4, &aom_highbd_8_mse8x16_neon, 8), 1988 MseParams(3, 3, &aom_highbd_8_mse8x8_neon, 8))); 1989 #endif // HAVE_NEON 1990 1991 #if HAVE_NEON_DOTPROD 1992 INSTANTIATE_TEST_SUITE_P( 1993 NEON_DOTPROD, AvxHBDMseTest, 1994 ::testing::Values(MseParams(4, 4, &aom_highbd_8_mse16x16_neon_dotprod, 8), 1995 MseParams(4, 3, &aom_highbd_8_mse16x8_neon_dotprod, 8), 1996 MseParams(3, 4, &aom_highbd_8_mse8x16_neon_dotprod, 8), 1997 MseParams(3, 3, &aom_highbd_8_mse8x8_neon_dotprod, 8))); 1998 #endif // HAVE_NEON_DOTPROD 1999 2000 #if HAVE_SVE 2001 INSTANTIATE_TEST_SUITE_P( 2002 SVE, MseHBDWxHTest, 2003 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sve, 10), 2004 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sve, 10), 2005 MseHBDWxHParams(2, 3, 
&aom_mse_wxh_16bit_highbd_sve, 10), 2006 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sve, 2007 10))); 2008 2009 INSTANTIATE_TEST_SUITE_P( 2010 SVE, AvxHBDMseTest, 2011 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sve, 12), 2012 MseParams(4, 3, &aom_highbd_12_mse16x8_sve, 12), 2013 MseParams(3, 4, &aom_highbd_12_mse8x16_sve, 12), 2014 MseParams(3, 3, &aom_highbd_12_mse8x8_sve, 12), 2015 MseParams(4, 4, &aom_highbd_10_mse16x16_sve, 10), 2016 MseParams(4, 3, &aom_highbd_10_mse16x8_sve, 10), 2017 MseParams(3, 4, &aom_highbd_10_mse8x16_sve, 10), 2018 MseParams(3, 3, &aom_highbd_10_mse8x8_sve, 10))); 2019 #endif // HAVE_SVE 2020 2021 const VarianceParams kArrayHBDVariance_c[] = { 2022 VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12), 2023 VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12), 2024 VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12), 2025 VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12), 2026 VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12), 2027 VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12), 2028 VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12), 2029 VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12), 2030 VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12), 2031 VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12), 2032 VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12), 2033 VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12), 2034 VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12), 2035 VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12), 2036 VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12), 2037 VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12), 2038 VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10), 2039 VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10), 2040 VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10), 2041 VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10), 2042 VarianceParams(6, 5, 
&aom_highbd_10_variance64x32_c, 10), 2043 VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10), 2044 VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10), 2045 VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10), 2046 VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10), 2047 VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10), 2048 VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10), 2049 VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10), 2050 VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10), 2051 VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10), 2052 VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10), 2053 VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10), 2054 VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8), 2055 VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8), 2056 VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8), 2057 VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8), 2058 VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8), 2059 VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8), 2060 VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8), 2061 VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8), 2062 VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8), 2063 VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8), 2064 VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8), 2065 VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8), 2066 VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8), 2067 VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8), 2068 VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8), 2069 VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8), 2070 #if !CONFIG_REALTIME_ONLY 2071 VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12), 2072 VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12), 2073 VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12), 2074 VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12), 2075 VarianceParams(4, 2, 
&aom_highbd_12_variance16x4_c, 12), 2076 VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12), 2077 VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10), 2078 VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10), 2079 VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10), 2080 VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10), 2081 VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10), 2082 VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10), 2083 VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8), 2084 VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8), 2085 VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8), 2086 VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8), 2087 VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8), 2088 VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8), 2089 #endif 2090 }; 2091 INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest, 2092 ::testing::ValuesIn(kArrayHBDVariance_c)); 2093 2094 #if HAVE_SSE4_1 2095 INSTANTIATE_TEST_SUITE_P( 2096 SSE4_1, AvxHBDVarianceTest, 2097 ::testing::Values( 2098 VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8), 2099 VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10), 2100 VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12))); 2101 #endif // HAVE_SSE4_1 2102 2103 const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = { 2104 SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8), 2105 SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8), 2106 SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8), 2107 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8), 2108 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8), 2109 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8), 2110 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8), 2111 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8), 2112 
SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8), 2113 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8), 2114 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8), 2115 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8), 2116 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8), 2117 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8), 2118 SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8), 2119 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8), 2120 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10), 2121 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10), 2122 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10), 2123 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10), 2124 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10), 2125 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10), 2126 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10), 2127 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10), 2128 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10), 2129 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10), 2130 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10), 2131 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10), 2132 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10), 2133 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10), 2134 SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10), 2135 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10), 2136 SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12), 2137 SubpelVarianceParams(7, 6, 
&aom_highbd_12_sub_pixel_variance128x64_c, 12), 2138 SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12), 2139 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12), 2140 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12), 2141 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12), 2142 SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12), 2143 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12), 2144 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12), 2145 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12), 2146 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12), 2147 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12), 2148 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12), 2149 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12), 2150 SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12), 2151 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12), 2152 #if !CONFIG_REALTIME_ONLY 2153 SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8), 2154 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8), 2155 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8), 2156 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8), 2157 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8), 2158 SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8), 2159 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10), 2160 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10), 2161 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10), 2162 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10), 2163 SubpelVarianceParams(4, 2, 
&aom_highbd_10_sub_pixel_variance16x4_c, 10), 2164 SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10), 2165 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12), 2166 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12), 2167 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12), 2168 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12), 2169 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12), 2170 SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12), 2171 #endif 2172 }; 2173 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest, 2174 ::testing::ValuesIn(kArrayHBDSubpelVariance_c)); 2175 2176 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = { 2177 SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c, 2178 8), 2179 SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c, 2180 8), 2181 SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c, 2182 8), 2183 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8), 2184 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8), 2185 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8), 2186 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8), 2187 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8), 2188 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8), 2189 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8), 2190 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8), 2191 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8), 2192 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8), 2193 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8), 2194 
SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8), 2195 SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8), 2196 SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c, 2197 10), 2198 SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c, 2199 10), 2200 SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c, 2201 10), 2202 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c, 2203 10), 2204 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c, 2205 10), 2206 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c, 2207 10), 2208 SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c, 2209 10), 2210 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c, 2211 10), 2212 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c, 2213 10), 2214 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c, 2215 10), 2216 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c, 2217 10), 2218 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c, 2219 10), 2220 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10), 2221 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10), 2222 SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10), 2223 SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10), 2224 SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c, 2225 12), 2226 SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c, 2227 12), 2228 SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c, 2229 12), 2230 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c, 2231 12), 2232 SubpelAvgVarianceParams(6, 5, 
&aom_highbd_12_sub_pixel_avg_variance64x32_c, 2233 12), 2234 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c, 2235 12), 2236 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c, 2237 12), 2238 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c, 2239 12), 2240 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c, 2241 12), 2242 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c, 2243 12), 2244 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c, 2245 12), 2246 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c, 2247 12), 2248 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12), 2249 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12), 2250 SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12), 2251 SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12), 2252 2253 #if !CONFIG_REALTIME_ONLY 2254 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8), 2255 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8), 2256 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8), 2257 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8), 2258 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8), 2259 SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8), 2260 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c, 2261 10), 2262 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c, 2263 10), 2264 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c, 2265 10), 2266 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c, 2267 10), 2268 SubpelAvgVarianceParams(4, 2, 
&aom_highbd_10_sub_pixel_avg_variance16x4_c, 2269 10), 2270 SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c, 2271 10), 2272 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c, 2273 12), 2274 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c, 2275 12), 2276 SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c, 2277 12), 2278 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c, 2279 12), 2280 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c, 2281 12), 2282 SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c, 2283 12), 2284 #endif 2285 }; 2286 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest, 2287 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c)); 2288 2289 #if !CONFIG_REALTIME_ONLY 2290 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = { 2291 ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c, 2292 8), 2293 ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c, 2294 8), 2295 ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c, 2296 8), 2297 ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c, 2298 8), 2299 ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c, 2300 8), 2301 ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c, 2302 8), 2303 ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c, 2304 8), 2305 ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c, 2306 8), 2307 ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c, 2308 8), 2309 ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c, 2310 8), 2311 ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c, 2312 8), 2313 ObmcSubpelVarianceParams(3, 4, 
&aom_highbd_8_obmc_sub_pixel_variance8x16_c, 2314 8), 2315 ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8), 2316 ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8), 2317 ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8), 2318 ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8), 2319 ObmcSubpelVarianceParams(7, 7, 2320 &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10), 2321 ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c, 2322 10), 2323 ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c, 2324 10), 2325 ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c, 2326 10), 2327 ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c, 2328 10), 2329 ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c, 2330 10), 2331 ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c, 2332 10), 2333 ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c, 2334 10), 2335 ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c, 2336 10), 2337 ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c, 2338 10), 2339 ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c, 2340 10), 2341 ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c, 2342 10), 2343 ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c, 2344 10), 2345 ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c, 2346 10), 2347 ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c, 2348 10), 2349 ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c, 2350 10), 2351 ObmcSubpelVarianceParams(7, 7, 2352 &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12), 2353 ObmcSubpelVarianceParams(7, 
6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c, 2354 12), 2355 ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c, 2356 12), 2357 ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c, 2358 12), 2359 ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c, 2360 12), 2361 ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c, 2362 12), 2363 ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c, 2364 12), 2365 ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c, 2366 12), 2367 ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c, 2368 12), 2369 ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c, 2370 12), 2371 ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c, 2372 12), 2373 ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c, 2374 12), 2375 ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c, 2376 12), 2377 ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c, 2378 12), 2379 ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c, 2380 12), 2381 ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c, 2382 12), 2383 2384 ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c, 2385 8), 2386 ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c, 2387 8), 2388 ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c, 2389 8), 2390 ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c, 2391 8), 2392 ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c, 2393 8), 2394 ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c, 2395 8), 2396 ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c, 2397 10), 2398 
// Tail of kArrayHBDObmcSubpelVariance_c (10- and 12-bit entries for the
// 16x64, 32x8, 8x32, 16x4 and 4x16 C functions), followed by its registration
// under the "C" prefix for AvxHBDObmcSubpelVarianceTest. The HAVE_SSE2 section
// then opens: MseWxHTest and Mse16xHTest are each registered with the four
// (3|2, 3|2) combinations and a trailing argument of 8, and SumOfSquaresTest is
// registered only when !CONFIG_REALTIME_ONLY.
// In the table entries the first two arguments track log2 of the block
// dimensions named in the function (e.g. 4, 6 pairs with 16x64); the last
// argument matches the bit depth in the function name.
ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c, 2399 10), 2400 ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c, 2401 10), 2402 ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c, 2403 10), 2404 ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c, 2405 10), 2406 ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c, 2407 10), 2408 ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c, 2409 12), 2410 ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c, 2411 12), 2412 ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c, 2413 12), 2414 ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c, 2415 12), 2416 ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c, 2417 12), 2418 ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c, 2419 12), 2420 }; 2421 INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest, 2422 ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c)); 2423 #endif // !CONFIG_REALTIME_ONLY 2424 #endif // CONFIG_AV1_HIGHBITDEPTH 2425 2426 #if HAVE_SSE2 2427 INSTANTIATE_TEST_SUITE_P( 2428 SSE2, MseWxHTest, 2429 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8), 2430 MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8), 2431 MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8), 2432 MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8))); 2433 2434 INSTANTIATE_TEST_SUITE_P( 2435 SSE2, Mse16xHTest, 2436 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8), 2437 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8), 2438 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8), 2439 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8))); 2440 2441 #if !CONFIG_REALTIME_ONLY 2442 INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest, 2443 ::testing::Values(aom_get_mb_ss_sse2)); 2444 #endif // !CONFIG_REALTIME_ONLY
// SSE2 registrations for the 8-bit paths: AvxMseTest (16x16, 16x8, 8x16, 8x8),
// then kArrayVariance_sse2, which lists aom_variance*_sse2 from 128x128 down to
// 4x4 and adds six more rectangular sizes (64x16, 32x8, 16x64, 16x4, 8x32,
// 4x16) only when !CONFIG_REALTIME_ONLY. kArrayGetSseSum8x8Quad_sse2 passes the
// same quad function with four different size pairs; its INSTANTIATE call
// starts at the end of this line and is completed on the next one.
2445 2446 INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest, 2447 ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2), 2448 MseParams(4, 3, &aom_mse16x8_sse2), 2449 MseParams(3, 4, &aom_mse8x16_sse2), 2450 MseParams(3, 3, &aom_mse8x8_sse2))); 2451 2452 const VarianceParams kArrayVariance_sse2[] = { 2453 VarianceParams(7, 7, &aom_variance128x128_sse2), 2454 VarianceParams(7, 6, &aom_variance128x64_sse2), 2455 VarianceParams(6, 7, &aom_variance64x128_sse2), 2456 VarianceParams(6, 6, &aom_variance64x64_sse2), 2457 VarianceParams(6, 5, &aom_variance64x32_sse2), 2458 VarianceParams(5, 6, &aom_variance32x64_sse2), 2459 VarianceParams(5, 5, &aom_variance32x32_sse2), 2460 VarianceParams(5, 4, &aom_variance32x16_sse2), 2461 VarianceParams(4, 5, &aom_variance16x32_sse2), 2462 VarianceParams(4, 4, &aom_variance16x16_sse2), 2463 VarianceParams(4, 3, &aom_variance16x8_sse2), 2464 VarianceParams(3, 4, &aom_variance8x16_sse2), 2465 VarianceParams(3, 3, &aom_variance8x8_sse2), 2466 VarianceParams(3, 2, &aom_variance8x4_sse2), 2467 VarianceParams(2, 3, &aom_variance4x8_sse2), 2468 VarianceParams(2, 2, &aom_variance4x4_sse2), 2469 #if !CONFIG_REALTIME_ONLY 2470 VarianceParams(6, 4, &aom_variance64x16_sse2), 2471 VarianceParams(5, 3, &aom_variance32x8_sse2), 2472 VarianceParams(4, 6, &aom_variance16x64_sse2), 2473 VarianceParams(4, 2, &aom_variance16x4_sse2), 2474 VarianceParams(3, 5, &aom_variance8x32_sse2), 2475 VarianceParams(2, 4, &aom_variance4x16_sse2), 2476 #endif 2477 }; 2478 INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest, 2479 ::testing::ValuesIn(kArrayVariance_sse2)); 2480 2481 const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = { 2482 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0), 2483 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0), 2484 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0), 2485 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0) 2486 }; 2487 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest, 2488
// Completes the SSE2 GetSseSum8x8QuadTest registration, then defines and
// registers kArrayGetSseSum16x16Dual_sse2 (one dual function, four size pairs).
// The CONFIG_AV1_HIGHBITDEPTH section opens here: SSE2 MseHBDWxHTest (trailing
// argument 10) and AvxHBDMseTest (16x16 and 8x8 at 12, 10 and 8 bits), followed
// by the SSE4_1 4x4 high-bitdepth sub-pixel variance entries at 8, 10 and 12
// bits. The SSE4_1 sub-pixel average registration begins at the end of this
// line.
// NOTE(review): the `#if HAVE_SSE2` on this line sits inside the HAVE_SSE2
// region that is already open at this point, so it looks redundant - confirm
// before simplifying.
::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2)); 2489 2490 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = { 2491 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0), 2492 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0), 2493 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0), 2494 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0) 2495 }; 2496 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest, 2497 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2)); 2498 2499 #if CONFIG_AV1_HIGHBITDEPTH 2500 #if HAVE_SSE2 2501 INSTANTIATE_TEST_SUITE_P( 2502 SSE2, MseHBDWxHTest, 2503 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10), 2504 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10), 2505 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10), 2506 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2, 2507 10))); 2508 2509 INSTANTIATE_TEST_SUITE_P( 2510 SSE2, AvxHBDMseTest, 2511 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12), 2512 MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12), 2513 MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10), 2514 MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10), 2515 MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8), 2516 MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8))); 2517 #endif // HAVE_SSE2 2518 #if HAVE_SSE4_1 2519 INSTANTIATE_TEST_SUITE_P( 2520 SSE4_1, AvxSubpelVarianceTest, 2521 ::testing::Values( 2522 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1, 2523 8), 2524 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1, 2525 10), 2526 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1, 2527 12))); 2528 2529 INSTANTIATE_TEST_SUITE_P( 2530 SSE4_1, AvxSubpelAvgVarianceTest, 2531 ::testing::Values( 2532 SubpelAvgVarianceParams(2, 2, 2533 &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1, 2534 8), 2535 SubpelAvgVarianceParams(2, 2, 2536
// Finishes the SSE4_1 AvxSubpelAvgVarianceTest list (10- and 12-bit 4x4
// entries) and closes HAVE_SSE4_1. A small HAVE_AVX2 block registers the
// 10-bit 16x16 function for AvxHBDMseTest. kArrayHBDVariance_sse2 then begins:
// 12-bit entries from 128x128 down to 8x8, followed by the 10-bit entries; the
// 10-bit 16x8 entry is split across the end of this line.
&aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1, 2537 10), 2538 SubpelAvgVarianceParams(2, 2, 2539 &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1, 2540 12))); 2541 #endif // HAVE_SSE4_1 2542 2543 #if HAVE_AVX2 2544 INSTANTIATE_TEST_SUITE_P( 2545 AVX2, AvxHBDMseTest, 2546 ::testing::Values(MseParams(4, 4, &aom_highbd_10_mse16x16_avx2, 10))); 2547 #endif // HAVE_AVX2 2548 2549 const VarianceParams kArrayHBDVariance_sse2[] = { 2550 VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12), 2551 VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12), 2552 VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12), 2553 VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12), 2554 VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12), 2555 VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12), 2556 VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12), 2557 VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12), 2558 VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12), 2559 VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12), 2560 VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12), 2561 VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12), 2562 VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12), 2563 VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10), 2564 VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10), 2565 VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10), 2566 VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10), 2567 VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10), 2568 VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10), 2569 VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10), 2570 VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10), 2571 VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10), 2572 VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10), 2573 VarianceParams(4, 3,
// Continuation of kArrayHBDVariance_sse2: the remaining 10-bit entries, the
// full 8-bit run from 128x128 down to 8x8, and then the sizes added only when
// !CONFIG_REALTIME_ONLY (64x16, 16x64, 32x8, 8x32 at 12, 10 and 8 bits). The
// 16x4 and 4x16 entries are present only as commented-out lines, so those
// functions are not registered here.
&aom_highbd_10_variance16x8_sse2, 10), 2574 VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10), 2575 VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10), 2576 VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8), 2577 VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8), 2578 VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8), 2579 VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8), 2580 VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8), 2581 VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8), 2582 VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8), 2583 VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8), 2584 VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8), 2585 VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8), 2586 VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8), 2587 VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8), 2588 VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8), 2589 #if !CONFIG_REALTIME_ONLY 2590 VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12), 2591 VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12), 2592 VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12), 2593 VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12), 2594 // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12), 2595 // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12), 2596 VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10), 2597 VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10), 2598 VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10), 2599 VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10), 2600 // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10), 2601 // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10), 2602 VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8), 2603 VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8), 2604 VarianceParams(5, 3,
// Ends kArrayHBDVariance_sse2 (last 8-bit entries; 16x4 and 4x16 remain
// commented out) and registers it for AvxHBDVarianceTest under the SSE2
// prefix. The HAVE_AVX2 block that follows registers MseHBDWxHTest with the
// four (3|2, 3|2) combinations and defines kArrayHBDVariance_avx2, which
// contains 10-bit functions only (128x128 down to 8x8, plus 64x16, 32x8,
// 16x64 and 8x32 when !CONFIG_REALTIME_ONLY). Its INSTANTIATE call starts at
// the end of this line.
&aom_highbd_8_variance32x8_sse2, 8), 2605 VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8), 2606 // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8), 2607 // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8), 2608 #endif 2609 }; 2610 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest, 2611 ::testing::ValuesIn(kArrayHBDVariance_sse2)); 2612 2613 #if HAVE_AVX2 2614 2615 INSTANTIATE_TEST_SUITE_P( 2616 AVX2, MseHBDWxHTest, 2617 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10), 2618 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10), 2619 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10), 2620 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2, 2621 10))); 2622 2623 const VarianceParams kArrayHBDVariance_avx2[] = { 2624 VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10), 2625 VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10), 2626 VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10), 2627 VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10), 2628 VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10), 2629 VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10), 2630 VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10), 2631 VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10), 2632 VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10), 2633 VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10), 2634 VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10), 2635 VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10), 2636 VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10), 2637 #if !CONFIG_REALTIME_ONLY 2638 VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10), 2639 VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10), 2640 VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10), 2641 VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10), 2642 #endif 2643 }; 2644 2645 INSTANTIATE_TEST_SUITE_P(AVX2,
// Completes the AVX2 AvxHBDVarianceTest registration, then defines
// kArrayHBDSubpelVariance_avx2 (10-bit sub-pixel variance, 128x128 down to
// 8x8, no REALTIME_ONLY-gated extras) and registers it for
// AvxHBDSubpelVarianceTest before HAVE_AVX2 closes. kArrayHBDSubpelVariance_sse2
// begins afterwards with its 12-bit entries; the 32x32 entry is split across
// the end of this line.
AvxHBDVarianceTest, 2646 ::testing::ValuesIn(kArrayHBDVariance_avx2)); 2647 2648 const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = { 2649 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10), 2650 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10), 2651 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10), 2652 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10), 2653 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10), 2654 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10), 2655 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10), 2656 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10), 2657 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10), 2658 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10), 2659 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10), 2660 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10), 2661 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10), 2662 }; 2663 2664 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest, 2665 ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2)); 2666 #endif // HAVE_AVX2 2667 2668 const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = { 2669 SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12), 2670 SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12), 2671 SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12), 2672 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12), 2673 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12), 2674 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12), 2675 SubpelVarianceParams(5, 5,
// Continuation of kArrayHBDSubpelVariance_sse2: the remaining 12-bit entries
// (down to 8x4), the complete 10-bit run from 128x128 to 8x4, and the first
// 8-bit entries. Each bit-depth run follows the same size order; the 8-bit
// 64x128 entry is split across the end of this line.
&aom_highbd_12_sub_pixel_variance32x32_sse2, 12), 2676 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12), 2677 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12), 2678 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12), 2679 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12), 2680 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12), 2681 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12), 2682 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12), 2683 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10), 2684 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10), 2685 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10), 2686 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10), 2687 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10), 2688 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10), 2689 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10), 2690 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10), 2691 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10), 2692 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10), 2693 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10), 2694 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10), 2695 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10), 2696 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10), 2697 SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8), 2698 SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8), 2699 SubpelVarianceParams(6, 7,
// Continuation of kArrayHBDSubpelVariance_sse2: the rest of the 8-bit run
// (down to 8x4), then the sizes added only when !CONFIG_REALTIME_ONLY (64x16,
// 16x64, 32x8, 8x32, 16x4) for 12 and 10 bits, with the 8-bit extras starting
// at the end of this line. The 4x16 entries are present only as commented-out
// lines, so those functions are not registered here.
&aom_highbd_8_sub_pixel_variance64x128_sse2, 8), 2700 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8), 2701 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8), 2702 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8), 2703 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8), 2704 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8), 2705 SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8), 2706 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8), 2707 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8), 2708 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8), 2709 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8), 2710 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8), 2711 #if !CONFIG_REALTIME_ONLY 2712 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12), 2713 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12), 2714 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12), 2715 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12), 2716 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12), 2717 // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12), 2718 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10), 2719 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10), 2720 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10), 2721 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10), 2722 SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10), 2723 // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10), 2724 SubpelVarianceParams(6, 4,
// Ends kArrayHBDSubpelVariance_sse2 (8-bit extras; 4x16 remains commented out)
// and registers it for AvxHBDSubpelVarianceTest under the SSE2 prefix.
// kArrayHBDSubpelAvgVariance_sse2 then begins. Unlike the tables above, each
// bit-depth run here starts at 64x64; no 128-wide or 128-tall entries appear.
// This line holds the 12-bit run (64x64 down to 8x4) and the first three
// 10-bit entries.
&aom_highbd_8_sub_pixel_variance64x16_sse2, 8), 2725 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8), 2726 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8), 2727 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8), 2728 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8), 2729 // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8), 2730 #endif 2731 }; 2732 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest, 2733 ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2)); 2734 2735 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = { 2736 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2, 2737 12), 2738 SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2, 2739 12), 2740 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2, 2741 12), 2742 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2, 2743 12), 2744 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2, 2745 12), 2746 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2, 2747 12), 2748 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2, 2749 12), 2750 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2, 2751 12), 2752 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2, 2753 12), 2754 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2, 2755 12), 2756 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2, 2757 12), 2758 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2, 2759 10), 2760 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2, 2761 10), 2762 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2, 2763 10), 2764
// Continuation of kArrayHBDSubpelAvgVariance_sse2: the rest of the 10-bit run
// (32x32 down to 8x4), the full 8-bit run (64x64 down to 8x4), and the start
// of the !CONFIG_REALTIME_ONLY extras with the 12-bit 64x16 and 16x64 entries.
SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2, 2765 10), 2766 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2, 2767 10), 2768 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2, 2769 10), 2770 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2, 2771 10), 2772 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2, 2773 10), 2774 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2, 2775 10), 2776 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2, 2777 10), 2778 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2, 2779 10), 2780 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2, 2781 8), 2782 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2, 2783 8), 2784 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2, 2785 8), 2786 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2, 2787 8), 2788 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2, 2789 8), 2790 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2, 2791 8), 2792 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2, 2793 8), 2794 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2, 2795 8), 2796 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2, 2797 8), 2798 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2, 2799 8), 2800 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2, 2801 8), 2802 2803 #if !CONFIG_REALTIME_ONLY 2804 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2, 2805 12), 2806 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2, 2807 12), 2808
// Ends kArrayHBDSubpelAvgVariance_sse2: remaining !CONFIG_REALTIME_ONLY extras
// (32x8, 8x32, 16x4 at 12 bits, then 64x16 through 16x4 at 10 and 8 bits; the
// 4x16 entries are commented out), followed by its SSE2 registration for
// AvxHBDSubpelAvgVarianceTest. Two #endif directives then close the
// high-bitdepth and SSE2 regions, and the HAVE_SSSE3 section opens with the
// first entries of kArraySubpelVariance_ssse3 (trailing argument 0).
// NOTE(review): the trailing labels on the two consecutive #endif directives
// appear swapped relative to the nesting. `#if HAVE_SSE2` is opened before
// `#if CONFIG_AV1_HIGHBITDEPTH`, so the first #endif actually closes
// CONFIG_AV1_HIGHBITDEPTH and the second closes HAVE_SSE2. The preprocessor
// result is the same either way; only the labels are misleading.
SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2, 2809 12), 2810 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2, 2811 12), 2812 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2, 2813 12), 2814 // SubpelAvgVarianceParams(2, 4, 2815 // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12), 2816 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2, 2817 10), 2818 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2, 2819 10), 2820 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2, 2821 10), 2822 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2, 2823 10), 2824 SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2, 2825 10), 2826 // SubpelAvgVarianceParams(2, 4, 2827 // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10), 2828 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2, 2829 8), 2830 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2, 2831 8), 2832 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2, 2833 8), 2834 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2, 2835 8), 2836 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2, 2837 8), 2838 // SubpelAvgVarianceParams(2, 4, 2839 // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8), 2840 #endif 2841 }; 2842 2843 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest, 2844 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2)); 2845 #endif // HAVE_SSE2 2846 #endif // CONFIG_AV1_HIGHBITDEPTH 2847 2848 #if HAVE_SSSE3 2849 const SubpelVarianceParams kArraySubpelVariance_ssse3[] = { 2850 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0), 2851 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0), 2852 SubpelVarianceParams(6, 7,
// Remainder of kArraySubpelVariance_ssse3 (64x128 down to 4x4, plus 64x16,
// 16x64, 32x8, 8x32, 16x4 and 4x16 when !CONFIG_REALTIME_ONLY) and its
// registration for AvxSubpelVarianceTest under the SSSE3 prefix.
// kArraySubpelAvgVariance_ssse3 then begins with the same size order; its
// 64x32 entry is split across the end of this line. All entries pass 0 as the
// last argument.
&aom_sub_pixel_variance64x128_ssse3, 0), 2853 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0), 2854 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0), 2855 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0), 2856 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0), 2857 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0), 2858 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0), 2859 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0), 2860 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0), 2861 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0), 2862 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0), 2863 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0), 2864 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0), 2865 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0), 2866 #if !CONFIG_REALTIME_ONLY 2867 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0), 2868 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0), 2869 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0), 2870 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0), 2871 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0), 2872 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0), 2873 #endif 2874 }; 2875 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest, 2876 ::testing::ValuesIn(kArraySubpelVariance_ssse3)); 2877 2878 const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = { 2879 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0), 2880 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0), 2881 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0), 2882 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0), 2883 SubpelAvgVarianceParams(6, 5,
// Remainder of kArraySubpelAvgVariance_ssse3 (64x32 down to 4x4, plus the six
// !CONFIG_REALTIME_ONLY sizes) and its registration for
// AvxSubpelAvgVarianceTest, after which HAVE_SSSE3 closes. The HAVE_SSE4_1
// section, gated additionally on !CONFIG_REALTIME_ONLY, starts the
// AvxObmcSubpelVarianceTest list with the 128x128, 128x64 and 64x128 entries.
&aom_sub_pixel_avg_variance64x32_ssse3, 0), 2884 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0), 2885 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0), 2886 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0), 2887 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0), 2888 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0), 2889 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0), 2890 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0), 2891 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0), 2892 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0), 2893 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0), 2894 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0), 2895 #if !CONFIG_REALTIME_ONLY 2896 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0), 2897 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0), 2898 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0), 2899 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0), 2900 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0), 2901 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0), 2902 #endif 2903 }; 2904 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest, 2905 ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3)); 2906 #endif // HAVE_SSSE3 2907 2908 #if HAVE_SSE4_1 2909 #if !CONFIG_REALTIME_ONLY 2910 INSTANTIATE_TEST_SUITE_P( 2911 SSE4_1, AvxObmcSubpelVarianceTest, 2912 ::testing::Values( 2913 ObmcSubpelVarianceParams(7, 7, 2914 &aom_obmc_sub_pixel_variance128x128_sse4_1, 0), 2915 ObmcSubpelVarianceParams(7, 6, 2916 &aom_obmc_sub_pixel_variance128x64_sse4_1, 0), 2917 ObmcSubpelVarianceParams(6, 7, 2918 &aom_obmc_sub_pixel_variance64x128_sse4_1, 0), 2919
// Remainder of the SSE4_1 AvxObmcSubpelVarianceTest list: 64x64 down to 4x4,
// then 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16, all with 0 as the last
// argument. The !CONFIG_REALTIME_ONLY and HAVE_SSE4_1 regions close, and the
// HAVE_AVX2 section opens with the MseWxHTest registration (four (3|2, 3|2)
// combinations, trailing argument 8).
ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1, 2920 0), 2921 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1, 2922 0), 2923 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1, 2924 0), 2925 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1, 2926 0), 2927 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1, 2928 0), 2929 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1, 2930 0), 2931 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1, 2932 0), 2933 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1, 2934 0), 2935 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1, 2936 0), 2937 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1, 2938 0), 2939 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1, 2940 0), 2941 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1, 2942 0), 2943 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1, 2944 0), 2945 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1, 2946 0), 2947 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1, 2948 0), 2949 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1, 2950 0), 2951 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1, 2952 0), 2953 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1, 2954 0), 2955 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1, 2956 0))); 2957 #endif 2958 #endif // HAVE_SSE4_1 2959 2960 #if HAVE_AVX2 2961 2962 INSTANTIATE_TEST_SUITE_P( 2963 AVX2, MseWxHTest, 2964 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8), 2965 MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8), 2966 MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8), 2967 MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8))); 2968 2969
// AVX2 registrations for the 8-bit paths: Mse16xHTest (four (3|2, 3|2)
// combinations), AvxMseTest (16x16 only), and kArrayVariance_avx2, which lists
// aom_variance*_avx2 from 128x128 down to 16x8 and adds 64x16, 16x64, 32x8 and
// 16x4 when !CONFIG_REALTIME_ONLY; no entry narrower than 16 appears.
// kArrayGetSseSum8x8Quad_avx2 is defined and registered, and
// kArrayGetSseSum16x16Dual_avx2 begins at the end of this line.
INSTANTIATE_TEST_SUITE_P( 2970 AVX2, Mse16xHTest, 2971 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8), 2972 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8), 2973 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8), 2974 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8))); 2975 2976 INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest, 2977 ::testing::Values(MseParams(4, 4, 2978 &aom_mse16x16_avx2))); 2979 2980 const VarianceParams kArrayVariance_avx2[] = { 2981 VarianceParams(7, 7, &aom_variance128x128_avx2), 2982 VarianceParams(7, 6, &aom_variance128x64_avx2), 2983 VarianceParams(6, 7, &aom_variance64x128_avx2), 2984 VarianceParams(6, 6, &aom_variance64x64_avx2), 2985 VarianceParams(6, 5, &aom_variance64x32_avx2), 2986 VarianceParams(5, 6, &aom_variance32x64_avx2), 2987 VarianceParams(5, 5, &aom_variance32x32_avx2), 2988 VarianceParams(5, 4, &aom_variance32x16_avx2), 2989 VarianceParams(4, 5, &aom_variance16x32_avx2), 2990 VarianceParams(4, 4, &aom_variance16x16_avx2), 2991 VarianceParams(4, 3, &aom_variance16x8_avx2), 2992 #if !CONFIG_REALTIME_ONLY 2993 VarianceParams(6, 4, &aom_variance64x16_avx2), 2994 VarianceParams(4, 6, &aom_variance16x64_avx2), 2995 VarianceParams(5, 3, &aom_variance32x8_avx2), 2996 VarianceParams(4, 2, &aom_variance16x4_avx2), 2997 #endif 2998 }; 2999 INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest, 3000 ::testing::ValuesIn(kArrayVariance_avx2)); 3001 3002 const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = { 3003 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0), 3004 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0), 3005 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0), 3006 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0) 3007 }; 3008 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest, 3009 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2)); 3010 3011 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = { 3012 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3013 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0), 3014 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0), 3015 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0) 3016 }; 3017 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest, 3018 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2)); 3019 3020 const SubpelVarianceParams kArraySubpelVariance_avx2[] = { 3021 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0), 3022 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0), 3023 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0), 3024 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0), 3025 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0), 3026 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0), 3027 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0), 3028 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0), 3029 3030 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0), 3031 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0), 3032 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0), 3033 #if !CONFIG_REALTIME_ONLY 3034 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0), 3035 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0), 3036 #endif 3037 }; 3038 INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest, 3039 ::testing::ValuesIn(kArraySubpelVariance_avx2)); 3040 3041 INSTANTIATE_TEST_SUITE_P( 3042 AVX2, AvxSubpelAvgVarianceTest, 3043 ::testing::Values( 3044 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2, 3045 0), 3046 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2, 3047 0), 3048 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2, 3049 0), 3050 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0), 3051 SubpelAvgVarianceParams(6, 5, 
&aom_sub_pixel_avg_variance64x32_avx2, 0), 3052 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0), 3053 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0), 3054 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2, 3055 0))); 3056 #endif // HAVE_AVX2 3057 3058 #if HAVE_NEON 3059 INSTANTIATE_TEST_SUITE_P( 3060 NEON, MseWxHTest, 3061 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8), 3062 MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8), 3063 MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8), 3064 MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8))); 3065 3066 INSTANTIATE_TEST_SUITE_P( 3067 NEON, Mse16xHTest, 3068 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8), 3069 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8), 3070 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8), 3071 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8))); 3072 3073 #if !CONFIG_REALTIME_ONLY 3074 INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest, 3075 ::testing::Values(aom_get_mb_ss_neon)); 3076 #endif // !CONFIG_REALTIME_ONLY 3077 3078 INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest, 3079 ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon), 3080 MseParams(3, 4, &aom_mse8x16_neon), 3081 MseParams(4, 4, &aom_mse16x16_neon), 3082 MseParams(4, 3, &aom_mse16x8_neon))); 3083 3084 const VarianceParams kArrayVariance_neon[] = { 3085 VarianceParams(7, 7, &aom_variance128x128_neon), 3086 VarianceParams(6, 6, &aom_variance64x64_neon), 3087 VarianceParams(7, 6, &aom_variance128x64_neon), 3088 VarianceParams(6, 7, &aom_variance64x128_neon), 3089 VarianceParams(6, 6, &aom_variance64x64_neon), 3090 VarianceParams(6, 5, &aom_variance64x32_neon), 3091 VarianceParams(5, 6, &aom_variance32x64_neon), 3092 VarianceParams(5, 5, &aom_variance32x32_neon), 3093 VarianceParams(5, 4, &aom_variance32x16_neon), 3094 VarianceParams(4, 5, &aom_variance16x32_neon), 3095 VarianceParams(4, 4, &aom_variance16x16_neon), 3096 VarianceParams(4, 3, 
&aom_variance16x8_neon), 3097 VarianceParams(3, 4, &aom_variance8x16_neon), 3098 VarianceParams(3, 3, &aom_variance8x8_neon), 3099 VarianceParams(3, 2, &aom_variance8x4_neon), 3100 VarianceParams(2, 3, &aom_variance4x8_neon), 3101 VarianceParams(2, 2, &aom_variance4x4_neon), 3102 #if !CONFIG_REALTIME_ONLY 3103 VarianceParams(2, 4, &aom_variance4x16_neon), 3104 VarianceParams(4, 2, &aom_variance16x4_neon), 3105 VarianceParams(3, 5, &aom_variance8x32_neon), 3106 VarianceParams(5, 3, &aom_variance32x8_neon), 3107 VarianceParams(4, 6, &aom_variance16x64_neon), 3108 VarianceParams(6, 4, &aom_variance64x16_neon), 3109 #endif 3110 }; 3111 3112 INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest, 3113 ::testing::ValuesIn(kArrayVariance_neon)); 3114 3115 const SubpelVarianceParams kArraySubpelVariance_neon[] = { 3116 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0), 3117 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0), 3118 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0), 3119 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0), 3120 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0), 3121 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0), 3122 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0), 3123 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0), 3124 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0), 3125 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0), 3126 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0), 3127 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0), 3128 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0), 3129 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0), 3130 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0), 3131 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0), 3132 #if !CONFIG_REALTIME_ONLY 3133 SubpelVarianceParams(6, 
4, &aom_sub_pixel_variance64x16_neon, 0), 3134 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0), 3135 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0), 3136 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0), 3137 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0), 3138 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0), 3139 #endif 3140 }; 3141 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest, 3142 ::testing::ValuesIn(kArraySubpelVariance_neon)); 3143 3144 const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = { 3145 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0), 3146 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0), 3147 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0), 3148 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0), 3149 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0), 3150 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0), 3151 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0), 3152 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0), 3153 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0), 3154 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0), 3155 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0), 3156 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0), 3157 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0), 3158 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0), 3159 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0), 3160 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0), 3161 #if !CONFIG_REALTIME_ONLY 3162 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0), 3163 SubpelAvgVarianceParams(4, 6, 
&aom_sub_pixel_avg_variance16x64_neon, 0), 3164 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0), 3165 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0), 3166 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0), 3167 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0), 3168 #endif 3169 }; 3170 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest, 3171 ::testing::ValuesIn(kArraySubpelAvgVariance_neon)); 3172 3173 #if !CONFIG_REALTIME_ONLY 3174 const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = { 3175 ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0), 3176 ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0), 3177 ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0), 3178 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0), 3179 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0), 3180 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0), 3181 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0), 3182 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0), 3183 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0), 3184 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0), 3185 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0), 3186 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0), 3187 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0), 3188 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0), 3189 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0), 3190 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0), 3191 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0), 3192 ObmcSubpelVarianceParams(4, 6, 
&aom_obmc_sub_pixel_variance16x64_neon, 0), 3193 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0), 3194 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0), 3195 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0), 3196 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0), 3197 }; 3198 INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest, 3199 ::testing::ValuesIn(kArrayObmcSubpelVariance_neon)); 3200 #endif 3201 3202 const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = { 3203 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0), 3204 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0), 3205 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0), 3206 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0) 3207 }; 3208 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest, 3209 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon)); 3210 3211 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = { 3212 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0), 3213 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0), 3214 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0), 3215 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0) 3216 }; 3217 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest, 3218 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon)); 3219 3220 #if CONFIG_AV1_HIGHBITDEPTH 3221 const VarianceParams kArrayHBDVariance_neon[] = { 3222 VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12), 3223 VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12), 3224 VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12), 3225 VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12), 3226 VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12), 3227 VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12), 3228 VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 
12), 3229 VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12), 3230 VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12), 3231 VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12), 3232 VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12), 3233 VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12), 3234 VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12), 3235 VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12), 3236 VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12), 3237 VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12), 3238 VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10), 3239 VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10), 3240 VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10), 3241 VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10), 3242 VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10), 3243 VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10), 3244 VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10), 3245 VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10), 3246 VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10), 3247 VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10), 3248 VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10), 3249 VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10), 3250 VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10), 3251 VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10), 3252 VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10), 3253 VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10), 3254 VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8), 3255 VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8), 3256 VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8), 3257 VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8), 3258 VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8), 3259 VarianceParams(5, 6, 
&aom_highbd_8_variance32x64_neon, 8), 3260 VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8), 3261 VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8), 3262 VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8), 3263 VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8), 3264 VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8), 3265 VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8), 3266 VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8), 3267 VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8), 3268 VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8), 3269 VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8), 3270 #if !CONFIG_REALTIME_ONLY 3271 VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12), 3272 VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12), 3273 VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12), 3274 VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12), 3275 VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12), 3276 VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12), 3277 VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10), 3278 VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10), 3279 VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10), 3280 VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10), 3281 VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10), 3282 VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10), 3283 VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8), 3284 VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8), 3285 VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8), 3286 VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8), 3287 VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8), 3288 VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8), 3289 #endif 3290 }; 3291 3292 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest, 3293 ::testing::ValuesIn(kArrayHBDVariance_neon)); 3294 
// NEON high-bitdepth sub-pixel variance kernels. The unconditional entries are
// grouped as 12-bit, 10-bit, then 8-bit; the trailing argument of each entry
// matches the bit depth in the function name, and the first two arguments
// equal log2(width) and log2(height) of the named block size.
const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = {
  SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12),
  SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12),
  SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12),
  SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12),
  SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12),
  SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12),
  SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12),
  SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12),
  SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12),
  SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12),
  SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12),
  SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12),
  SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12),
  SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10),
  SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10),
  SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10),
  SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10),
  SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10),
  SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10),
  SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10),
  SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10),
  SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10),
  SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10),
  SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10),
  SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10),
  SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10),
  SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8),
  SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8),
  SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8),
  SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8),
  SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8),
  SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8),
  SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8),
  SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8),
  SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8),
  SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8),
  SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8),
  SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8),
  SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8),
#if !CONFIG_REALTIME_ONLY
  // Entries compiled only when CONFIG_REALTIME_ONLY is off; here the groups
  // run 8-bit, 10-bit, then 12-bit.
  SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8),
  SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8),
  SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8),
  SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8),
  SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8),
  SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8),
  SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10),
  SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10),
  SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10),
  SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10),
  SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10),
  SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10),
  SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12),
  SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12),
  SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12),
  SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12),
  SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12),
  SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12),
#endif  // !CONFIG_REALTIME_ONLY
};

INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelVariance_neon));

// NEON high-bitdepth sub-pixel averaging variance kernels, grouped as 8-bit,
// 10-bit, then 12-bit. The trailing argument of each entry matches the bit
// depth in the function name.
const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = {
  SubpelAvgVarianceParams(7, 7,
                          &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8),
  SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon,
                          8),
  SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon,
                          8),
  SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon,
                          8),
  SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon,
                          8),
  SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon,
                          8),
  SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon,
                          8),
  SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon,
                          8),
  SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon,
                          8),
  SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon,
                          8),
  SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon,
                          8),
  SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon,
                          8),
  SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon,
                          8),
  SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon,
                          8),
  SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon,
                          8),
  SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon,
                          8),
  SubpelAvgVarianceParams(
      7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10),
  SubpelAvgVarianceParams(7, 6,
                          &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10),
  SubpelAvgVarianceParams(6, 7,
                          &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10),
  SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon,
                          10),
  SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon,
                          10),
  SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon,
                          10),
  SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon,
                          10),
  SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon,
                          10),
  SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon,
                          10),
  SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon,
                          10),
  SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon,
                          10),
  SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon,
                          10),
  SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon,
                          10),
  SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon,
                          10),
  SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon,
                          10),
  SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon,
                          10),
  SubpelAvgVarianceParams(
      7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12),
  SubpelAvgVarianceParams(7, 6,
                          &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12),
  SubpelAvgVarianceParams(6, 7,
                          &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12),
  SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon,
                          12),
  SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon,
                          12),
  SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon,
                          12),
  SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon,
                          12),
  SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon,
                          12),
  SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon,
                          12),
  SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon,
                          12),
  SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon,
                          12),
  SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon,
                          12),
  SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon,
                          12),
  SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon,
                          12),
  SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon,
                          12),
  SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon,
                          12),

#if !CONFIG_REALTIME_ONLY
  // Entries compiled only when CONFIG_REALTIME_ONLY is off.
  SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon,
                          8),
  SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon,
                          8),
  SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon,
                          8),
  SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon,
                          8),
  SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon,
                          8),
  SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon,
                          8),
  SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon,
                          10),
  SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon,
                          10),
  SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon,
                          10),
  SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon,
                          10),
  SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon,
                          10),
  SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon,
                          10),
  SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon,
                          12),
  SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon,
                          12),
  SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon,
                          12),
  SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon,
                          12),
  SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon,
                          12),
  SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon,
                          12),
#endif  // !CONFIG_REALTIME_ONLY
};

INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest,
                         ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon));

#if !CONFIG_REALTIME_ONLY
// NEON high-bitdepth OBMC sub-pixel variance kernels, grouped as 12-bit,
// 10-bit, then 8-bit; the whole table is compiled only when
// CONFIG_REALTIME_ONLY is off.
const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = {
  ObmcSubpelVarianceParams(
      7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12),
  ObmcSubpelVarianceParams(
      7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12),
  ObmcSubpelVarianceParams(
      6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12),
  ObmcSubpelVarianceParams(
      6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12),
  ObmcSubpelVarianceParams(
      6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12),
  ObmcSubpelVarianceParams(
      5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12),
  ObmcSubpelVarianceParams(
      5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12),
  ObmcSubpelVarianceParams(
      5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12),
  ObmcSubpelVarianceParams(
      4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12),
  ObmcSubpelVarianceParams(
      4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12),
  ObmcSubpelVarianceParams(4, 3,
                           &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12),
  ObmcSubpelVarianceParams(3, 4,
                           &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12),
  ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon,
                           12),
  ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon,
                           12),
  ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon,
                           12),
  ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon,
                           12),
  ObmcSubpelVarianceParams(
      6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12),
  ObmcSubpelVarianceParams(
      4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12),
  ObmcSubpelVarianceParams(5, 3,
                           &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12),
  ObmcSubpelVarianceParams(3, 5,
                           &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12),
  ObmcSubpelVarianceParams(4, 2,
                           &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12),
  ObmcSubpelVarianceParams(2, 4,
                           &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12),
  ObmcSubpelVarianceParams(
      7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10),
  ObmcSubpelVarianceParams(
      7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10),
  ObmcSubpelVarianceParams(
      6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10),
  ObmcSubpelVarianceParams(
      6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10),
  ObmcSubpelVarianceParams(
      6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10),
  ObmcSubpelVarianceParams(
      5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10),
  ObmcSubpelVarianceParams(
      5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10),
  ObmcSubpelVarianceParams(
      5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10),
  ObmcSubpelVarianceParams(
      4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10),
  ObmcSubpelVarianceParams(
      4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10),
  ObmcSubpelVarianceParams(4, 3,
                           &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10),
  ObmcSubpelVarianceParams(3, 4,
                           &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10),
  ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon,
                           10),
  ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon,
                           10),
  ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon,
                           10),
  ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon,
                           10),
  ObmcSubpelVarianceParams(
      6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10),
  ObmcSubpelVarianceParams(
      4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10),
  ObmcSubpelVarianceParams(5, 3,
                           &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10),
  ObmcSubpelVarianceParams(3, 5,
                           &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10),
  ObmcSubpelVarianceParams(4, 2,
                           &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10),
  ObmcSubpelVarianceParams(2, 4,
                           &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10),
  ObmcSubpelVarianceParams(
      7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8),
  ObmcSubpelVarianceParams(7, 6,
                           &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8),
  ObmcSubpelVarianceParams(6, 7,
                           &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8),
  ObmcSubpelVarianceParams(6, 6,
                           &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8),
  ObmcSubpelVarianceParams(6, 5,
                           &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8),
  ObmcSubpelVarianceParams(5, 6,
                           &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8),
  ObmcSubpelVarianceParams(5, 5,
                           &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8),
  ObmcSubpelVarianceParams(5, 4,
                           &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8),
  ObmcSubpelVarianceParams(4, 5,
                           &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8),
  ObmcSubpelVarianceParams(4, 4,
                           &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8),
  ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon,
                           8),
  ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon,
                           8),
  ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon,
                           8),
  ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon,
                           8),
  ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon,
                           8),
  ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon,
                           8),
  ObmcSubpelVarianceParams(6, 4,
                           &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8),
  ObmcSubpelVarianceParams(4, 6,
                           &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8),
  ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon,
                           8),
  ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon,
                           8),
  ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon,
                           8),
  ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon,
                           8),
};

INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest,
                         ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon));
#endif  // !CONFIG_REALTIME_ONLY

#endif  // CONFIG_AV1_HIGHBITDEPTH
3642 3643 #endif // HAVE_NEON 3644 3645 #if HAVE_NEON_DOTPROD 3646 3647 const VarianceParams kArrayVariance_neon_dotprod[] = { 3648 VarianceParams(7, 7, &aom_variance128x128_neon_dotprod), 3649 VarianceParams(6, 6, &aom_variance64x64_neon_dotprod), 3650 VarianceParams(7, 6, &aom_variance128x64_neon_dotprod), 3651 VarianceParams(6, 7, &aom_variance64x128_neon_dotprod), 3652 VarianceParams(6, 6, &aom_variance64x64_neon_dotprod), 3653 VarianceParams(6, 5, &aom_variance64x32_neon_dotprod), 3654 VarianceParams(5, 6, &aom_variance32x64_neon_dotprod), 3655 VarianceParams(5, 5, &aom_variance32x32_neon_dotprod), 3656 VarianceParams(5, 4, &aom_variance32x16_neon_dotprod), 3657 VarianceParams(4, 5, &aom_variance16x32_neon_dotprod), 3658 VarianceParams(4, 4, &aom_variance16x16_neon_dotprod), 3659 VarianceParams(4, 3, &aom_variance16x8_neon_dotprod), 3660 VarianceParams(3, 4, &aom_variance8x16_neon_dotprod), 3661 VarianceParams(3, 3, &aom_variance8x8_neon_dotprod), 3662 VarianceParams(3, 2, &aom_variance8x4_neon_dotprod), 3663 VarianceParams(2, 3, &aom_variance4x8_neon_dotprod), 3664 VarianceParams(2, 2, &aom_variance4x4_neon_dotprod), 3665 #if !CONFIG_REALTIME_ONLY 3666 VarianceParams(2, 4, &aom_variance4x16_neon_dotprod), 3667 VarianceParams(4, 2, &aom_variance16x4_neon_dotprod), 3668 VarianceParams(3, 5, &aom_variance8x32_neon_dotprod), 3669 VarianceParams(5, 3, &aom_variance32x8_neon_dotprod), 3670 VarianceParams(4, 6, &aom_variance16x64_neon_dotprod), 3671 VarianceParams(6, 4, &aom_variance64x16_neon_dotprod), 3672 #endif 3673 }; 3674 3675 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest, 3676 ::testing::ValuesIn(kArrayVariance_neon_dotprod)); 3677 3678 const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = { 3679 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), 3680 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), 3681 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0), 3682 GetSseSumParams(5, 4, 
&aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0) 3683 }; 3684 INSTANTIATE_TEST_SUITE_P( 3685 NEON_DOTPROD, GetSseSum8x8QuadTest, 3686 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod)); 3687 3688 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = { 3689 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), 3690 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), 3691 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0), 3692 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0) 3693 }; 3694 INSTANTIATE_TEST_SUITE_P( 3695 NEON_DOTPROD, GetSseSum16x16DualTest, 3696 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod)); 3697 3698 INSTANTIATE_TEST_SUITE_P( 3699 NEON_DOTPROD, AvxMseTest, 3700 ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod), 3701 MseParams(3, 4, &aom_mse8x16_neon_dotprod), 3702 MseParams(4, 4, &aom_mse16x16_neon_dotprod), 3703 MseParams(4, 3, &aom_mse16x8_neon_dotprod))); 3704 3705 #endif // HAVE_NEON_DOTPROD 3706 3707 #if HAVE_SVE 3708 3709 #if CONFIG_AV1_HIGHBITDEPTH 3710 const VarianceParams kArrayHBDVariance_sve[] = { 3711 VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12), 3712 VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12), 3713 VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12), 3714 VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12), 3715 VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12), 3716 VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12), 3717 VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12), 3718 VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12), 3719 VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12), 3720 VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12), 3721 VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12), 3722 VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12), 3723 VarianceParams(3, 3, 
&aom_highbd_12_variance8x8_sve, 12), 3724 VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12), 3725 VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12), 3726 VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12), 3727 VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10), 3728 VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10), 3729 VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10), 3730 VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10), 3731 VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10), 3732 VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10), 3733 VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10), 3734 VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10), 3735 VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10), 3736 VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10), 3737 VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10), 3738 VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10), 3739 VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10), 3740 VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10), 3741 VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10), 3742 VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10), 3743 VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8), 3744 VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8), 3745 VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8), 3746 VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8), 3747 VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8), 3748 VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8), 3749 VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8), 3750 VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8), 3751 VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8), 3752 VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8), 3753 VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8), 3754 VarianceParams(3, 4, 
&aom_highbd_8_variance8x16_sve, 8), 3755 VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8), 3756 VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8), 3757 VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8), 3758 VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8), 3759 #if !CONFIG_REALTIME_ONLY 3760 VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12), 3761 VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12), 3762 VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12), 3763 VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12), 3764 VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12), 3765 VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12), 3766 VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10), 3767 VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10), 3768 VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10), 3769 VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10), 3770 VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10), 3771 VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10), 3772 VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8), 3773 VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8), 3774 VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8), 3775 VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8), 3776 VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8), 3777 VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8), 3778 #endif 3779 }; 3780 3781 INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest, 3782 ::testing::ValuesIn(kArrayHBDVariance_sve)); 3783 3784 #endif // CONFIG_AV1_HIGHBITDEPTH 3785 #endif // HAVE_SVE 3786 3787 } // namespace