quantize_func_test.cc (33040B)
1 /* 2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <algorithm> 13 #include <tuple> 14 15 #include "gtest/gtest.h" 16 17 #include "config/aom_config.h" 18 #include "config/aom_dsp_rtcd.h" 19 #include "config/av1_rtcd.h" 20 21 #include "aom/aom_codec.h" 22 #include "aom_dsp/txfm_common.h" 23 #include "aom_ports/aom_timer.h" 24 #include "av1/encoder/encoder.h" 25 #include "av1/common/scan.h" 26 #include "test/acm_random.h" 27 #include "test/register_state_check.h" 28 #include "test/util.h" 29 30 namespace { 31 using libaom_test::ACMRandom; 32 33 #define QUAN_PARAM_LIST \ 34 const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, \ 35 const int16_t *round_ptr, const int16_t *quant_ptr, \ 36 const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, \ 37 tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, \ 38 const int16_t *scan, const int16_t *iscan 39 40 #define LP_QUANTIZE_PARAM_LIST \ 41 const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, \ 42 const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, \ 43 const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, \ 44 const int16_t *iscan 45 46 using LPQuantizeFunc = void (*)(LP_QUANTIZE_PARAM_LIST); 47 using QuantizeFunc = void (*)(QUAN_PARAM_LIST); 48 using QuantizeFuncHbd = void (*)(QUAN_PARAM_LIST, int log_scale); 49 50 #undef LP_QUANTIZE_PARAM_LIST 51 52 #define HBD_QUAN_FUNC \ 53 fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \ 54 qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, log_scale) 55 56 #define LBD_QUAN_FUNC \ 57 fn(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, \ 58 qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan) 59 60 template <QuantizeFuncHbd fn> 61 void highbd_quan16x16_wrapper(QUAN_PARAM_LIST) { 62 const int log_scale = 0; 63 HBD_QUAN_FUNC; 64 } 65 66 template <QuantizeFuncHbd fn> 67 void highbd_quan32x32_wrapper(QUAN_PARAM_LIST) { 68 const int log_scale = 1; 69 HBD_QUAN_FUNC; 70 } 71 72 template <QuantizeFuncHbd fn> 73 void highbd_quan64x64_wrapper(QUAN_PARAM_LIST) { 74 const int log_scale = 2; 75 HBD_QUAN_FUNC; 76 } 77 78 enum QuantType { TYPE_B, TYPE_DC, TYPE_FP }; 79 80 using std::tuple; 81 82 template <typename FuncType> 83 using QuantizeParam = 84 tuple<FuncType, FuncType, TX_SIZE, QuantType, aom_bit_depth_t>; 85 86 struct QuanTable { 87 QUANTS quant; 88 Dequants dequant; 89 }; 90 91 const int kTestNum = 1000; 92 93 #define GET_TEMPLATE_PARAM(k) std::get<k>(this->GetParam()) 94 95 template <typename CoeffType, typename FuncType> 96 class QuantizeTestBase 97 : public ::testing::TestWithParam<QuantizeParam<FuncType>> { 98 protected: 99 QuantizeTestBase() 100 : quant_ref_(GET_TEMPLATE_PARAM(0)), quant_(GET_TEMPLATE_PARAM(1)), 101 tx_size_(GET_TEMPLATE_PARAM(2)), type_(GET_TEMPLATE_PARAM(3)), 102 bd_(GET_TEMPLATE_PARAM(4)) {} 103 104 ~QuantizeTestBase() override = default; 105 106 void SetUp() override { 107 qtab_ = reinterpret_cast<QuanTable *>(aom_memalign(32, sizeof(*qtab_))); 108 ASSERT_NE(qtab_, nullptr); 109 const int n_coeffs = coeff_num(); 110 coeff_ = reinterpret_cast<CoeffType *>( 111 aom_memalign(32, 6 * n_coeffs * sizeof(CoeffType))); 112 ASSERT_NE(coeff_, nullptr); 113 InitQuantizer(); 114 } 115 116 void TearDown() override { 117 aom_free(qtab_); 118 qtab_ = nullptr; 119 aom_free(coeff_); 120 coeff_ = nullptr; 121 } 122 123 void InitQuantizer() { 124 av1_build_quantizer(bd_, 0, 0, 0, 0, 0, &qtab_->quant, &qtab_->dequant, 0); 125 } 126 127 virtual void RunQuantizeFunc( 128 const CoeffType *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, 129 const int16_t *round_ptr, const int16_t *quant_ptr, 130 const int16_t *quant_shift_ptr, CoeffType *qcoeff_ptr, 131 CoeffType *qcoeff_ref_ptr, CoeffType *dqcoeff_ptr, 132 CoeffType *dqcoeff_ref_ptr, const int16_t *dequant_ptr, 133 uint16_t *eob_ref_ptr, uint16_t *eob_ptr, const int16_t *scan, 134 const int16_t *iscan) = 0; 135 136 void QuantizeRun(bool is_loop, int q = 0, int test_num = 1) { 137 CoeffType *coeff_ptr = coeff_; 138 const intptr_t n_coeffs = coeff_num(); 139 140 CoeffType *qcoeff_ref = coeff_ptr + n_coeffs; 141 CoeffType *dqcoeff_ref = qcoeff_ref + n_coeffs; 142 143 CoeffType *qcoeff = dqcoeff_ref + n_coeffs; 144 CoeffType *dqcoeff = qcoeff + n_coeffs; 145 uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs); 146 147 // Testing uses 2-D DCT scan order table 148 const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT); 149 150 // Testing uses luminance quantization table 151 const int16_t *zbin = qtab_->quant.y_zbin[q]; 152 153 const int16_t *round = nullptr; 154 const int16_t *quant = nullptr; 155 if (type_ == TYPE_B) { 156 round = qtab_->quant.y_round[q]; 157 quant = qtab_->quant.y_quant[q]; 158 } else if (type_ == TYPE_FP) { 159 round = qtab_->quant.y_round_fp[q]; 160 quant = qtab_->quant.y_quant_fp[q]; 161 } 162 163 const int16_t *quant_shift = qtab_->quant.y_quant_shift[q]; 164 const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q]; 165 166 for (int i = 0; i < test_num; ++i) { 167 if (is_loop) FillCoeffRandom(); 168 169 memset(qcoeff_ref, 0, 5 * n_coeffs * sizeof(*qcoeff_ref)); 170 171 RunQuantizeFunc(coeff_ptr, n_coeffs, zbin, round, quant, quant_shift, 172 qcoeff, qcoeff_ref, dqcoeff, dqcoeff_ref, dequant, 173 &eob[0], &eob[1], sc->scan, sc->iscan); 174 175 for (int j = 0; j < n_coeffs; ++j) { 176 ASSERT_EQ(qcoeff_ref[j], qcoeff[j]) 177 << "Q mismatch on test: " << i << " at position: " << j 178 << " Q: " << q << " coeff: " << coeff_ptr[j]; 179 } 180 181 for (int j = 0; j < n_coeffs; ++j) { 182 ASSERT_EQ(dqcoeff_ref[j], dqcoeff[j]) 183 << "Dq mismatch on test: " << i << " at position: " << j 184 << " Q: " << q << " coeff: " << coeff_ptr[j]; 185 } 186 187 ASSERT_EQ(eob[0], eob[1]) 188 << "eobs mismatch on test: " << i << " Q: " << q; 189 } 190 } 191 192 void CompareResults(const CoeffType *buf_ref, const CoeffType *buf, int size, 193 const char *text, int q, int number) { 194 int i; 195 for (i = 0; i < size; ++i) { 196 ASSERT_EQ(buf_ref[i], buf[i]) << text << " mismatch on test: " << number 197 << " at position: " << i << " Q: " << q; 198 } 199 } 200 201 int coeff_num() const { return av1_get_max_eob(tx_size_); } 202 203 void FillCoeff(CoeffType c) { 204 const int n_coeffs = coeff_num(); 205 for (int i = 0; i < n_coeffs; ++i) { 206 coeff_[i] = c; 207 } 208 } 209 210 void FillCoeffRandom() { 211 const int n_coeffs = coeff_num(); 212 FillCoeffZero(); 213 const int num = rnd_.Rand16() % n_coeffs; 214 // Randomize the first non zero coeff position. 215 const int start = rnd_.Rand16() % n_coeffs; 216 const int end = std::min(start + num, n_coeffs); 217 for (int i = start; i < end; ++i) { 218 coeff_[i] = GetRandomCoeff(); 219 } 220 } 221 222 void FillCoeffRandomRows(int num) { 223 FillCoeffZero(); 224 for (int i = 0; i < num; ++i) { 225 coeff_[i] = GetRandomCoeff(); 226 } 227 } 228 229 void FillCoeffZero() { FillCoeff(0); } 230 231 void FillCoeffConstant() { 232 CoeffType c = GetRandomCoeff(); 233 FillCoeff(c); 234 } 235 236 void FillDcOnly() { 237 FillCoeffZero(); 238 coeff_[0] = GetRandomCoeff(); 239 } 240 241 void FillDcLargeNegative() { 242 FillCoeffZero(); 243 // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues 244 // like BUG=883 where the constant being compared was incorrectly 245 // initialized. 246 coeff_[0] = -8191; 247 } 248 249 CoeffType GetRandomCoeff() { 250 CoeffType coeff; 251 if (bd_ == AOM_BITS_8) { 252 coeff = 253 clamp(static_cast<int16_t>(rnd_.Rand16()), INT16_MIN + 1, INT16_MAX); 254 } else { 255 CoeffType min = -(1 << (7 + bd_)); 256 CoeffType max = -min - 1; 257 coeff = clamp(static_cast<CoeffType>(rnd_.Rand31()), min, max); 258 } 259 return coeff; 260 } 261 262 ACMRandom rnd_; 263 QuanTable *qtab_; 264 CoeffType *coeff_; 265 FuncType quant_ref_; 266 FuncType quant_; 267 TX_SIZE tx_size_; 268 QuantType type_; 269 aom_bit_depth_t bd_; 270 }; 271 272 class FullPrecisionQuantizeTest 273 : public QuantizeTestBase<tran_low_t, QuantizeFunc> { 274 void RunQuantizeFunc(const tran_low_t *coeff_ptr, intptr_t n_coeffs, 275 const int16_t *zbin_ptr, const int16_t *round_ptr, 276 const int16_t *quant_ptr, const int16_t *quant_shift_ptr, 277 tran_low_t *qcoeff_ptr, tran_low_t *qcoeff_ref_ptr, 278 tran_low_t *dqcoeff_ptr, tran_low_t *dqcoeff_ref_ptr, 279 const int16_t *dequant_ptr, uint16_t *eob_ref_ptr, 280 uint16_t *eob_ptr, const int16_t *scan, 281 const int16_t *iscan) override { 282 quant_ref_(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, 283 quant_shift_ptr, qcoeff_ref_ptr, dqcoeff_ref_ptr, dequant_ptr, 284 eob_ref_ptr, scan, iscan); 285 286 API_REGISTER_STATE_CHECK(quant_( 287 coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, 288 qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan)); 289 } 290 }; 291 292 class LowPrecisionQuantizeTest 293 : public QuantizeTestBase<int16_t, LPQuantizeFunc> { 294 void RunQuantizeFunc(const int16_t *coeff_ptr, intptr_t n_coeffs, 295 const int16_t * /*zbin_ptr*/, const int16_t *round_ptr, 296 const int16_t *quant_ptr, 297 const int16_t * /*quant_shift_ptr*/, int16_t *qcoeff_ptr, 298 int16_t *qcoeff_ref_ptr, int16_t *dqcoeff_ptr, 299 int16_t *dqcoeff_ref_ptr, const int16_t *dequant_ptr, 300 uint16_t *eob_ref_ptr, uint16_t *eob_ptr, 301 const int16_t *scan, const int16_t *iscan) override { 302 quant_ref_(coeff_ptr, n_coeffs, round_ptr, quant_ptr, qcoeff_ref_ptr, 303 dqcoeff_ref_ptr, dequant_ptr, eob_ref_ptr, scan, iscan); 304 305 API_REGISTER_STATE_CHECK(quant_(coeff_ptr, n_coeffs, round_ptr, quant_ptr, 306 qcoeff_ptr, dqcoeff_ptr, dequant_ptr, 307 eob_ptr, scan, iscan)); 308 } 309 }; 310 311 TEST_P(FullPrecisionQuantizeTest, ZeroInput) { 312 FillCoeffZero(); 313 QuantizeRun(false); 314 } 315 316 TEST_P(FullPrecisionQuantizeTest, LargeNegativeInput) { 317 FillDcLargeNegative(); 318 QuantizeRun(false, 0, 1); 319 } 320 321 TEST_P(FullPrecisionQuantizeTest, DcOnlyInput) { 322 FillDcOnly(); 323 QuantizeRun(false, 0, 1); 324 } 325 326 TEST_P(FullPrecisionQuantizeTest, RandomInput) { 327 QuantizeRun(true, 0, kTestNum); 328 } 329 330 TEST_P(FullPrecisionQuantizeTest, MultipleQ) { 331 for (int q = 0; q < QINDEX_RANGE; ++q) { 332 QuantizeRun(true, q, kTestNum); 333 } 334 } 335 336 // Force the coeff to be half the value of the dequant. This exposes a 337 // mismatch found in av1_quantize_fp_sse2(). 338 TEST_P(FullPrecisionQuantizeTest, CoeffHalfDequant) { 339 FillCoeff(16); 340 QuantizeRun(false, 25, 1); 341 } 342 343 TEST_P(FullPrecisionQuantizeTest, DISABLED_Speed) { 344 tran_low_t *coeff_ptr = coeff_; 345 const intptr_t n_coeffs = coeff_num(); 346 347 tran_low_t *qcoeff_ref = coeff_ptr + n_coeffs; 348 tran_low_t *dqcoeff_ref = qcoeff_ref + n_coeffs; 349 350 tran_low_t *qcoeff = dqcoeff_ref + n_coeffs; 351 tran_low_t *dqcoeff = qcoeff + n_coeffs; 352 uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs); 353 354 // Testing uses 2-D DCT scan order table 355 const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT); 356 357 // Testing uses luminance quantization table 358 const int q = 22; 359 const int16_t *zbin = qtab_->quant.y_zbin[q]; 360 const int16_t *round_fp = qtab_->quant.y_round_fp[q]; 361 const int16_t *quant_fp = qtab_->quant.y_quant_fp[q]; 362 const int16_t *quant_shift = qtab_->quant.y_quant_shift[q]; 363 const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q]; 364 const int kNumTests = 5000000; 365 aom_usec_timer timer, simd_timer; 366 int rows = tx_size_high[tx_size_]; 367 int cols = tx_size_wide[tx_size_]; 368 rows = AOMMIN(32, rows); 369 cols = AOMMIN(32, cols); 370 for (int cnt = 0; cnt <= rows; cnt++) { 371 FillCoeffRandomRows(cnt * cols); 372 373 aom_usec_timer_start(&timer); 374 for (int n = 0; n < kNumTests; ++n) { 375 quant_ref_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, 376 qcoeff, dqcoeff, dequant, eob, sc->scan, sc->iscan); 377 } 378 aom_usec_timer_mark(&timer); 379 380 aom_usec_timer_start(&simd_timer); 381 for (int n = 0; n < kNumTests; ++n) { 382 quant_(coeff_ptr, n_coeffs, zbin, round_fp, quant_fp, quant_shift, qcoeff, 383 dqcoeff, dequant, eob, sc->scan, sc->iscan); 384 } 385 aom_usec_timer_mark(&simd_timer); 386 387 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 388 const int simd_elapsed_time = 389 static_cast<int>(aom_usec_timer_elapsed(&simd_timer)); 390 printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time, 391 simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time)); 392 } 393 } 394 395 // TODO(crbug.com/aomedia/2796) 396 TEST_P(LowPrecisionQuantizeTest, ZeroInput) { 397 FillCoeffZero(); 398 QuantizeRun(false); 399 } 400 401 TEST_P(LowPrecisionQuantizeTest, LargeNegativeInput) { 402 FillDcLargeNegative(); 403 QuantizeRun(false, 0, 1); 404 } 405 406 TEST_P(LowPrecisionQuantizeTest, DcOnlyInput) { 407 FillDcOnly(); 408 QuantizeRun(false, 0, 1); 409 } 410 411 TEST_P(LowPrecisionQuantizeTest, RandomInput) { 412 QuantizeRun(true, 0, kTestNum); 413 } 414 415 TEST_P(LowPrecisionQuantizeTest, MultipleQ) { 416 for (int q = 0; q < QINDEX_RANGE; ++q) { 417 QuantizeRun(true, q, kTestNum); 418 } 419 } 420 421 // Force the coeff to be half the value of the dequant. This exposes a 422 // mismatch found in av1_quantize_fp_sse2(). 423 TEST_P(LowPrecisionQuantizeTest, CoeffHalfDequant) { 424 FillCoeff(16); 425 QuantizeRun(false, 25, 1); 426 } 427 428 TEST_P(LowPrecisionQuantizeTest, DISABLED_Speed) { 429 int16_t *coeff_ptr = coeff_; 430 const intptr_t n_coeffs = coeff_num(); 431 432 int16_t *qcoeff_ref = coeff_ptr + n_coeffs; 433 int16_t *dqcoeff_ref = qcoeff_ref + n_coeffs; 434 435 int16_t *qcoeff = dqcoeff_ref + n_coeffs; 436 int16_t *dqcoeff = qcoeff + n_coeffs; 437 uint16_t *eob = (uint16_t *)(dqcoeff + n_coeffs); 438 439 // Testing uses 2-D DCT scan order table 440 const SCAN_ORDER *const sc = get_default_scan(tx_size_, DCT_DCT); 441 442 // Testing uses luminance quantization table 443 const int q = 22; 444 const int16_t *round_fp = qtab_->quant.y_round_fp[q]; 445 const int16_t *quant_fp = qtab_->quant.y_quant_fp[q]; 446 const int16_t *dequant = qtab_->dequant.y_dequant_QTX[q]; 447 const int kNumTests = 5000000; 448 aom_usec_timer timer, simd_timer; 449 int rows = tx_size_high[tx_size_]; 450 int cols = tx_size_wide[tx_size_]; 451 rows = AOMMIN(32, rows); 452 cols = AOMMIN(32, cols); 453 for (int cnt = 0; cnt <= rows; cnt++) { 454 FillCoeffRandomRows(cnt * cols); 455 456 aom_usec_timer_start(&timer); 457 for (int n = 0; n < kNumTests; ++n) { 458 quant_ref_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff, 459 dequant, eob, sc->scan, sc->iscan); 460 } 461 aom_usec_timer_mark(&timer); 462 463 aom_usec_timer_start(&simd_timer); 464 for (int n = 0; n < kNumTests; ++n) { 465 quant_(coeff_ptr, n_coeffs, round_fp, quant_fp, qcoeff, dqcoeff, dequant, 466 eob, sc->scan, sc->iscan); 467 } 468 aom_usec_timer_mark(&simd_timer); 469 470 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 471 const int simd_elapsed_time = 472 static_cast<int>(aom_usec_timer_elapsed(&simd_timer)); 473 printf("c_time = %d \t simd_time = %d \t Gain = %f \n", elapsed_time, 474 simd_elapsed_time, ((float)elapsed_time / simd_elapsed_time)); 475 } 476 } 477 478 using std::make_tuple; 479 480 #if HAVE_AVX2 481 482 const QuantizeParam<LPQuantizeFunc> kLPQParamArrayAvx2[] = { 483 make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2, 484 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), 485 make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2, 486 static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8), 487 make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_avx2, 488 static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8) 489 }; 490 491 INSTANTIATE_TEST_SUITE_P(AVX2, LowPrecisionQuantizeTest, 492 ::testing::ValuesIn(kLPQParamArrayAvx2)); 493 494 const QuantizeParam<QuantizeFunc> kQParamArrayAvx2[] = { 495 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, 496 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), 497 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, 498 static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8), 499 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, 500 static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8), 501 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, 502 static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8), 503 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_avx2, 504 static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8), 505 make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, 506 static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), 507 make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, 508 static_cast<TX_SIZE>(TX_16X64), TYPE_FP, AOM_BITS_8), 509 make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_avx2, 510 static_cast<TX_SIZE>(TX_64X16), TYPE_FP, AOM_BITS_8), 511 make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_avx2, 512 static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8), 513 #if CONFIG_AV1_HIGHBITDEPTH 514 make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, 515 &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, 516 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), 517 make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, 518 &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, 519 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_10), 520 make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, 521 &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_avx2>, 522 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12), 523 make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, 524 &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, 525 static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), 526 make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, 527 &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, 528 static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_10), 529 make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, 530 &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_avx2>, 531 static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12), 532 make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, 533 &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, 534 static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8), 535 make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, 536 &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, 537 static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_10), 538 make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, 539 &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_avx2>, 540 static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12), 541 make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, 542 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 543 make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, 544 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), 545 make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_avx2, 546 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), 547 make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_avx2, 548 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), 549 make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_avx2, 550 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), 551 #if !CONFIG_REALTIME_ONLY 552 make_tuple(&aom_highbd_quantize_b_adaptive_c, 553 &aom_highbd_quantize_b_adaptive_avx2, 554 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 555 make_tuple(&aom_highbd_quantize_b_adaptive_c, 556 &aom_highbd_quantize_b_adaptive_avx2, 557 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), 558 make_tuple(&aom_highbd_quantize_b_adaptive_c, 559 &aom_highbd_quantize_b_adaptive_avx2, 560 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), 561 make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, 562 &aom_highbd_quantize_b_32x32_adaptive_avx2, 563 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), 564 make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, 565 &aom_highbd_quantize_b_32x32_adaptive_avx2, 566 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10), 567 make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, 568 &aom_highbd_quantize_b_32x32_adaptive_avx2, 569 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), 570 #endif // !CONFIG_REALTIME_ONLY 571 #endif // CONFIG_AV1_HIGHBITDEPTH 572 #if !CONFIG_REALTIME_ONLY 573 make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2, 574 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 575 make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2, 576 static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8), 577 make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_avx2, 578 static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8), 579 #endif // !CONFIG_REALTIME_ONLY 580 make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx2, 581 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 582 make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx2, 583 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), 584 make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_avx2, 585 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), 586 }; 587 588 INSTANTIATE_TEST_SUITE_P(AVX2, FullPrecisionQuantizeTest, 589 ::testing::ValuesIn(kQParamArrayAvx2)); 590 #endif // HAVE_AVX2 591 592 #if HAVE_SSE2 593 594 const QuantizeParam<LPQuantizeFunc> kLPQParamArraySSE2[] = { 595 make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2, 596 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), 597 make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2, 598 static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8), 599 make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2, 600 static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8) 601 }; 602 603 INSTANTIATE_TEST_SUITE_P(SSE2, LowPrecisionQuantizeTest, 604 ::testing::ValuesIn(kLPQParamArraySSE2)); 605 606 const QuantizeParam<QuantizeFunc> kQParamArraySSE2[] = { 607 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, 608 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), 609 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, 610 static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8), 611 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, 612 static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8), 613 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, 614 static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8), 615 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_sse2, 616 static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8), 617 make_tuple(&aom_quantize_b_c, &aom_quantize_b_sse2, 618 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 619 #if CONFIG_AV1_HIGHBITDEPTH 620 make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, 621 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 622 make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, 623 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), 624 make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_sse2, 625 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), 626 #if !CONFIG_REALTIME_ONLY 627 make_tuple(&aom_highbd_quantize_b_adaptive_c, 628 &aom_highbd_quantize_b_adaptive_sse2, 629 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 630 make_tuple(&aom_highbd_quantize_b_adaptive_c, 631 &aom_highbd_quantize_b_adaptive_sse2, 632 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_10), 633 make_tuple(&aom_highbd_quantize_b_adaptive_c, 634 &aom_highbd_quantize_b_adaptive_sse2, 635 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), 636 make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2, 637 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), 638 make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2, 639 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10), 640 make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_sse2, 641 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), 642 make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, 643 &aom_highbd_quantize_b_32x32_adaptive_sse2, 644 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), 645 make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, 646 &aom_highbd_quantize_b_32x32_adaptive_sse2, 647 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_10), 648 make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, 649 &aom_highbd_quantize_b_32x32_adaptive_sse2, 650 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), 651 #endif // !CONFIG_REALTIME_ONLY 652 make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2, 653 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), 654 make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2, 655 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10), 656 make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_sse2, 657 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), 658 #if !CONFIG_REALTIME_ONLY 659 make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, 660 &aom_highbd_quantize_b_64x64_adaptive_sse2, 661 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), 662 make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, 663 &aom_highbd_quantize_b_64x64_adaptive_sse2, 664 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_10), 665 make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, 666 &aom_highbd_quantize_b_64x64_adaptive_sse2, 667 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), 668 #endif // !CONFIG_REALTIME_ONLY 669 #endif // CONFIG_AV1_HIGHBITDEPTH 670 #if !CONFIG_REALTIME_ONLY 671 make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2, 672 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 673 make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2, 674 static_cast<TX_SIZE>(TX_8X8), TYPE_B, AOM_BITS_8), 675 make_tuple(&aom_quantize_b_adaptive_c, &aom_quantize_b_adaptive_sse2, 676 static_cast<TX_SIZE>(TX_4X4), TYPE_B, AOM_BITS_8), 677 make_tuple(&aom_quantize_b_32x32_adaptive_c, 678 &aom_quantize_b_32x32_adaptive_sse2, 679 static_cast<TX_SIZE>(TX_32X16), TYPE_B, AOM_BITS_8), 680 make_tuple(&aom_quantize_b_32x32_adaptive_c, 681 &aom_quantize_b_32x32_adaptive_sse2, 682 static_cast<TX_SIZE>(TX_16X32), TYPE_B, AOM_BITS_8), 683 make_tuple(&aom_quantize_b_32x32_adaptive_c, 684 &aom_quantize_b_32x32_adaptive_sse2, 685 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), 686 make_tuple(&aom_quantize_b_64x64_adaptive_c, 687 &aom_quantize_b_64x64_adaptive_sse2, 688 static_cast<TX_SIZE>(TX_32X64), TYPE_B, AOM_BITS_8), 689 make_tuple(&aom_quantize_b_64x64_adaptive_c, 690 &aom_quantize_b_64x64_adaptive_sse2, 691 static_cast<TX_SIZE>(TX_64X32), TYPE_B, AOM_BITS_8), 692 make_tuple(&aom_quantize_b_64x64_adaptive_c, 693 &aom_quantize_b_64x64_adaptive_sse2, 694 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8) 695 #endif // !CONFIG_REALTIME_ONLY 696 }; 697 698 INSTANTIATE_TEST_SUITE_P(SSE2, FullPrecisionQuantizeTest, 699 ::testing::ValuesIn(kQParamArraySSE2)); 700 #endif 701 702 #if HAVE_NEON 703 704 const QuantizeParam<LPQuantizeFunc> kLPQParamArrayNEON[] = { 705 make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon, 706 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), 707 make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon, 708 static_cast<TX_SIZE>(TX_8X8), TYPE_FP, AOM_BITS_8), 709 make_tuple(av1_quantize_lp_c, av1_quantize_lp_neon, 710 static_cast<TX_SIZE>(TX_4X4), TYPE_FP, AOM_BITS_8) 711 }; 712 713 INSTANTIATE_TEST_SUITE_P(NEON, LowPrecisionQuantizeTest, 714 ::testing::ValuesIn(kLPQParamArrayNEON)); 715 716 const QuantizeParam<QuantizeFunc> kQParamArrayNEON[] = { 717 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, 718 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_8), 719 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, 720 static_cast<TX_SIZE>(TX_4X16), TYPE_FP, AOM_BITS_8), 721 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, 722 static_cast<TX_SIZE>(TX_16X4), TYPE_FP, AOM_BITS_8), 723 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, 724 static_cast<TX_SIZE>(TX_8X32), TYPE_FP, AOM_BITS_8), 725 make_tuple(&av1_quantize_fp_c, &av1_quantize_fp_neon, 726 static_cast<TX_SIZE>(TX_32X8), TYPE_FP, AOM_BITS_8), 727 make_tuple(&av1_quantize_fp_32x32_c, &av1_quantize_fp_32x32_neon, 728 static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_8), 729 make_tuple(&av1_quantize_fp_64x64_c, &av1_quantize_fp_64x64_neon, 730 static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_8), 731 make_tuple(&aom_quantize_b_c, &aom_quantize_b_neon, 732 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 733 make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_neon, 734 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), 735 make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_neon, 736 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8), 737 738 #if CONFIG_AV1_HIGHBITDEPTH 739 make_tuple(&highbd_quan16x16_wrapper<av1_highbd_quantize_fp_c>, 740 &highbd_quan16x16_wrapper<av1_highbd_quantize_fp_neon>, 741 static_cast<TX_SIZE>(TX_16X16), TYPE_FP, AOM_BITS_12), 742 make_tuple(&highbd_quan32x32_wrapper<av1_highbd_quantize_fp_c>, 743 &highbd_quan32x32_wrapper<av1_highbd_quantize_fp_neon>, 744 static_cast<TX_SIZE>(TX_32X32), TYPE_FP, AOM_BITS_12), 745 make_tuple(&highbd_quan64x64_wrapper<av1_highbd_quantize_fp_c>, 746 &highbd_quan64x64_wrapper<av1_highbd_quantize_fp_neon>, 747 static_cast<TX_SIZE>(TX_64X64), TYPE_FP, AOM_BITS_12), 748 make_tuple(&aom_highbd_quantize_b_c, &aom_highbd_quantize_b_neon, 749 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), 750 make_tuple(&aom_highbd_quantize_b_32x32_c, &aom_highbd_quantize_b_32x32_neon, 751 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), 752 make_tuple(&aom_highbd_quantize_b_64x64_c, &aom_highbd_quantize_b_64x64_neon, 753 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), 754 #if !CONFIG_REALTIME_ONLY 755 make_tuple(&aom_highbd_quantize_b_adaptive_c, 756 &aom_highbd_quantize_b_adaptive_neon, 757 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_12), 758 make_tuple(&aom_highbd_quantize_b_32x32_adaptive_c, 759 &aom_highbd_quantize_b_32x32_adaptive_neon, 760 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_12), 761 make_tuple(&aom_highbd_quantize_b_64x64_adaptive_c, 762 &aom_highbd_quantize_b_64x64_adaptive_neon, 763 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_12), 764 #endif // !CONFIG_REALTIME_ONLY 765 #endif // CONFIG_AV1_HIGHBITDEPTH 766 }; 767 768 INSTANTIATE_TEST_SUITE_P(NEON, FullPrecisionQuantizeTest, 769 ::testing::ValuesIn(kQParamArrayNEON)); 770 #endif 771 772 #if HAVE_SSSE3 && AOM_ARCH_X86_64 773 INSTANTIATE_TEST_SUITE_P( 774 SSSE3, FullPrecisionQuantizeTest, 775 ::testing::Values( 776 make_tuple(&aom_quantize_b_c, &aom_quantize_b_ssse3, 777 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 778 make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_ssse3, 779 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8), 780 make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3, 781 static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8))); 782 783 #endif // HAVE_SSSE3 && AOM_ARCH_X86_64 784 785 #if HAVE_AVX 786 INSTANTIATE_TEST_SUITE_P( 787 AVX, FullPrecisionQuantizeTest, 788 ::testing::Values( 789 make_tuple(&aom_quantize_b_c, &aom_quantize_b_avx, 790 static_cast<TX_SIZE>(TX_16X16), TYPE_B, AOM_BITS_8), 791 make_tuple(&aom_quantize_b_32x32_c, &aom_quantize_b_32x32_avx, 792 static_cast<TX_SIZE>(TX_32X32), TYPE_B, AOM_BITS_8))); 793 794 #endif // HAVE_AVX 795 796 } // namespace