av1_convolve_scale_test.cc (16847B)
1 /* 2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <tuple> 13 #include <vector> 14 15 #include "gtest/gtest.h" 16 17 #include "config/aom_config.h" 18 #include "config/av1_rtcd.h" 19 20 #include "aom_ports/aom_timer.h" 21 #include "test/acm_random.h" 22 #include "test/register_state_check.h" 23 #include "test/util.h" 24 25 #include "av1/common/common_data.h" 26 #include "av1/common/filter.h" 27 28 namespace { 29 const int kTestIters = 10; 30 const int kPerfIters = 1000; 31 32 const int kVPad = 32; 33 const int kHPad = 32; 34 const int kXStepQn = 16; 35 const int kYStepQn = 20; 36 37 const int kNumFilterBanks = SWITCHABLE_FILTERS; 38 39 using libaom_test::ACMRandom; 40 using std::make_tuple; 41 using std::tuple; 42 43 template <typename SrcPixel> 44 class TestImage { 45 public: 46 TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) { 47 assert(bd < 16); 48 assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel))); 49 50 // Pad width by 2*kHPad and then round up to the next multiple of 16 51 // to get src_stride_. Add another 16 for dst_stride_ (to make sure 52 // something goes wrong if we use the wrong one) 53 src_stride_ = (w_ + 2 * kHPad + 15) & ~15; 54 dst_stride_ = src_stride_ + 16; 55 56 // Allocate image data 57 src_data_.resize(2 * src_block_size()); 58 dst_data_.resize(2 * dst_block_size()); 59 dst_16_data_.resize(2 * dst_block_size()); 60 } 61 62 void Initialize(ACMRandom *rnd); 63 void Check() const; 64 65 int src_stride() const { return src_stride_; } 66 int dst_stride() const { return dst_stride_; } 67 68 int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); } 69 int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); } 70 71 const SrcPixel *GetSrcData(bool ref, bool borders) const { 72 const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()]; 73 return borders ? block : block + kHPad + src_stride_ * kVPad; 74 } 75 76 SrcPixel *GetDstData(bool ref, bool borders) { 77 SrcPixel *block = &dst_data_[ref ? 0 : dst_block_size()]; 78 return borders ? block : block + kHPad + dst_stride_ * kVPad; 79 } 80 81 CONV_BUF_TYPE *GetDst16Data(bool ref, bool borders) { 82 CONV_BUF_TYPE *block = &dst_16_data_[ref ? 0 : dst_block_size()]; 83 return borders ? block : block + kHPad + dst_stride_ * kVPad; 84 } 85 86 private: 87 int w_, h_, bd_; 88 int src_stride_, dst_stride_; 89 90 std::vector<SrcPixel> src_data_; 91 std::vector<SrcPixel> dst_data_; 92 std::vector<CONV_BUF_TYPE> dst_16_data_; 93 }; 94 95 template <typename Pixel> 96 void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) { 97 if (!trash) { 98 memset(data, 0, sizeof(*data) * num_pixels); 99 return; 100 } 101 const Pixel mask = (1 << bd) - 1; 102 for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask; 103 } 104 105 template <typename Pixel> 106 void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd, 107 bool trash_edges, Pixel *data) { 108 assert(rnd); 109 const Pixel mask = (1 << bd) - 1; 110 111 // Fill in the first buffer with random data 112 // Top border 113 FillEdge(rnd, stride * kVPad, bd, trash_edges, data); 114 for (int r = 0; r < h; ++r) { 115 Pixel *row_data = data + (kVPad + r) * stride; 116 // Left border, contents, right border 117 FillEdge(rnd, kHPad, bd, trash_edges, row_data); 118 for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask; 119 FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w); 120 } 121 // Bottom border 122 FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h)); 123 124 const int bpp = sizeof(*data); 125 const int block_elts = stride * (h + 2 * kVPad); 126 const int block_size = bpp * block_elts; 127 128 // Now copy that to the second buffer 129 memcpy(data + block_elts, data, block_size); 130 } 131 132 template <typename SrcPixel> 133 void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) { 134 PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]); 135 PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]); 136 PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_16_data_[0]); 137 } 138 139 template <typename SrcPixel> 140 void TestImage<SrcPixel>::Check() const { 141 // If memcmp returns 0, there's nothing to do. 142 const int num_pixels = dst_block_size(); 143 const SrcPixel *ref_dst = &dst_data_[0]; 144 const SrcPixel *tst_dst = &dst_data_[num_pixels]; 145 146 const CONV_BUF_TYPE *ref_16_dst = &dst_16_data_[0]; 147 const CONV_BUF_TYPE *tst_16_dst = &dst_16_data_[num_pixels]; 148 149 if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) { 150 if (0 == memcmp(ref_16_dst, tst_16_dst, sizeof(*ref_16_dst) * num_pixels)) 151 return; 152 } 153 // Otherwise, iterate through the buffer looking for differences (including 154 // the edges) 155 const int stride = dst_stride_; 156 for (int r = 0; r < h_ + 2 * kVPad; ++r) { 157 for (int c = 0; c < w_ + 2 * kHPad; ++c) { 158 const int32_t ref_value = ref_dst[r * stride + c]; 159 const int32_t tst_value = tst_dst[r * stride + c]; 160 161 EXPECT_EQ(tst_value, ref_value) 162 << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad); 163 } 164 } 165 166 for (int r = 0; r < h_ + 2 * kVPad; ++r) { 167 for (int c = 0; c < w_ + 2 * kHPad; ++c) { 168 const int32_t ref_value = ref_16_dst[r * stride + c]; 169 const int32_t tst_value = tst_16_dst[r * stride + c]; 170 171 EXPECT_EQ(tst_value, ref_value) 172 << "Error in 16 bit buffer " 173 << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad); 174 } 175 } 176 } 177 178 using BlockDimension = tuple<int, int>; 179 180 struct BaseParams { 181 BaseParams(BlockDimension dimensions) : dims(dimensions) {} 182 183 BlockDimension dims; 184 }; 185 186 template <typename SrcPixel> 187 class ConvolveScaleTestBase : public ::testing::Test { 188 public: 189 ConvolveScaleTestBase() : image_(nullptr) {} 190 ~ConvolveScaleTestBase() override { delete image_; } 191 192 // Implemented by subclasses (SetUp depends on the parameters passed 193 // in and RunOne depends on the function to be tested. These can't 194 // be templated for low/high bit depths because they have different 195 // numbers of parameters) 196 void SetUp() override = 0; 197 virtual void RunOne(bool ref) = 0; 198 199 protected: 200 void SetParams(const BaseParams ¶ms, int bd) { 201 width_ = std::get<0>(params.dims); 202 height_ = std::get<1>(params.dims); 203 bd_ = bd; 204 205 delete image_; 206 image_ = new TestImage<SrcPixel>(width_, height_, bd_); 207 ASSERT_NE(image_, nullptr); 208 } 209 210 std::vector<ConvolveParams> GetConvParams() { 211 std::vector<ConvolveParams> convolve_params; 212 213 ConvolveParams param_no_compound = 214 get_conv_params_no_round(0, 0, nullptr, 0, 0, bd_); 215 convolve_params.push_back(param_no_compound); 216 217 ConvolveParams param_compound_avg = 218 get_conv_params_no_round(1, 0, nullptr, 0, 1, bd_); 219 convolve_params.push_back(param_compound_avg); 220 221 ConvolveParams param_compound_avg_dist_wtd = param_compound_avg; 222 param_compound_avg_dist_wtd.use_dist_wtd_comp_avg = 1; 223 224 for (int i = 0; i < 2; ++i) { 225 for (int j = 0; j < 4; ++j) { 226 param_compound_avg_dist_wtd.fwd_offset = quant_dist_lookup_table[j][i]; 227 param_compound_avg_dist_wtd.bck_offset = 228 quant_dist_lookup_table[j][1 - i]; 229 convolve_params.push_back(param_compound_avg_dist_wtd); 230 } 231 } 232 233 return convolve_params; 234 } 235 236 void Run() { 237 ACMRandom rnd(ACMRandom::DeterministicSeed()); 238 std::vector<ConvolveParams> conv_params = GetConvParams(); 239 240 for (int i = 0; i < kTestIters; ++i) { 241 for (int subpel_search = USE_2_TAPS; subpel_search <= USE_8_TAPS; 242 ++subpel_search) { 243 for (int filter_bank_y = 0; filter_bank_y < kNumFilterBanks; 244 ++filter_bank_y) { 245 const InterpFilter filter_y = 246 static_cast<InterpFilter>(filter_bank_y); 247 filter_y_ = 248 av1_get_interp_filter_params_with_block_size(filter_y, width_); 249 250 for (int filter_bank_x = 0; filter_bank_x < kNumFilterBanks; 251 ++filter_bank_x) { 252 const InterpFilter filter_x = 253 static_cast<InterpFilter>(filter_bank_x); 254 filter_x_ = 255 av1_get_interp_filter_params_with_block_size(filter_x, width_); 256 257 for (const auto c : conv_params) { 258 convolve_params_ = c; 259 Prep(&rnd); 260 RunOne(true); 261 RunOne(false); 262 image_->Check(); 263 } 264 } 265 } 266 } 267 } 268 } 269 void SpeedTest() { 270 ACMRandom rnd(ACMRandom::DeterministicSeed()); 271 Prep(&rnd); 272 273 aom_usec_timer ref_timer; 274 aom_usec_timer_start(&ref_timer); 275 for (int i = 0; i < kPerfIters; ++i) RunOne(true); 276 aom_usec_timer_mark(&ref_timer); 277 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); 278 279 aom_usec_timer tst_timer; 280 aom_usec_timer_start(&tst_timer); 281 for (int i = 0; i < kPerfIters; ++i) RunOne(false); 282 aom_usec_timer_mark(&tst_timer); 283 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); 284 285 std::cout << "[ ] C time = " << ref_time / 1000 286 << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; 287 288 EXPECT_GT(ref_time, tst_time) 289 << "Error: CDEFSpeedTest, SIMD slower than C.\n" 290 << "C time: " << ref_time << " us\n" 291 << "SIMD time: " << tst_time << " us\n"; 292 } 293 294 static int RandomSubpel(ACMRandom *rnd) { 295 const uint8_t subpel_mode = rnd->Rand8(); 296 if ((subpel_mode & 7) == 0) { 297 return 0; 298 } else if ((subpel_mode & 7) == 1) { 299 return SCALE_SUBPEL_SHIFTS - 1; 300 } else { 301 return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2); 302 } 303 } 304 305 void Prep(ACMRandom *rnd) { 306 assert(rnd); 307 308 // Choose subpel_x_ and subpel_y_. They should be less than 309 // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to 310 // "interesting" values: 0 and SCALE_SUBPEL_SHIFTS - 1 311 subpel_x_ = RandomSubpel(rnd); 312 subpel_y_ = RandomSubpel(rnd); 313 314 image_->Initialize(rnd); 315 } 316 317 int width_, height_, bd_; 318 int subpel_x_, subpel_y_; 319 const InterpFilterParams *filter_x_, *filter_y_; 320 TestImage<SrcPixel> *image_; 321 ConvolveParams convolve_params_; 322 }; 323 324 using BlockDimension = tuple<int, int>; 325 326 using LowbdConvolveFunc = void (*)(const uint8_t *src, int src_stride, 327 uint8_t *dst, int dst_stride, int w, int h, 328 const InterpFilterParams *filter_params_x, 329 const InterpFilterParams *filter_params_y, 330 const int subpel_x_qn, const int x_step_qn, 331 const int subpel_y_qn, const int y_step_qn, 332 ConvolveParams *conv_params); 333 334 // Test parameter list: 335 // <tst_fun, dims, avg> 336 using LowBDParams = tuple<LowbdConvolveFunc, BlockDimension>; 337 338 class LowBDConvolveScaleTest 339 : public ConvolveScaleTestBase<uint8_t>, 340 public ::testing::WithParamInterface<LowBDParams> { 341 public: 342 ~LowBDConvolveScaleTest() override = default; 343 344 void SetUp() override { 345 tst_fun_ = GET_PARAM(0); 346 347 const BlockDimension &block = GET_PARAM(1); 348 const int bd = 8; 349 350 SetParams(BaseParams(block), bd); 351 } 352 353 void RunOne(bool ref) override { 354 const uint8_t *src = image_->GetSrcData(ref, false); 355 uint8_t *dst = image_->GetDstData(ref, false); 356 convolve_params_.dst = image_->GetDst16Data(ref, false); 357 const int src_stride = image_->src_stride(); 358 const int dst_stride = image_->dst_stride(); 359 if (ref) { 360 av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_, 361 filter_x_, filter_y_, subpel_x_, kXStepQn, 362 subpel_y_, kYStepQn, &convolve_params_); 363 } else { 364 tst_fun_(src, src_stride, dst, dst_stride, width_, height_, filter_x_, 365 filter_y_, subpel_x_, kXStepQn, subpel_y_, kYStepQn, 366 &convolve_params_); 367 } 368 } 369 370 private: 371 LowbdConvolveFunc tst_fun_; 372 }; 373 374 const BlockDimension kBlockDim[] = { 375 make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4), 376 make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8), 377 make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16), 378 make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32), 379 make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64), 380 make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128), 381 }; 382 383 TEST_P(LowBDConvolveScaleTest, Check) { Run(); } 384 TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); } 385 386 INSTANTIATE_TEST_SUITE_P( 387 C, LowBDConvolveScaleTest, 388 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_c), 389 ::testing::ValuesIn(kBlockDim))); 390 391 #if HAVE_NEON 392 INSTANTIATE_TEST_SUITE_P( 393 NEON, LowBDConvolveScaleTest, 394 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_neon), 395 ::testing::ValuesIn(kBlockDim))); 396 #endif // HAVE_NEON 397 398 #if HAVE_NEON_DOTPROD 399 INSTANTIATE_TEST_SUITE_P( 400 NEON_DOTPROD, LowBDConvolveScaleTest, 401 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_neon_dotprod), 402 ::testing::ValuesIn(kBlockDim))); 403 #endif // HAVE_NEON_DOTPROD 404 405 #if HAVE_NEON_I8MM 406 INSTANTIATE_TEST_SUITE_P( 407 NEON_I8MM, LowBDConvolveScaleTest, 408 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_neon_i8mm), 409 ::testing::ValuesIn(kBlockDim))); 410 #endif // HAVE_NEON_I8MM 411 412 #if HAVE_SSE4_1 413 INSTANTIATE_TEST_SUITE_P( 414 SSE4_1, LowBDConvolveScaleTest, 415 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1), 416 ::testing::ValuesIn(kBlockDim))); 417 #endif // HAVE_SSE4_1 418 419 #if CONFIG_AV1_HIGHBITDEPTH 420 using HighbdConvolveFunc = void (*)(const uint16_t *src, int src_stride, 421 uint16_t *dst, int dst_stride, int w, int h, 422 const InterpFilterParams *filter_params_x, 423 const InterpFilterParams *filter_params_y, 424 const int subpel_x_qn, const int x_step_qn, 425 const int subpel_y_qn, const int y_step_qn, 426 ConvolveParams *conv_params, int bd); 427 428 // Test parameter list: 429 // <tst_fun, dims, avg, bd> 430 using HighBDParams = tuple<HighbdConvolveFunc, BlockDimension, int>; 431 432 class HighBDConvolveScaleTest 433 : public ConvolveScaleTestBase<uint16_t>, 434 public ::testing::WithParamInterface<HighBDParams> { 435 public: 436 ~HighBDConvolveScaleTest() override = default; 437 438 void SetUp() override { 439 tst_fun_ = GET_PARAM(0); 440 441 const BlockDimension &block = GET_PARAM(1); 442 const int bd = GET_PARAM(2); 443 444 SetParams(BaseParams(block), bd); 445 } 446 447 void RunOne(bool ref) override { 448 const uint16_t *src = image_->GetSrcData(ref, false); 449 uint16_t *dst = image_->GetDstData(ref, false); 450 convolve_params_.dst = image_->GetDst16Data(ref, false); 451 const int src_stride = image_->src_stride(); 452 const int dst_stride = image_->dst_stride(); 453 454 if (ref) { 455 av1_highbd_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, 456 height_, filter_x_, filter_y_, subpel_x_, 457 kXStepQn, subpel_y_, kYStepQn, 458 &convolve_params_, bd_); 459 } else { 460 tst_fun_(src, src_stride, dst, dst_stride, width_, height_, filter_x_, 461 filter_y_, subpel_x_, kXStepQn, subpel_y_, kYStepQn, 462 &convolve_params_, bd_); 463 } 464 } 465 466 private: 467 HighbdConvolveFunc tst_fun_; 468 }; 469 470 const int kBDs[] = { 8, 10, 12 }; 471 472 TEST_P(HighBDConvolveScaleTest, Check) { Run(); } 473 TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) { SpeedTest(); } 474 475 INSTANTIATE_TEST_SUITE_P( 476 C, HighBDConvolveScaleTest, 477 ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_c), 478 ::testing::ValuesIn(kBlockDim), 479 ::testing::ValuesIn(kBDs))); 480 481 #if HAVE_SSE4_1 482 INSTANTIATE_TEST_SUITE_P( 483 SSE4_1, HighBDConvolveScaleTest, 484 ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1), 485 ::testing::ValuesIn(kBlockDim), 486 ::testing::ValuesIn(kBDs))); 487 #endif // HAVE_SSE4_1 488 489 #if HAVE_NEON 490 INSTANTIATE_TEST_SUITE_P( 491 NEON, HighBDConvolveScaleTest, 492 ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_neon), 493 ::testing::ValuesIn(kBlockDim), 494 ::testing::ValuesIn(kBDs))); 495 496 #endif // HAVE_NEON 497 498 #endif // CONFIG_AV1_HIGHBITDEPTH 499 } // namespace