av1_horz_only_frame_superres_test.cc (13242B)
1 /* 2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <tuple> 13 #include <vector> 14 15 #include "gtest/gtest.h" 16 17 #include "config/av1_rtcd.h" 18 19 #include "aom_ports/aom_timer.h" 20 #include "av1/common/convolve.h" 21 #include "av1/common/resize.h" 22 #include "test/acm_random.h" 23 #include "test/register_state_check.h" 24 #include "test/util.h" 25 26 namespace { 27 const int kTestIters = 10; 28 const int kPerfIters = 1000; 29 30 const int kVPad = 32; 31 const int kHPad = 32; 32 33 using libaom_test::ACMRandom; 34 using std::make_tuple; 35 using std::tuple; 36 37 // Inverse of av1_calculate_scaled_superres_size(): calculates the original 38 // dimensions from the given scaled dimensions and the scale denominator. 39 void calculate_unscaled_superres_size(int *width, int denom) { 40 if (denom != SCALE_NUMERATOR) { 41 // Note: av1_calculate_scaled_superres_size() rounds *up* after division 42 // when the resulting dimensions are odd. So here, we round *down*. 43 *width = *width * denom / SCALE_NUMERATOR; 44 } 45 } 46 47 template <typename Pixel> 48 class TestImage { 49 public: 50 TestImage(int w_src, int h, int superres_denom, int x0, int bd) 51 : w_src_(w_src), h_(h), superres_denom_(superres_denom), x0_(x0), 52 bd_(bd) { 53 assert(bd < 16); 54 assert(bd <= 8 * static_cast<int>(sizeof(Pixel))); 55 assert(9 <= superres_denom && superres_denom <= 16); 56 assert(SCALE_NUMERATOR == 8); 57 assert(0 <= x0_ && x0_ <= RS_SCALE_SUBPEL_MASK); 58 59 w_dst_ = w_src_; 60 calculate_unscaled_superres_size(&w_dst_, superres_denom); 61 62 src_stride_ = ALIGN_POWER_OF_TWO(w_src_ + 2 * kHPad, 4); 63 dst_stride_ = ALIGN_POWER_OF_TWO(w_dst_ + 2 * kHPad, 4); 64 65 // Allocate image data 66 src_data_.resize(2 * src_block_size()); 67 dst_data_.resize(2 * dst_block_size()); 68 } 69 70 void Initialize(ACMRandom *rnd); 71 void Check() const; 72 73 int src_stride() const { return src_stride_; } 74 int dst_stride() const { return dst_stride_; } 75 76 int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); } 77 int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); } 78 79 int src_width() const { return w_src_; } 80 int dst_width() const { return w_dst_; } 81 int height() const { return h_; } 82 int x0() const { return x0_; } 83 84 const Pixel *GetSrcData(bool ref, bool borders) const { 85 const Pixel *block = &src_data_[ref ? 0 : src_block_size()]; 86 return borders ? block : block + kHPad + src_stride_ * kVPad; 87 } 88 89 Pixel *GetDstData(bool ref, bool borders) { 90 Pixel *block = &dst_data_[ref ? 0 : dst_block_size()]; 91 return borders ? block : block + kHPad + dst_stride_ * kVPad; 92 } 93 94 private: 95 int w_src_, w_dst_, h_, superres_denom_, x0_, bd_; 96 int src_stride_, dst_stride_; 97 98 std::vector<Pixel> src_data_; 99 std::vector<Pixel> dst_data_; 100 }; 101 102 template <typename Pixel> 103 void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) { 104 if (!trash) { 105 memset(data, 0, sizeof(*data) * num_pixels); 106 return; 107 } 108 const Pixel mask = (1 << bd) - 1; 109 for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask; 110 } 111 112 template <typename Pixel> 113 void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd, 114 bool trash_edges, Pixel *data) { 115 assert(rnd); 116 const Pixel mask = (1 << bd) - 1; 117 118 // Fill in the first buffer with random data 119 // Top border 120 FillEdge(rnd, stride * kVPad, bd, trash_edges, data); 121 for (int r = 0; r < h; ++r) { 122 Pixel *row_data = data + (kVPad + r) * stride; 123 // Left border, contents, right border 124 FillEdge(rnd, kHPad, bd, trash_edges, row_data); 125 for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask; 126 FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w); 127 } 128 // Bottom border 129 FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h)); 130 131 const int bpp = sizeof(*data); 132 const int block_elts = stride * (h + 2 * kVPad); 133 const int block_size = bpp * block_elts; 134 135 // Now copy that to the second buffer 136 memcpy(data + block_elts, data, block_size); 137 } 138 139 template <typename Pixel> 140 void TestImage<Pixel>::Initialize(ACMRandom *rnd) { 141 PrepBuffers(rnd, w_src_, h_, src_stride_, bd_, false, &src_data_[0]); 142 PrepBuffers(rnd, w_dst_, h_, dst_stride_, bd_, true, &dst_data_[0]); 143 } 144 145 template <typename Pixel> 146 void TestImage<Pixel>::Check() const { 147 const int num_pixels = dst_block_size(); 148 const Pixel *ref_dst = &dst_data_[0]; 149 const Pixel *tst_dst = &dst_data_[num_pixels]; 150 151 // If memcmp returns 0, there's nothing to do. 152 if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) return; 153 154 // Otherwise, iterate through the buffer looking for differences, *ignoring 155 // the edges* 156 const int stride = dst_stride_; 157 for (int r = kVPad; r < h_ + kVPad; ++r) { 158 for (int c = kVPad; c < w_dst_ + kHPad; ++c) { 159 const int32_t ref_value = ref_dst[r * stride + c]; 160 const int32_t tst_value = tst_dst[r * stride + c]; 161 162 EXPECT_EQ(tst_value, ref_value) 163 << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad) 164 << ", superres_denom: " << superres_denom_ << ", height: " << h_ 165 << ", src_width: " << w_src_ << ", dst_width: " << w_dst_ 166 << ", x0: " << x0_; 167 } 168 } 169 } 170 171 template <typename Pixel> 172 class ConvolveHorizRSTestBase : public ::testing::Test { 173 public: 174 ConvolveHorizRSTestBase() : image_(nullptr) {} 175 ~ConvolveHorizRSTestBase() override = default; 176 177 // Implemented by subclasses (SetUp depends on the parameters passed 178 // in and RunOne depends on the function to be tested. These can't 179 // be templated for low/high bit depths because they have different 180 // numbers of parameters) 181 void SetUp() override = 0; 182 virtual void RunOne(bool ref) = 0; 183 184 protected: 185 void SetBitDepth(int bd) { bd_ = bd; } 186 187 void CorrectnessTest() { 188 ACMRandom rnd(ACMRandom::DeterministicSeed()); 189 for (int i = 0; i < kTestIters; ++i) { 190 for (int superres_denom = 9; superres_denom <= 16; superres_denom++) { 191 // Get a random height between 512 and 767 192 int height = rnd.Rand8() + 512; 193 194 // Get a random src width between 128 and 383 195 int width_src = rnd.Rand8() + 128; 196 197 // x0 is normally calculated by get_upscale_convolve_x0 in 198 // av1/common/resize.c. However, this test should work for 199 // any value of x0 between 0 and RS_SCALE_SUBPEL_MASK 200 // (inclusive), so we choose one at random. 201 int x0 = rnd.Rand16() % (RS_SCALE_SUBPEL_MASK + 1); 202 203 image_ = 204 new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); 205 ASSERT_NE(image_, nullptr); 206 207 Prep(&rnd); 208 RunOne(true); 209 RunOne(false); 210 image_->Check(); 211 212 delete image_; 213 } 214 } 215 } 216 217 void SpeedTest() { 218 // Pick some specific parameters to test 219 int height = 767; 220 int width_src = 129; 221 int superres_denom = 13; 222 int x0 = RS_SCALE_SUBPEL_MASK >> 1; 223 224 image_ = new TestImage<Pixel>(width_src, height, superres_denom, x0, bd_); 225 ASSERT_NE(image_, nullptr); 226 227 ACMRandom rnd(ACMRandom::DeterministicSeed()); 228 Prep(&rnd); 229 230 aom_usec_timer ref_timer; 231 aom_usec_timer_start(&ref_timer); 232 for (int i = 0; i < kPerfIters; ++i) RunOne(true); 233 aom_usec_timer_mark(&ref_timer); 234 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); 235 236 aom_usec_timer tst_timer; 237 aom_usec_timer_start(&tst_timer); 238 for (int i = 0; i < kPerfIters; ++i) RunOne(false); 239 aom_usec_timer_mark(&tst_timer); 240 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); 241 242 std::cout << "[ ] C time = " << ref_time / 1000 243 << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; 244 245 EXPECT_GT(ref_time, tst_time) 246 << "Error: ConvolveHorizRSTest (Speed Test), SIMD slower than C.\n" 247 << "C time: " << ref_time << " us\n" 248 << "SIMD time: " << tst_time << " us\n"; 249 } 250 251 void Prep(ACMRandom *rnd) { 252 assert(rnd); 253 image_->Initialize(rnd); 254 } 255 256 int bd_; 257 TestImage<Pixel> *image_; 258 }; 259 260 using LowBDConvolveHorizRsFunc = void (*)(const uint8_t *src, int src_stride, 261 uint8_t *dst, int dst_stride, int w, 262 int h, const int16_t *x_filters, 263 const int x0_qn, const int x_step_qn); 264 265 // Test parameter list: 266 // <tst_fun_> 267 using LowBDParams = tuple<LowBDConvolveHorizRsFunc>; 268 269 class LowBDConvolveHorizRSTest 270 : public ConvolveHorizRSTestBase<uint8_t>, 271 public ::testing::WithParamInterface<LowBDParams> { 272 public: 273 ~LowBDConvolveHorizRSTest() override = default; 274 275 void SetUp() override { 276 tst_fun_ = GET_PARAM(0); 277 const int bd = 8; 278 SetBitDepth(bd); 279 } 280 281 void RunOne(bool ref) override { 282 const uint8_t *src = image_->GetSrcData(ref, false); 283 uint8_t *dst = image_->GetDstData(ref, false); 284 const int src_stride = image_->src_stride(); 285 const int dst_stride = image_->dst_stride(); 286 const int width_src = image_->src_width(); 287 const int width_dst = image_->dst_width(); 288 const int height = image_->height(); 289 const int x0_qn = image_->x0(); 290 291 const int32_t x_step_qn = 292 av1_get_upscale_convolve_step(width_src, width_dst); 293 294 if (ref) { 295 av1_convolve_horiz_rs_c(src, src_stride, dst, dst_stride, width_dst, 296 height, &av1_resize_filter_normative[0][0], x0_qn, 297 x_step_qn); 298 } else { 299 tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, 300 &av1_resize_filter_normative[0][0], x0_qn, x_step_qn); 301 } 302 } 303 304 private: 305 LowBDConvolveHorizRsFunc tst_fun_; 306 }; 307 308 TEST_P(LowBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } 309 TEST_P(LowBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } 310 311 INSTANTIATE_TEST_SUITE_P(C, LowBDConvolveHorizRSTest, 312 ::testing::Values(av1_convolve_horiz_rs_c)); 313 314 #if HAVE_NEON 315 INSTANTIATE_TEST_SUITE_P(NEON, LowBDConvolveHorizRSTest, 316 ::testing::Values(av1_convolve_horiz_rs_neon)); 317 #endif 318 319 #if HAVE_SSE4_1 320 INSTANTIATE_TEST_SUITE_P(SSE4_1, LowBDConvolveHorizRSTest, 321 ::testing::Values(av1_convolve_horiz_rs_sse4_1)); 322 #endif 323 324 #if CONFIG_AV1_HIGHBITDEPTH 325 using HighBDConvolveHorizRsFunc = void (*)(const uint16_t *src, int src_stride, 326 uint16_t *dst, int dst_stride, int w, 327 int h, const int16_t *x_filters, 328 const int x0_qn, const int x_step_qn, 329 int bd); 330 331 // Test parameter list: 332 // <tst_fun_, bd_> 333 using HighBDParams = tuple<HighBDConvolveHorizRsFunc, int>; 334 335 class HighBDConvolveHorizRSTest 336 : public ConvolveHorizRSTestBase<uint16_t>, 337 public ::testing::WithParamInterface<HighBDParams> { 338 public: 339 ~HighBDConvolveHorizRSTest() override = default; 340 341 void SetUp() override { 342 tst_fun_ = GET_PARAM(0); 343 const int bd = GET_PARAM(1); 344 SetBitDepth(bd); 345 } 346 347 void RunOne(bool ref) override { 348 const uint16_t *src = image_->GetSrcData(ref, false); 349 uint16_t *dst = image_->GetDstData(ref, false); 350 const int src_stride = image_->src_stride(); 351 const int dst_stride = image_->dst_stride(); 352 const int width_src = image_->src_width(); 353 const int width_dst = image_->dst_width(); 354 const int height = image_->height(); 355 const int x0_qn = image_->x0(); 356 357 const int32_t x_step_qn = 358 av1_get_upscale_convolve_step(width_src, width_dst); 359 360 if (ref) { 361 av1_highbd_convolve_horiz_rs_c( 362 src, src_stride, dst, dst_stride, width_dst, height, 363 &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); 364 } else { 365 tst_fun_(src, src_stride, dst, dst_stride, width_dst, height, 366 &av1_resize_filter_normative[0][0], x0_qn, x_step_qn, bd_); 367 } 368 } 369 370 private: 371 HighBDConvolveHorizRsFunc tst_fun_; 372 }; 373 374 const int kBDs[] = { 8, 10, 12 }; 375 376 TEST_P(HighBDConvolveHorizRSTest, Correctness) { CorrectnessTest(); } 377 TEST_P(HighBDConvolveHorizRSTest, DISABLED_Speed) { SpeedTest(); } 378 379 INSTANTIATE_TEST_SUITE_P( 380 C, HighBDConvolveHorizRSTest, 381 ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_c), 382 ::testing::ValuesIn(kBDs))); 383 384 #if HAVE_SSE4_1 385 INSTANTIATE_TEST_SUITE_P( 386 SSE4_1, HighBDConvolveHorizRSTest, 387 ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_sse4_1), 388 ::testing::ValuesIn(kBDs))); 389 #endif // HAVE_SSE4_1 390 391 #if HAVE_NEON 392 INSTANTIATE_TEST_SUITE_P( 393 NEON, HighBDConvolveHorizRSTest, 394 ::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_rs_neon), 395 ::testing::ValuesIn(kBDs))); 396 #endif // HAVE_NEON 397 398 #endif // CONFIG_AV1_HIGHBITDEPTH 399 400 } // namespace