hiprec_convolve_test_util.cc (15747B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include "test/hiprec_convolve_test_util.h" 13 14 #include <memory> 15 #include <new> 16 17 #include "av1/common/restoration.h" 18 19 using std::make_tuple; 20 using std::tuple; 21 22 namespace libaom_test { 23 24 // Generate a random pair of filter kernels, using the ranges 25 // of possible values from the loop-restoration experiment 26 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel, 27 InterpKernel vkernel, int kernel_type = 2) { 28 if (kernel_type == 0) { 29 // Low possible values for filter coefficients, 7-tap kernel 30 hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV; 31 hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV; 32 hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV; 33 hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]); 34 hkernel[7] = vkernel[7] = 0; 35 } else if (kernel_type == 1) { 36 // Max possible values for filter coefficients, 7-tap kernel 37 hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV; 38 hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV; 39 hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV; 40 hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]); 41 hkernel[7] = vkernel[7] = 0; 42 } else if (kernel_type == 2) { 43 // Randomly generated values for filter coefficients, 7-tap kernel 44 hkernel[0] = hkernel[6] = 45 WIENER_FILT_TAP0_MINV + 46 rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV); 47 hkernel[1] = hkernel[5] = 48 WIENER_FILT_TAP1_MINV + 49 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV); 50 hkernel[2] = hkernel[4] = 51 WIENER_FILT_TAP2_MINV + 52 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV); 53 hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]); 54 hkernel[7] = 0; 55 56 vkernel[0] = vkernel[6] = 57 WIENER_FILT_TAP0_MINV + 58 rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV); 59 vkernel[1] = vkernel[5] = 60 WIENER_FILT_TAP1_MINV + 61 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV); 62 vkernel[2] = vkernel[4] = 63 WIENER_FILT_TAP2_MINV + 64 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV); 65 vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]); 66 vkernel[7] = 0; 67 } else if (kernel_type == 3) { 68 // Low possible values for filter coefficients, 5-tap kernel 69 hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0; 70 hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV; 71 hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV; 72 hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]); 73 hkernel[7] = vkernel[7] = 0; 74 } else if (kernel_type == 4) { 75 // Max possible values for filter coefficients, 5-tap kernel 76 hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = 0; 77 hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV; 78 hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV; 79 hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]); 80 hkernel[7] = vkernel[7] = 0; 81 } else { 82 // Randomly generated values for filter coefficients, 5-tap kernel 83 hkernel[0] = hkernel[6] = 0; 84 hkernel[1] = hkernel[5] = 85 WIENER_FILT_TAP1_MINV + 86 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV); 87 hkernel[2] = hkernel[4] = 88 WIENER_FILT_TAP2_MINV + 89 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV); 90 hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]); 91 hkernel[7] = 0; 92 93 vkernel[0] = vkernel[6] = 0; 94 vkernel[1] = vkernel[5] = 95 WIENER_FILT_TAP1_MINV + 96 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV); 97 vkernel[2] = vkernel[4] = 98 WIENER_FILT_TAP2_MINV + 99 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV); 100 vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]); 101 vkernel[7] = 0; 102 } 103 } 104 105 namespace AV1HiprecConvolve { 106 107 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams( 108 hiprec_convolve_func filter) { 109 const HiprecConvolveParam params[] = { 110 make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter), 111 make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter), 112 make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter), 113 make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter), 114 make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter), 115 make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter), 116 make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter) 117 }; 118 return ::testing::ValuesIn(params); 119 } 120 121 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() = default; 122 void AV1HiprecConvolveTest::SetUp() { 123 rnd_.Reset(ACMRandom::DeterministicSeed()); 124 } 125 126 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) { 127 const int w = 128, h = 128; 128 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); 129 const int num_iters = GET_PARAM(2); 130 int i, j, k, m; 131 const WienerConvolveParams conv_params = get_conv_params_wiener(8); 132 133 std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]); 134 ASSERT_NE(input_, nullptr); 135 uint8_t *input = input_.get(); 136 137 // The AVX2 convolve functions always write rows with widths that are 138 // multiples of 16. So to avoid a buffer overflow, we may need to pad 139 // rows to a multiple of 16. 140 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; 141 std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]); 142 ASSERT_NE(output, nullptr); 143 std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]); 144 ASSERT_NE(output2, nullptr); 145 146 // Generate random filter kernels 147 DECLARE_ALIGNED(16, InterpKernel, hkernel); 148 DECLARE_ALIGNED(16, InterpKernel, vkernel); 149 150 for (int kernel_type = 0; kernel_type < 6; kernel_type++) { 151 generate_kernels(&rnd_, hkernel, vkernel, kernel_type); 152 for (i = 0; i < num_iters; ++i) { 153 for (k = 0; k < h; ++k) 154 for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8(); 155 // Choose random locations within the source block 156 int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); 157 int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); 158 av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w, 159 output.get(), out_w, hkernel, 16, vkernel, 160 16, out_w, out_h, &conv_params); 161 test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w, 162 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params); 163 164 for (j = 0; j < out_w * out_h; ++j) 165 ASSERT_EQ(output[j], output2[j]) 166 << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", " 167 << (j / out_w) << ") on iteration " << i; 168 } 169 } 170 } 171 172 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) { 173 const int w = 128, h = 128; 174 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); 175 const int num_iters = GET_PARAM(2) / 500; 176 int i, j, k; 177 const WienerConvolveParams conv_params = get_conv_params_wiener(8); 178 179 std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]); 180 ASSERT_NE(input_, nullptr); 181 uint8_t *input = input_.get(); 182 183 // The AVX2 convolve functions always write rows with widths that are 184 // multiples of 16. So to avoid a buffer overflow, we may need to pad 185 // rows to a multiple of 16. 186 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; 187 std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]); 188 ASSERT_NE(output, nullptr); 189 std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]); 190 ASSERT_NE(output2, nullptr); 191 192 // Generate random filter kernels 193 DECLARE_ALIGNED(16, InterpKernel, hkernel); 194 DECLARE_ALIGNED(16, InterpKernel, vkernel); 195 196 generate_kernels(&rnd_, hkernel, vkernel); 197 198 for (i = 0; i < h; ++i) 199 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8(); 200 201 aom_usec_timer ref_timer; 202 aom_usec_timer_start(&ref_timer); 203 for (i = 0; i < num_iters; ++i) { 204 for (j = 3; j < h - out_h - 4; j++) { 205 for (k = 3; k < w - out_w - 4; k++) { 206 av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w, 207 hkernel, 16, vkernel, 16, out_w, out_h, 208 &conv_params); 209 } 210 } 211 } 212 aom_usec_timer_mark(&ref_timer); 213 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); 214 215 aom_usec_timer tst_timer; 216 aom_usec_timer_start(&tst_timer); 217 for (i = 0; i < num_iters; ++i) { 218 for (j = 3; j < h - out_h - 4; j++) { 219 for (k = 3; k < w - out_w - 4; k++) { 220 test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16, 221 vkernel, 16, out_w, out_h, &conv_params); 222 } 223 } 224 } 225 aom_usec_timer_mark(&tst_timer); 226 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); 227 228 std::cout << "[ ] C time = " << ref_time / 1000 229 << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; 230 231 EXPECT_GT(ref_time, tst_time) 232 << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n" 233 << "C time: " << ref_time << " us\n" 234 << "SIMD time: " << tst_time << " us\n"; 235 } 236 } // namespace AV1HiprecConvolve 237 238 #if CONFIG_AV1_HIGHBITDEPTH 239 namespace AV1HighbdHiprecConvolve { 240 241 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams( 242 highbd_hiprec_convolve_func filter) { 243 const HighbdHiprecConvolveParam params[] = { 244 make_tuple(8, 8, 50000, 8, filter), make_tuple(64, 64, 1000, 8, filter), 245 make_tuple(32, 8, 10000, 8, filter), make_tuple(8, 8, 50000, 10, filter), 246 make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter), 247 make_tuple(8, 8, 50000, 12, filter), make_tuple(64, 64, 1000, 12, filter), 248 make_tuple(32, 8, 10000, 12, filter), 249 }; 250 return ::testing::ValuesIn(params); 251 } 252 253 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() = default; 254 void AV1HighbdHiprecConvolveTest::SetUp() { 255 rnd_.Reset(ACMRandom::DeterministicSeed()); 256 } 257 258 void AV1HighbdHiprecConvolveTest::RunCheckOutput( 259 highbd_hiprec_convolve_func test_impl) { 260 const int w = 128, h = 128; 261 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); 262 const int num_iters = GET_PARAM(2); 263 const int bd = GET_PARAM(3); 264 int i, j; 265 const WienerConvolveParams conv_params = get_conv_params_wiener(bd); 266 267 std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]); 268 ASSERT_NE(input, nullptr); 269 270 // The AVX2 convolve functions always write rows with widths that are 271 // multiples of 16. So to avoid a buffer overflow, we may need to pad 272 // rows to a multiple of 16. 273 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; 274 std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]); 275 ASSERT_NE(output, nullptr); 276 std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]); 277 ASSERT_NE(output2, nullptr); 278 279 // Generate random filter kernels 280 DECLARE_ALIGNED(16, InterpKernel, hkernel); 281 DECLARE_ALIGNED(16, InterpKernel, vkernel); 282 283 for (i = 0; i < h; ++i) 284 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); 285 286 uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get()); 287 uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get()); 288 uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get()); 289 for (int kernel_type = 0; kernel_type < 6; kernel_type++) { 290 generate_kernels(&rnd_, hkernel, vkernel, kernel_type); 291 for (i = 0; i < num_iters; ++i) { 292 // Choose random locations within the source block 293 int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7); 294 int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7); 295 av1_highbd_wiener_convolve_add_src_c( 296 input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel, 297 16, vkernel, 16, out_w, out_h, &conv_params, bd); 298 test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w, 299 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd); 300 301 for (j = 0; j < out_w * out_h; ++j) 302 ASSERT_EQ(output[j], output2[j]) 303 << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", " 304 << (j / out_w) << ") on iteration " << i; 305 } 306 } 307 } 308 309 void AV1HighbdHiprecConvolveTest::RunSpeedTest( 310 highbd_hiprec_convolve_func test_impl) { 311 const int w = 128, h = 128; 312 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1); 313 const int num_iters = GET_PARAM(2) / 500; 314 const int bd = GET_PARAM(3); 315 int i, j, k; 316 const WienerConvolveParams conv_params = get_conv_params_wiener(bd); 317 318 std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]); 319 ASSERT_NE(input, nullptr); 320 321 // The AVX2 convolve functions always write rows with widths that are 322 // multiples of 16. So to avoid a buffer overflow, we may need to pad 323 // rows to a multiple of 16. 324 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h; 325 std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]); 326 ASSERT_NE(output, nullptr); 327 std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]); 328 ASSERT_NE(output2, nullptr); 329 330 // Generate random filter kernels 331 DECLARE_ALIGNED(16, InterpKernel, hkernel); 332 DECLARE_ALIGNED(16, InterpKernel, vkernel); 333 334 generate_kernels(&rnd_, hkernel, vkernel); 335 336 for (i = 0; i < h; ++i) 337 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1); 338 339 uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get()); 340 uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get()); 341 uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get()); 342 343 aom_usec_timer ref_timer; 344 aom_usec_timer_start(&ref_timer); 345 for (i = 0; i < num_iters; ++i) { 346 for (j = 3; j < h - out_h - 4; j++) { 347 for (k = 3; k < w - out_w - 4; k++) { 348 av1_highbd_wiener_convolve_add_src_c( 349 input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel, 350 16, out_w, out_h, &conv_params, bd); 351 } 352 } 353 } 354 aom_usec_timer_mark(&ref_timer); 355 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer); 356 357 aom_usec_timer tst_timer; 358 aom_usec_timer_start(&tst_timer); 359 for (i = 0; i < num_iters; ++i) { 360 for (j = 3; j < h - out_h - 4; j++) { 361 for (k = 3; k < w - out_w - 4; k++) { 362 test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16, 363 vkernel, 16, out_w, out_h, &conv_params, bd); 364 } 365 } 366 } 367 aom_usec_timer_mark(&tst_timer); 368 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer); 369 370 std::cout << "[ ] C time = " << ref_time / 1000 371 << " ms, SIMD time = " << tst_time / 1000 << " ms\n"; 372 373 EXPECT_GT(ref_time, tst_time) 374 << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n" 375 << "C time: " << ref_time << " us\n" 376 << "SIMD time: " << tst_time << " us\n"; 377 } 378 } // namespace AV1HighbdHiprecConvolve 379 #endif // CONFIG_AV1_HIGHBITDEPTH 380 } // namespace libaom_test