av1_wedge_utils_test.cc (14370B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include "gtest/gtest.h" 13 14 #include "config/aom_config.h" 15 #include "config/aom_dsp_rtcd.h" 16 #include "config/av1_rtcd.h" 17 18 #include "aom_dsp/aom_dsp_common.h" 19 20 #include "av1/common/enums.h" 21 22 #include "test/acm_random.h" 23 #include "test/function_equivalence_test.h" 24 #include "test/register_state_check.h" 25 26 #define WEDGE_WEIGHT_BITS 6 27 #define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS)) 28 29 using libaom_test::ACMRandom; 30 using libaom_test::FunctionEquivalenceTest; 31 32 namespace { 33 34 static const int16_t kInt13Max = (1 << 12) - 1; 35 36 ////////////////////////////////////////////////////////////////////////////// 37 // av1_wedge_sse_from_residuals - functionality 38 ////////////////////////////////////////////////////////////////////////////// 39 40 class WedgeUtilsSSEFuncTest : public testing::Test { 41 protected: 42 WedgeUtilsSSEFuncTest() : rng_(ACMRandom::DeterministicSeed()) {} 43 44 static const int kIterations = 1000; 45 46 ACMRandom rng_; 47 }; 48 49 static void equiv_blend_residuals(int16_t *r, const int16_t *r0, 50 const int16_t *r1, const uint8_t *m, int N) { 51 for (int i = 0; i < N; i++) { 52 const int32_t m0 = m[i]; 53 const int32_t m1 = MAX_MASK_VALUE - m0; 54 const int16_t R = m0 * r0[i] + m1 * r1[i]; 55 // Note that this rounding is designed to match the result 56 // you would get when actually blending the 2 predictors and computing 57 // the residuals. 58 r[i] = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS); 59 } 60 } 61 62 static uint64_t equiv_sse_from_residuals(const int16_t *r0, const int16_t *r1, 63 const uint8_t *m, int N) { 64 uint64_t acc = 0; 65 for (int i = 0; i < N; i++) { 66 const int32_t m0 = m[i]; 67 const int32_t m1 = MAX_MASK_VALUE - m0; 68 const int16_t R = m0 * r0[i] + m1 * r1[i]; 69 const int32_t r = ROUND_POWER_OF_TWO(R - 1, WEDGE_WEIGHT_BITS); 70 acc += r * r; 71 } 72 return acc; 73 } 74 75 TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingEquiv) { 76 DECLARE_ALIGNED(32, uint8_t, s[MAX_SB_SQUARE]); 77 DECLARE_ALIGNED(32, uint8_t, p0[MAX_SB_SQUARE]); 78 DECLARE_ALIGNED(32, uint8_t, p1[MAX_SB_SQUARE]); 79 DECLARE_ALIGNED(32, uint8_t, p[MAX_SB_SQUARE]); 80 81 DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); 82 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); 83 DECLARE_ALIGNED(32, int16_t, r_ref[MAX_SB_SQUARE]); 84 DECLARE_ALIGNED(32, int16_t, r_tst[MAX_SB_SQUARE]); 85 DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); 86 87 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 88 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 89 s[i] = rng_.Rand8(); 90 m[i] = rng_(MAX_MASK_VALUE + 1); 91 } 92 93 const int w = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3); 94 const int h = 1 << (rng_(MAX_SB_SIZE_LOG2 + 1 - 3) + 3); 95 const int N = w * h; 96 97 for (int j = 0; j < N; j++) { 98 p0[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX); 99 p1[j] = clamp(s[j] + rng_(33) - 16, 0, UINT8_MAX); 100 } 101 102 aom_blend_a64_mask(p, w, p0, w, p1, w, m, w, w, h, 0, 0); 103 104 aom_subtract_block(h, w, r0, w, s, w, p0, w); 105 aom_subtract_block(h, w, r1, w, s, w, p1, w); 106 107 aom_subtract_block(h, w, r_ref, w, s, w, p, w); 108 equiv_blend_residuals(r_tst, r0, r1, m, N); 109 110 for (int i = 0; i < N; ++i) ASSERT_EQ(r_ref[i], r_tst[i]); 111 112 uint64_t ref_sse = aom_sum_squares_i16(r_ref, N); 113 uint64_t tst_sse = equiv_sse_from_residuals(r0, r1, m, N); 114 115 ASSERT_EQ(ref_sse, tst_sse); 116 } 117 } 118 119 static uint64_t sse_from_residuals(const int16_t *r0, const int16_t *r1, 120 const uint8_t *m, int N) { 121 uint64_t acc = 0; 122 for (int i = 0; i < N; i++) { 123 const int32_t m0 = m[i]; 124 const int32_t m1 = MAX_MASK_VALUE - m0; 125 const int32_t r = m0 * r0[i] + m1 * r1[i]; 126 acc += r * r; 127 } 128 return ROUND_POWER_OF_TWO(acc, 2 * WEDGE_WEIGHT_BITS); 129 } 130 131 TEST_F(WedgeUtilsSSEFuncTest, ResidualBlendingMethod) { 132 DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); 133 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); 134 DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); 135 DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); 136 137 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 138 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 139 r1[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN; 140 d[i] = rng_(2 * INT8_MAX - 2 * INT8_MIN + 1) + 2 * INT8_MIN; 141 m[i] = rng_(MAX_MASK_VALUE + 1); 142 } 143 144 const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); 145 146 for (int i = 0; i < N; i++) r0[i] = r1[i] + d[i]; 147 148 const uint64_t ref_res = sse_from_residuals(r0, r1, m, N); 149 const uint64_t tst_res = av1_wedge_sse_from_residuals(r1, d, m, N); 150 151 ASSERT_EQ(ref_res, tst_res); 152 } 153 } 154 155 ////////////////////////////////////////////////////////////////////////////// 156 // av1_wedge_sse_from_residuals - optimizations 157 ////////////////////////////////////////////////////////////////////////////// 158 159 using FSSE = uint64_t (*)(const int16_t *r1, const int16_t *d, const uint8_t *m, 160 int N); 161 using TestFuncsFSSE = libaom_test::FuncParam<FSSE>; 162 163 class WedgeUtilsSSEOptTest : public FunctionEquivalenceTest<FSSE> { 164 protected: 165 static const int kIterations = 10000; 166 }; 167 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsSSEOptTest); 168 169 TEST_P(WedgeUtilsSSEOptTest, RandomValues) { 170 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); 171 DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); 172 DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); 173 174 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 175 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 176 r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max; 177 d[i] = rng_(2 * kInt13Max + 1) - kInt13Max; 178 m[i] = rng_(MAX_MASK_VALUE + 1); 179 } 180 181 const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); 182 183 const uint64_t ref_res = params_.ref_func(r1, d, m, N); 184 uint64_t tst_res; 185 API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N)); 186 187 ASSERT_EQ(ref_res, tst_res); 188 } 189 } 190 191 TEST_P(WedgeUtilsSSEOptTest, ExtremeValues) { 192 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); 193 DECLARE_ALIGNED(32, int16_t, d[MAX_SB_SQUARE]); 194 DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); 195 196 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 197 if (rng_(2)) { 198 for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = kInt13Max; 199 } else { 200 for (int i = 0; i < MAX_SB_SQUARE; ++i) r1[i] = -kInt13Max; 201 } 202 203 if (rng_(2)) { 204 for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = kInt13Max; 205 } else { 206 for (int i = 0; i < MAX_SB_SQUARE; ++i) d[i] = -kInt13Max; 207 } 208 209 for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE; 210 211 const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); 212 213 const uint64_t ref_res = params_.ref_func(r1, d, m, N); 214 uint64_t tst_res; 215 API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(r1, d, m, N)); 216 217 ASSERT_EQ(ref_res, tst_res); 218 } 219 } 220 221 ////////////////////////////////////////////////////////////////////////////// 222 // av1_wedge_sign_from_residuals 223 ////////////////////////////////////////////////////////////////////////////// 224 225 using FSign = int8_t (*)(const int16_t *ds, const uint8_t *m, int N, 226 int64_t limit); 227 using TestFuncsFSign = libaom_test::FuncParam<FSign>; 228 229 class WedgeUtilsSignOptTest : public FunctionEquivalenceTest<FSign> { 230 protected: 231 static const int kIterations = 10000; 232 static const int kMaxSize = 8196; // Size limited by SIMD implementation. 233 }; 234 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsSignOptTest); 235 236 TEST_P(WedgeUtilsSignOptTest, RandomValues) { 237 DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); 238 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); 239 DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]); 240 DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); 241 242 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 243 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 244 r0[i] = rng_(2 * kInt13Max + 1) - kInt13Max; 245 r1[i] = rng_(2 * kInt13Max + 1) - kInt13Max; 246 m[i] = rng_(MAX_MASK_VALUE + 1); 247 } 248 249 const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE); 250 const int N = 64 * (rng_(maxN / 64 - 1) + 1); 251 252 int64_t limit; 253 limit = (int64_t)aom_sum_squares_i16(r0, N); 254 limit -= (int64_t)aom_sum_squares_i16(r1, N); 255 limit *= (1 << WEDGE_WEIGHT_BITS) / 2; 256 257 for (int i = 0; i < N; i++) 258 ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX); 259 260 const int ref_res = params_.ref_func(ds, m, N, limit); 261 int tst_res; 262 API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit)); 263 264 ASSERT_EQ(ref_res, tst_res); 265 } 266 } 267 268 TEST_P(WedgeUtilsSignOptTest, ExtremeValues) { 269 DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]); 270 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]); 271 DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]); 272 DECLARE_ALIGNED(32, uint8_t, m[MAX_SB_SQUARE]); 273 274 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 275 switch (rng_(4)) { 276 case 0: 277 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 278 r0[i] = 0; 279 r1[i] = kInt13Max; 280 } 281 break; 282 case 1: 283 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 284 r0[i] = kInt13Max; 285 r1[i] = 0; 286 } 287 break; 288 case 2: 289 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 290 r0[i] = 0; 291 r1[i] = -kInt13Max; 292 } 293 break; 294 default: 295 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 296 r0[i] = -kInt13Max; 297 r1[i] = 0; 298 } 299 break; 300 } 301 302 for (int i = 0; i < MAX_SB_SQUARE; ++i) m[i] = MAX_MASK_VALUE; 303 304 const int maxN = AOMMIN(kMaxSize, MAX_SB_SQUARE); 305 const int N = 64 * (rng_(maxN / 64 - 1) + 1); 306 307 int64_t limit; 308 limit = (int64_t)aom_sum_squares_i16(r0, N); 309 limit -= (int64_t)aom_sum_squares_i16(r1, N); 310 limit *= (1 << WEDGE_WEIGHT_BITS) / 2; 311 312 for (int i = 0; i < N; i++) 313 ds[i] = clamp(r0[i] * r0[i] - r1[i] * r1[i], INT16_MIN, INT16_MAX); 314 315 const int ref_res = params_.ref_func(ds, m, N, limit); 316 int tst_res; 317 API_REGISTER_STATE_CHECK(tst_res = params_.tst_func(ds, m, N, limit)); 318 319 ASSERT_EQ(ref_res, tst_res); 320 } 321 } 322 323 ////////////////////////////////////////////////////////////////////////////// 324 // av1_wedge_compute_delta_squares 325 ////////////////////////////////////////////////////////////////////////////// 326 327 using FDS = void (*)(int16_t *d, const int16_t *a, const int16_t *b, int N); 328 using TestFuncsFDS = libaom_test::FuncParam<FDS>; 329 330 class WedgeUtilsDeltaSquaresOptTest : public FunctionEquivalenceTest<FDS> { 331 protected: 332 static const int kIterations = 10000; 333 }; 334 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(WedgeUtilsDeltaSquaresOptTest); 335 336 TEST_P(WedgeUtilsDeltaSquaresOptTest, RandomValues) { 337 DECLARE_ALIGNED(32, int16_t, a[MAX_SB_SQUARE]); 338 DECLARE_ALIGNED(32, int16_t, b[MAX_SB_SQUARE]); 339 DECLARE_ALIGNED(32, int16_t, d_ref[MAX_SB_SQUARE]); 340 DECLARE_ALIGNED(32, int16_t, d_tst[MAX_SB_SQUARE]); 341 342 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) { 343 for (int i = 0; i < MAX_SB_SQUARE; ++i) { 344 a[i] = rng_.Rand16Signed(); 345 b[i] = rng_(2 * INT16_MAX + 1) - INT16_MAX; 346 } 347 348 const int N = 64 * (rng_(MAX_SB_SQUARE / 64) + 1); 349 350 memset(&d_ref, INT16_MAX, sizeof(d_ref)); 351 memset(&d_tst, INT16_MAX, sizeof(d_tst)); 352 353 params_.ref_func(d_ref, a, b, N); 354 API_REGISTER_STATE_CHECK(params_.tst_func(d_tst, a, b, N)); 355 356 for (int i = 0; i < MAX_SB_SQUARE; ++i) ASSERT_EQ(d_ref[i], d_tst[i]); 357 } 358 } 359 360 #if HAVE_SSE2 361 INSTANTIATE_TEST_SUITE_P( 362 SSE2, WedgeUtilsSSEOptTest, 363 ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c, 364 av1_wedge_sse_from_residuals_sse2))); 365 366 INSTANTIATE_TEST_SUITE_P( 367 SSE2, WedgeUtilsSignOptTest, 368 ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c, 369 av1_wedge_sign_from_residuals_sse2))); 370 371 INSTANTIATE_TEST_SUITE_P( 372 SSE2, WedgeUtilsDeltaSquaresOptTest, 373 ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c, 374 av1_wedge_compute_delta_squares_sse2))); 375 #endif // HAVE_SSE2 376 377 #if HAVE_NEON 378 INSTANTIATE_TEST_SUITE_P( 379 NEON, WedgeUtilsSSEOptTest, 380 ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c, 381 av1_wedge_sse_from_residuals_neon))); 382 383 INSTANTIATE_TEST_SUITE_P( 384 NEON, WedgeUtilsSignOptTest, 385 ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c, 386 av1_wedge_sign_from_residuals_neon))); 387 388 INSTANTIATE_TEST_SUITE_P( 389 NEON, WedgeUtilsDeltaSquaresOptTest, 390 ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c, 391 av1_wedge_compute_delta_squares_neon))); 392 #endif // HAVE_NEON 393 394 #if HAVE_AVX2 395 INSTANTIATE_TEST_SUITE_P( 396 AVX2, WedgeUtilsSSEOptTest, 397 ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_sse2, 398 av1_wedge_sse_from_residuals_avx2))); 399 400 INSTANTIATE_TEST_SUITE_P( 401 AVX2, WedgeUtilsSignOptTest, 402 ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_sse2, 403 av1_wedge_sign_from_residuals_avx2))); 404 405 INSTANTIATE_TEST_SUITE_P( 406 AVX2, WedgeUtilsDeltaSquaresOptTest, 407 ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_sse2, 408 av1_wedge_compute_delta_squares_avx2))); 409 #endif // HAVE_AVX2 410 411 #if HAVE_SVE 412 INSTANTIATE_TEST_SUITE_P( 413 SVE, WedgeUtilsSSEOptTest, 414 ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c, 415 av1_wedge_sse_from_residuals_sve))); 416 417 INSTANTIATE_TEST_SUITE_P( 418 SVE, WedgeUtilsSignOptTest, 419 ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c, 420 av1_wedge_sign_from_residuals_sve))); 421 #endif // HAVE_SVE 422 423 } // namespace