sse_sum_test.cc (5937B)
1 /* 2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <cmath> 13 #include <cstdlib> 14 #include <string> 15 #include <tuple> 16 17 #include "gtest/gtest.h" 18 19 #include "config/aom_config.h" 20 #include "config/aom_dsp_rtcd.h" 21 22 #include "aom_ports/mem.h" 23 #include "test/acm_random.h" 24 #include "test/register_state_check.h" 25 #include "test/util.h" 26 #include "test/function_equivalence_test.h" 27 28 using libaom_test::ACMRandom; 29 using libaom_test::FunctionEquivalenceTest; 30 using ::testing::Combine; 31 using ::testing::Range; 32 using ::testing::Values; 33 using ::testing::ValuesIn; 34 35 namespace { 36 const int kNumIterations = 10000; 37 38 using SSI16Func = uint64_t (*)(const int16_t *src, int src_stride, int width, 39 int height, int *sum); 40 using TestFuncs = libaom_test::FuncParam<SSI16Func>; 41 42 class SumSSETest : public ::testing::TestWithParam<TestFuncs> { 43 public: 44 ~SumSSETest() override = default; 45 void SetUp() override { 46 params_ = this->GetParam(); 47 rnd_.Reset(ACMRandom::DeterministicSeed()); 48 src_ = reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2)); 49 ASSERT_NE(src_, nullptr); 50 } 51 52 void TearDown() override { aom_free(src_); } 53 void RunTest(int isRandom); 54 void RunSpeedTest(); 55 56 void GenRandomData(int width, int height, int stride) { 57 const int msb = 11; // Up to 12 bit input 58 const int limit = 1 << (msb + 1); 59 for (int ii = 0; ii < height; ii++) { 60 for (int jj = 0; jj < width; jj++) { 61 src_[ii * stride + jj] = rnd_(2) ? rnd_(limit) : -rnd_(limit); 62 } 63 } 64 } 65 66 void GenExtremeData(int width, int height, int stride) { 67 const int msb = 11; // Up to 12 bit input 68 const int limit = 1 << (msb + 1); 69 const int val = rnd_(2) ? limit - 1 : -(limit - 1); 70 for (int ii = 0; ii < height; ii++) { 71 for (int jj = 0; jj < width; jj++) { 72 src_[ii * stride + jj] = val; 73 } 74 } 75 } 76 77 protected: 78 TestFuncs params_; 79 int16_t *src_; 80 ACMRandom rnd_; 81 }; 82 83 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSSETest); 84 85 void SumSSETest::RunTest(int isRandom) { 86 for (int k = 0; k < kNumIterations; k++) { 87 const int width = 4 * (rnd_(31) + 1); // Up to 128x128 88 const int height = 4 * (rnd_(31) + 1); // Up to 128x128 89 int stride = 4 << rnd_(7); // Up to 256 stride 90 while (stride < width) { // Make sure it's valid 91 stride = 4 << rnd_(7); 92 } 93 if (isRandom) { 94 GenRandomData(width, height, stride); 95 } else { 96 GenExtremeData(width, height, stride); 97 } 98 int sum_ref = 0, sum_tst = 0; 99 const uint64_t sse_ref = 100 params_.ref_func(src_, stride, width, height, &sum_ref); 101 const uint64_t sse_tst = 102 params_.tst_func(src_, stride, width, height, &sum_tst); 103 104 EXPECT_EQ(sse_ref, sse_tst) 105 << "Error: SumSSETest [" << width << "x" << height 106 << "] C SSE does not match optimized output."; 107 EXPECT_EQ(sum_ref, sum_tst) 108 << "Error: SumSSETest [" << width << "x" << height 109 << "] C Sum does not match optimized output."; 110 } 111 } 112 113 void SumSSETest::RunSpeedTest() { 114 for (int block = BLOCK_4X4; block < BLOCK_SIZES_ALL; block++) { 115 const int width = block_size_wide[block]; // Up to 128x128 116 const int height = block_size_high[block]; // Up to 128x128 117 int stride = 4 << rnd_(7); // Up to 256 stride 118 while (stride < width) { // Make sure it's valid 119 stride = 4 << rnd_(7); 120 } 121 GenExtremeData(width, height, stride); 122 const int num_loops = 1000000000 / (width + height); 123 int sum_ref = 0, sum_tst = 0; 124 125 aom_usec_timer timer; 126 aom_usec_timer_start(&timer); 127 128 for (int i = 0; i < num_loops; ++i) 129 params_.ref_func(src_, stride, width, height, &sum_ref); 130 131 aom_usec_timer_mark(&timer); 132 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer)); 133 printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width, height, 134 1000.0 * elapsed_time / num_loops); 135 136 aom_usec_timer timer1; 137 aom_usec_timer_start(&timer1); 138 for (int i = 0; i < num_loops; ++i) 139 params_.tst_func(src_, stride, width, height, &sum_tst); 140 aom_usec_timer_mark(&timer1); 141 const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1)); 142 printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width, height, 143 1000.0 * elapsed_time1 / num_loops); 144 } 145 } 146 147 TEST_P(SumSSETest, OperationCheck) { 148 RunTest(1); // GenRandomData 149 } 150 151 TEST_P(SumSSETest, ExtremeValues) { 152 RunTest(0); // GenExtremeData 153 } 154 155 TEST_P(SumSSETest, DISABLED_Speed) { RunSpeedTest(); } 156 157 #if HAVE_SSE2 158 INSTANTIATE_TEST_SUITE_P(SSE2, SumSSETest, 159 ::testing::Values(TestFuncs( 160 &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_sse2))); 161 162 #endif // HAVE_SSE2 163 164 #if HAVE_NEON 165 INSTANTIATE_TEST_SUITE_P(NEON, SumSSETest, 166 ::testing::Values(TestFuncs( 167 &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_neon))); 168 #endif // HAVE_NEON 169 170 #if HAVE_AVX2 171 INSTANTIATE_TEST_SUITE_P(AVX2, SumSSETest, 172 ::testing::Values(TestFuncs( 173 &aom_sum_sse_2d_i16_c, &aom_sum_sse_2d_i16_avx2))); 174 #endif // HAVE_AVX2 175 176 #if HAVE_SVE 177 INSTANTIATE_TEST_SUITE_P(SVE, SumSSETest, 178 ::testing::Values(TestFuncs(&aom_sum_sse_2d_i16_c, 179 &aom_sum_sse_2d_i16_sve))); 180 #endif // HAVE_SVE 181 182 } // namespace