skeleton_test.cc (4819B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // Example of unit test for the "skeleton" library. 17 18 #include "hwy/examples/skeleton.h" 19 20 #include <stdint.h> 21 #include <stdio.h> 22 23 #undef HWY_TARGET_INCLUDE 24 #define HWY_TARGET_INCLUDE "examples/skeleton_test.cc" 25 #include "hwy/foreach_target.h" // IWYU pragma: keep 26 27 // Must come after foreach_target.h to avoid redefinition errors. 28 #include "hwy/highway.h" 29 #include "hwy/nanobenchmark.h" // Unpredictable1 30 #include "hwy/tests/test_util-inl.h" 31 32 // Optional: factor out parts of the implementation into *-inl.h 33 // (must also come after foreach_target.h to avoid redefinition errors) 34 #include "hwy/examples/skeleton-inl.h" 35 36 HWY_BEFORE_NAMESPACE(); 37 namespace skeleton { 38 namespace HWY_NAMESPACE { 39 namespace { 40 41 namespace hn = hwy::HWY_NAMESPACE; 42 43 // Calls function defined in skeleton.cc. 44 struct TestFloorLog2 { 45 template <class T, class DF> 46 HWY_NOINLINE void operator()(T /*unused*/, DF df) { 47 const size_t count = 5 * hn::Lanes(df); 48 auto in = hwy::AllocateAligned<uint8_t>(count); 49 auto expected = hwy::AllocateAligned<uint8_t>(count); 50 auto out = hwy::AllocateAligned<uint8_t>(count); 51 HWY_ASSERT(in && expected && out); 52 53 hwy::RandomState rng; 54 for (size_t i = 0; i < count; ++i) { 55 expected[i] = Random32(&rng) & 7; 56 in[i] = static_cast<uint8_t>(1u << expected[i]); 57 } 58 CallFloorLog2(in.get(), count, out.get()); 59 int sum = 0; 60 for (size_t i = 0; i < count; ++i) { 61 HWY_ASSERT_EQ(expected[i], out[i]); 62 sum += out[i]; 63 } 64 65 for (size_t i = 0; i < count; ++i) { 66 out[i] = static_cast<uint8_t>(hwy::Unpredictable1()); 67 } 68 69 SavedCallFloorLog2(in.get(), count, out.get()); 70 for (size_t i = 0; i < count; ++i) { 71 HWY_ASSERT_EQ(expected[i], out[i]); 72 sum += out[i]; 73 } 74 75 hwy::PreventElision(sum); 76 } 77 }; 78 79 HWY_NOINLINE void TestAllFloorLog2() { 80 hn::ForPartialVectors<TestFloorLog2>()(float()); 81 } 82 83 // Calls function defined in skeleton-inl.h. 84 struct TestSumMulAdd { 85 template <class T, class D> 86 HWY_NOINLINE void operator()(T /*unused*/, D d) { 87 hwy::RandomState rng; 88 const size_t count = 4096; 89 HWY_ASSERT_EQ(size_t{0}, count % hn::Lanes(d)); 90 auto mul = hwy::AllocateAligned<T>(count); 91 auto x = hwy::AllocateAligned<T>(count); 92 auto add = hwy::AllocateAligned<T>(count); 93 HWY_ASSERT(mul && x && add); 94 95 for (size_t i = 0; i < count; ++i) { 96 mul[i] = hwy::ConvertScalarTo<T>(Random32(&rng) & 0xF); 97 x[i] = hwy::ConvertScalarTo<T>(Random32(&rng) & 0xFF); 98 add[i] = hwy::ConvertScalarTo<T>(Random32(&rng) & 0xFF); 99 } 100 double expected_sum = 0.0; 101 for (size_t i = 0; i < count; ++i) { 102 expected_sum += hwy::ConvertScalarTo<double>(mul[i]) * 103 hwy::ConvertScalarTo<double>(x[i]) + 104 hwy::ConvertScalarTo<double>(add[i]); 105 } 106 107 MulAddLoop(d, mul.get(), add.get(), count, x.get()); 108 double vector_sum = 0.0; 109 for (size_t i = 0; i < count; ++i) { 110 vector_sum += hwy::ConvertScalarTo<double>(x[i]); 111 } 112 113 if (hwy::IsSame<T, hwy::float16_t>()) { 114 // The expected value for float16 will vary based on the underlying 115 // implementation (compiler emulation, ARM ACLE __fp16 vs _Float16, etc). 116 // In some cases the scalar and vector paths will have different results; 117 // we check them against known values where possible, else we ignore them. 118 #if HWY_COMPILER_CLANG && HWY_NEON_HAVE_F16C 119 HWY_ASSERT_EQ(4344240.0, expected_sum); // Full-width float 120 HWY_ASSERT_EQ(4344235.0, vector_sum); // __fp16 121 #endif 122 return; 123 } 124 HWY_ASSERT_EQ(4344240.0, expected_sum); 125 HWY_ASSERT_EQ(expected_sum, vector_sum); 126 } 127 }; 128 129 HWY_NOINLINE void TestAllSumMulAdd() { 130 hn::ForFloatTypes(hn::ForPartialVectors<TestSumMulAdd>()); 131 } 132 133 } // namespace 134 // NOLINTNEXTLINE(google-readability-namespace-comments) 135 } // namespace HWY_NAMESPACE 136 } // namespace skeleton 137 HWY_AFTER_NAMESPACE(); 138 139 #if HWY_ONCE 140 namespace skeleton { 141 namespace { 142 HWY_BEFORE_TEST(SkeletonTest); 143 HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllFloorLog2); 144 HWY_EXPORT_AND_TEST_P(SkeletonTest, TestAllSumMulAdd); 145 HWY_AFTER_TEST(); 146 } // namespace 147 } // namespace skeleton 148 HWY_TEST_MAIN(); 149 #endif // HWY_ONCE