math_test.cc (8164B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <stdint.h> 17 #include <stdio.h> 18 19 #include <cfloat> // FLT_MAX 20 #include <cmath> // std::abs 21 22 #include "hwy/base.h" 23 24 // clang-format off 25 #undef HWY_TARGET_INCLUDE 26 #define HWY_TARGET_INCLUDE "hwy/contrib/math/math_test.cc" 27 #include "hwy/foreach_target.h" // IWYU pragma: keep 28 #include "hwy/highway.h" 29 #include "hwy/contrib/math/math-inl.h" 30 #include "hwy/tests/test_util-inl.h" 31 // clang-format on 32 33 HWY_BEFORE_NAMESPACE(); 34 namespace hwy { 35 namespace HWY_NAMESPACE { 36 namespace { 37 38 // We have had test failures caused by excess precision due to keeping 39 // intermediate results in 80-bit x87 registers. One such failure mode is that 40 // Log1p computes a 1.0 which is not exactly equal to 1.0f, causing is_pole to 41 // incorrectly evaluate to false. 42 #undef HWY_MATH_TEST_EXCESS_PRECISION 43 #if HWY_ARCH_X86_32 && HWY_COMPILER_GCC_ACTUAL && \ 44 (HWY_TARGET == HWY_SCALAR || HWY_TARGET == HWY_EMU128) 45 46 // GCC 13+: because CMAKE_CXX_EXTENSIONS is OFF, we build with -std= and hence 47 // also -fexcess-precision=standard, so there is no problem. See #1708 and 48 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323. 49 #if HWY_COMPILER_GCC_ACTUAL >= 1300 50 #define HWY_MATH_TEST_EXCESS_PRECISION 0 51 52 #else // HWY_COMPILER_GCC_ACTUAL < 1300 53 54 // The build system must enable SSE2, e.g. via HWY_CMAKE_SSE2 - see 55 // https://stackoverflow.com/questions/20869904/c-handling-of-excess-precision . 56 #if defined(__SSE2__) // correct flag given, no problem 57 #define HWY_MATH_TEST_EXCESS_PRECISION 0 58 #else 59 #define HWY_MATH_TEST_EXCESS_PRECISION 1 60 #pragma message( \ 61 "Skipping scalar math_test on 32-bit x86 GCC <13 without HWY_CMAKE_SSE2") 62 #endif // defined(__SSE2__) 63 64 #endif // HWY_COMPILER_GCC_ACTUAL 65 #else // not (x86-32, GCC, scalar target): running math_test normally 66 #define HWY_MATH_TEST_EXCESS_PRECISION 0 67 #endif // HWY_ARCH_X86_32 etc 68 69 template <class T, class D> 70 HWY_NOINLINE void TestMath(const char* name, T (*fx1)(T), 71 Vec<D> (*fxN)(D, VecArg<Vec<D>>), D d, T min, T max, 72 uint64_t max_error_ulp) { 73 if (HWY_MATH_TEST_EXCESS_PRECISION) { 74 static bool once = true; 75 if (once) { 76 once = false; 77 HWY_WARN("Skipping math_test due to GCC issue with excess precision.\n"); 78 } 79 return; 80 } 81 82 using UintT = MakeUnsigned<T>; 83 84 const UintT min_bits = BitCastScalar<UintT>(min); 85 const UintT max_bits = BitCastScalar<UintT>(max); 86 87 // If min is negative and max is positive, the range needs to be broken into 88 // two pieces, [+0, max] and [-0, min], otherwise [min, max]. 89 int range_count = 1; 90 UintT ranges[2][2] = {{min_bits, max_bits}, {0, 0}}; 91 if ((min < T{0}) && (max > T{0})) { 92 ranges[0][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(+0.0)); 93 ranges[0][1] = max_bits; 94 ranges[1][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(-0.0)); 95 ranges[1][1] = min_bits; 96 range_count = 2; 97 } 98 99 uint64_t max_ulp = 0; 100 // Emulation is slower, so cannot afford as many. 101 constexpr UintT kSamplesPerRange = static_cast<UintT>(AdjustedReps(4000)); 102 for (int range_index = 0; range_index < range_count; ++range_index) { 103 const UintT start = ranges[range_index][0]; 104 const UintT stop = ranges[range_index][1]; 105 const UintT step = HWY_MAX(1, ((stop - start) / kSamplesPerRange)); 106 for (UintT value_bits = start; value_bits <= stop; value_bits += step) { 107 // For reasons unknown, the HWY_MAX is necessary on RVV, otherwise 108 // value_bits can be less than start, and thus possibly NaN. 109 const T value = 110 BitCastScalar<T>(HWY_MIN(HWY_MAX(start, value_bits), stop)); 111 const T actual = GetLane(fxN(d, Set(d, value))); 112 const T expected = fx1(value); 113 114 // Skip small inputs and outputs on armv7, it flushes subnormals to zero. 115 #if HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7 116 if ((std::abs(value) < 1e-37f) || (std::abs(expected) < 1e-37f)) { 117 continue; 118 } 119 #endif 120 121 const auto ulp = hwy::detail::ComputeUlpDelta(actual, expected); 122 max_ulp = HWY_MAX(max_ulp, ulp); 123 if (ulp > max_error_ulp) { 124 fprintf(stderr, "%s: %s(%f) expected %E actual %E ulp %g max ulp %u\n", 125 hwy::TypeName(T(), Lanes(d)).c_str(), name, value, 126 static_cast<double>(expected), static_cast<double>(actual), 127 static_cast<double>(ulp), static_cast<uint32_t>(max_error_ulp)); 128 } 129 } 130 } 131 fprintf(stderr, "%s: %s max_ulp %g\n", hwy::TypeName(T(), Lanes(d)).c_str(), 132 name, static_cast<double>(max_ulp)); 133 HWY_ASSERT(max_ulp <= max_error_ulp); 134 } 135 136 #define DEFINE_MATH_TEST_FUNC(NAME) \ 137 HWY_NOINLINE void TestAll##NAME() { \ 138 ForFloat3264Types(ForPartialVectors<Test##NAME>()); \ 139 } 140 141 #undef DEFINE_MATH_TEST 142 #define DEFINE_MATH_TEST(NAME, F32x1, F32xN, F32_MIN, F32_MAX, F32_ERROR, \ 143 F64x1, F64xN, F64_MIN, F64_MAX, F64_ERROR) \ 144 struct Test##NAME { \ 145 template <class T, class D, HWY_IF_T_SIZE(T, 4)> \ 146 HWY_NOINLINE void operator()(T, D d) { \ 147 TestMath<T, D>(HWY_STR(NAME), F32x1, F32xN, d, F32_MIN, F32_MAX, \ 148 F32_ERROR); \ 149 } \ 150 template <class T, class D, HWY_IF_T_SIZE(T, 8)> \ 151 HWY_NOINLINE void operator()(T, D d) { \ 152 TestMath<T, D>(HWY_STR(NAME), F64x1, F64xN, d, static_cast<T>(F64_MIN), \ 153 static_cast<T>(F64_MAX), F64_ERROR); \ 154 } \ 155 }; \ 156 DEFINE_MATH_TEST_FUNC(NAME) 157 158 // clang-format off 159 DEFINE_MATH_TEST(Exp, 160 std::exp, CallExp, -FLT_MAX, +104.0f, 1, 161 std::exp, CallExp, -DBL_MAX, +104.0, 1) 162 DEFINE_MATH_TEST(Exp2, 163 std::exp2, CallExp2, -FLT_MAX, +128.0f, 2, 164 std::exp2, CallExp2, -DBL_MAX, +128.0, 2) 165 DEFINE_MATH_TEST(Expm1, 166 std::expm1, CallExpm1, -FLT_MAX, +104.0f, 4, 167 std::expm1, CallExpm1, -DBL_MAX, +104.0, 4) 168 DEFINE_MATH_TEST(Log, 169 std::log, CallLog, +FLT_MIN, +FLT_MAX, 1, 170 std::log, CallLog, +DBL_MIN, +DBL_MAX, 1) 171 DEFINE_MATH_TEST(Log10, 172 std::log10, CallLog10, +FLT_MIN, +FLT_MAX, 2, 173 std::log10, CallLog10, +DBL_MIN, +DBL_MAX, 2) 174 DEFINE_MATH_TEST(Log1p, 175 std::log1p, CallLog1p, +0.0f, +1e37f, 3, // NEON is 3 instead of 2 176 std::log1p, CallLog1p, +0.0, +DBL_MAX, 2) 177 DEFINE_MATH_TEST(Log2, 178 std::log2, CallLog2, +FLT_MIN, +FLT_MAX, 2, 179 std::log2, CallLog2, +DBL_MIN, +DBL_MAX, 2) 180 // clang-format on 181 182 } // namespace 183 // NOLINTNEXTLINE(google-readability-namespace-comments) 184 } // namespace HWY_NAMESPACE 185 } // namespace hwy 186 HWY_AFTER_NAMESPACE(); 187 188 #if HWY_ONCE 189 namespace hwy { 190 namespace { 191 HWY_BEFORE_TEST(HwyMathTest); 192 HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExp); 193 HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExp2); 194 HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllExpm1); 195 HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog); 196 HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog10); 197 HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog1p); 198 HWY_EXPORT_AND_TEST_P(HwyMathTest, TestAllLog2); 199 HWY_AFTER_TEST(); 200 } // namespace 201 } // namespace hwy 202 HWY_TEST_MAIN(); 203 #endif // HWY_ONCE