// fast_math_test.cc (8865B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #undef HWY_TARGET_INCLUDE 7 #define HWY_TARGET_INCLUDE "lib/jxl/fast_math_test.cc" 8 #include <jxl/cms.h> 9 10 #include <hwy/foreach_target.h> 11 12 #include "lib/jxl/base/random.h" 13 #include "lib/jxl/cms/transfer_functions-inl.h" 14 #include "lib/jxl/dec_xyb-inl.h" 15 #include "lib/jxl/enc_xyb.h" 16 #include "lib/jxl/test_memory_manager.h" 17 #include "lib/jxl/test_utils.h" 18 #include "lib/jxl/testing.h" 19 20 // Test utils 21 #include <hwy/highway.h> 22 #include <hwy/tests/hwy_gtest.h> 23 HWY_BEFORE_NAMESPACE(); 24 namespace jxl { 25 namespace HWY_NAMESPACE { 26 namespace { 27 28 HWY_NOINLINE void TestFastLog2() { 29 constexpr size_t kNumTrials = 1 << 23; 30 Rng rng(1); 31 float max_abs_err = 0; 32 HWY_FULL(float) d; 33 for (size_t i = 0; i < kNumTrials; i++) { 34 const float f = rng.UniformF(1e-7f, 1e3f); 35 const auto actual_v = FastLog2f(d, Set(d, f)); 36 const float actual = GetLane(actual_v); 37 const float abs_err = std::abs(std::log2(f) - actual); 38 EXPECT_LT(abs_err, 3.1E-6) << "f = " << f; 39 max_abs_err = std::max(max_abs_err, abs_err); 40 } 41 printf("max abs err %e\n", static_cast<double>(max_abs_err)); 42 } 43 44 HWY_NOINLINE void TestFastPow2() { 45 constexpr size_t kNumTrials = 1 << 23; 46 Rng rng(1); 47 float max_rel_err = 0; 48 HWY_FULL(float) d; 49 for (size_t i = 0; i < kNumTrials; i++) { 50 const float f = rng.UniformF(-100, 100); 51 const auto actual_v = FastPow2f(d, Set(d, f)); 52 const float actual = GetLane(actual_v); 53 const float expected = std::pow(2, f); 54 const float rel_err = std::abs(expected - actual) / expected; 55 EXPECT_LT(rel_err, 3.1E-6) << "f = " << f; 56 max_rel_err = std::max(max_rel_err, rel_err); 57 } 58 printf("max rel err %e\n", static_cast<double>(max_rel_err)); 59 } 60 61 HWY_NOINLINE void TestFastPow() { 62 constexpr size_t 
kNumTrials = 1 << 23; 63 Rng rng(1); 64 float max_rel_err = 0; 65 HWY_FULL(float) d; 66 for (size_t i = 0; i < kNumTrials; i++) { 67 const float b = rng.UniformF(1e-3f, 1e3f); 68 const float e = rng.UniformF(-10, 10); 69 const auto actual_v = FastPowf(d, Set(d, b), Set(d, e)); 70 const float actual = GetLane(actual_v); 71 const float expected = std::pow(b, e); 72 const float rel_err = std::abs(expected - actual) / expected; 73 EXPECT_LT(rel_err, 3E-5) << "b = " << b << " e = " << e; 74 max_rel_err = std::max(max_rel_err, rel_err); 75 } 76 printf("max rel err %e\n", static_cast<double>(max_rel_err)); 77 } 78 79 HWY_NOINLINE void TestFastCos() { 80 constexpr size_t kNumTrials = 1 << 23; 81 Rng rng(1); 82 float max_abs_err = 0; 83 HWY_FULL(float) d; 84 for (size_t i = 0; i < kNumTrials; i++) { 85 const float f = rng.UniformF(-1e3f, 1e3f); 86 const auto actual_v = FastCosf(d, Set(d, f)); 87 const float actual = GetLane(actual_v); 88 const float abs_err = std::abs(std::cos(f) - actual); 89 EXPECT_LT(abs_err, 7E-5) << "f = " << f; 90 max_abs_err = std::max(max_abs_err, abs_err); 91 } 92 printf("max abs err %e\n", static_cast<double>(max_abs_err)); 93 } 94 95 HWY_NOINLINE void TestFastErf() { 96 constexpr size_t kNumTrials = 1 << 23; 97 Rng rng(1); 98 float max_abs_err = 0; 99 HWY_FULL(float) d; 100 for (size_t i = 0; i < kNumTrials; i++) { 101 const float f = rng.UniformF(-5.f, 5.f); 102 const auto actual_v = FastErff(d, Set(d, f)); 103 const float actual = GetLane(actual_v); 104 const float abs_err = std::abs(std::erf(f) - actual); 105 EXPECT_LT(abs_err, 7E-4) << "f = " << f; 106 max_abs_err = std::max(max_abs_err, abs_err); 107 } 108 printf("max abs err %e\n", static_cast<double>(max_abs_err)); 109 } 110 111 HWY_NOINLINE void TestCubeRoot() { 112 const HWY_FULL(float) d; 113 for (uint64_t x5 = 0; x5 < 2000000; x5++) { 114 const float x = x5 * 1E-5f; 115 const float expected = cbrtf(x); 116 HWY_ALIGN float approx[MaxLanes(d)]; 117 Store(CubeRootAndAdd(Set(d, x), 
Zero(d)), d, approx); 118 119 // All lanes are same 120 for (size_t i = 1; i < Lanes(d); ++i) { 121 EXPECT_NEAR(approx[0], approx[i], 5E-7f); 122 } 123 EXPECT_NEAR(approx[0], expected, 8E-7f); 124 } 125 } 126 127 HWY_NOINLINE void TestFastSRGB() { 128 constexpr size_t kNumTrials = 1 << 23; 129 Rng rng(1); 130 float max_abs_err = 0; 131 HWY_FULL(float) d; 132 for (size_t i = 0; i < kNumTrials; i++) { 133 const float f = rng.UniformF(0.0f, 1.0f); 134 const auto actual_v = FastLinearToSRGB(d, Set(d, f)); 135 const float actual = GetLane(actual_v); 136 const float expected = GetLane(TF_SRGB().EncodedFromDisplay(d, Set(d, f))); 137 const float abs_err = std::abs(expected - actual); 138 EXPECT_LT(abs_err, 1.2E-4) << "f = " << f; 139 max_abs_err = std::max(max_abs_err, abs_err); 140 } 141 printf("max abs err %e\n", static_cast<double>(max_abs_err)); 142 } 143 144 HWY_NOINLINE void TestFast709EFD() { 145 constexpr size_t kNumTrials = 1 << 23; 146 Rng rng(1); 147 float max_abs_err = 0; 148 HWY_FULL(float) d; 149 for (size_t i = 0; i < kNumTrials; i++) { 150 const float f = rng.UniformF(0.0f, 1.0f); 151 const float actual = GetLane(TF_709().EncodedFromDisplay(d, Set(d, f))); 152 const float expected = TF_709().EncodedFromDisplay(f); 153 const float abs_err = std::abs(expected - actual); 154 EXPECT_LT(abs_err, 2e-6) << "f = " << f; 155 max_abs_err = std::max(max_abs_err, abs_err); 156 } 157 printf("max abs err %e\n", static_cast<double>(max_abs_err)); 158 } 159 160 HWY_NOINLINE void TestFastXYB() { 161 if (!HasFastXYBTosRGB8()) return; 162 ImageMetadata metadata; 163 ImageBundle ib(jxl::test::MemoryManager(), &metadata); 164 int scaling = 1; 165 int n = 256 * scaling; 166 float inv_scaling = 1.0f / scaling; 167 int kChunk = 32; 168 // The image is divided in chunks to reduce total memory usage. 
169 for (int cr = 0; cr < n; cr += kChunk) { 170 for (int cg = 0; cg < n; cg += kChunk) { 171 for (int cb = 0; cb < n; cb += kChunk) { 172 JXL_TEST_ASSIGN_OR_DIE(Image3F chunk, 173 Image3F::Create(jxl::test::MemoryManager(), 174 kChunk * kChunk, kChunk)); 175 for (int ir = 0; ir < kChunk; ir++) { 176 for (int ig = 0; ig < kChunk; ig++) { 177 for (int ib = 0; ib < kChunk; ib++) { 178 float r = (cr + ir) * inv_scaling; 179 float g = (cg + ig) * inv_scaling; 180 float b = (cb + ib) * inv_scaling; 181 chunk.PlaneRow(0, ir)[ig * kChunk + ib] = r * (1.0f / 255); 182 chunk.PlaneRow(1, ir)[ig * kChunk + ib] = g * (1.0f / 255); 183 chunk.PlaneRow(2, ir)[ig * kChunk + ib] = b * (1.0f / 255); 184 } 185 } 186 } 187 ASSERT_TRUE(ib.SetFromImage(std::move(chunk), ColorEncoding::SRGB())); 188 JXL_TEST_ASSIGN_OR_DIE(Image3F xyb, 189 Image3F::Create(jxl::test::MemoryManager(), 190 kChunk * kChunk, kChunk)); 191 std::vector<uint8_t> roundtrip(kChunk * kChunk * kChunk * 3); 192 ASSERT_TRUE(ToXYB(ib, nullptr, &xyb, *JxlGetDefaultCms())); 193 for (int y = 0; y < kChunk; y++) { 194 const float* xyba[4] = {xyb.PlaneRow(0, y), xyb.PlaneRow(1, y), 195 xyb.PlaneRow(2, y), nullptr}; 196 ASSERT_TRUE(jxl::HWY_NAMESPACE::FastXYBTosRGB8( 197 xyba, roundtrip.data() + 3 * xyb.xsize() * y, false, 198 xyb.xsize())); 199 } 200 for (int ir = 0; ir < kChunk; ir++) { 201 for (int ig = 0; ig < kChunk; ig++) { 202 for (int ib = 0; ib < kChunk; ib++) { 203 float r = (cr + ir) * inv_scaling; 204 float g = (cg + ig) * inv_scaling; 205 float b = (cb + ib) * inv_scaling; 206 size_t idx = ir * kChunk * kChunk + ig * kChunk + ib; 207 int rr = roundtrip[3 * idx]; 208 int rg = roundtrip[3 * idx + 1]; 209 int rb = roundtrip[3 * idx + 2]; 210 EXPECT_LT(abs(r - rr), 2) << "expected " << r << " got " << rr; 211 EXPECT_LT(abs(g - rg), 2) << "expected " << g << " got " << rg; 212 EXPECT_LT(abs(b - rb), 2) << "expected " << b << " got " << rb; 213 } 214 } 215 } 216 } 217 } 218 } 219 } 220 221 } // namespace 222 // 
NOLINTNEXTLINE(google-readability-namespace-comments) 223 } // namespace HWY_NAMESPACE 224 } // namespace jxl 225 HWY_AFTER_NAMESPACE(); 226 227 #if HWY_ONCE 228 namespace jxl { 229 230 class FastMathTargetTest : public hwy::TestWithParamTarget {}; 231 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastMathTargetTest); 232 233 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastLog2); 234 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow2); 235 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow); 236 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastCos); 237 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastErf); 238 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestCubeRoot); 239 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastSRGB); 240 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFast709EFD); 241 HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastXYB); 242 243 } // namespace jxl 244 #endif // HWY_ONCE