tf_gbench.cc (5563B)
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved. 2 // 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #include "benchmark/benchmark.h" 7 #include "lib/jxl/image_ops.h" 8 9 #undef HWY_TARGET_INCLUDE 10 #define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc" 11 #include <hwy/foreach_target.h> 12 #include <hwy/highway.h> 13 14 #include "lib/jxl/cms/transfer_functions-inl.h" 15 16 HWY_BEFORE_NAMESPACE(); 17 namespace jxl { 18 namespace HWY_NAMESPACE { 19 namespace { 20 21 #define RUN_BENCHMARK(F) \ 22 constexpr size_t kNum = 1 << 12; \ 23 HWY_FULL(float) d; \ 24 /* Three parallel runs, as this will run on R, G and B. */ \ 25 auto sum1 = Zero(d); \ 26 auto sum2 = Zero(d); \ 27 auto sum3 = Zero(d); \ 28 for (auto _ : state) { \ 29 auto x = Set(d, 1e-5); \ 30 auto v1 = Set(d, 1e-5); \ 31 auto v2 = Set(d, 1.1e-5); \ 32 auto v3 = Set(d, 1.2e-5); \ 33 for (size_t i = 0; i < kNum; i++) { \ 34 sum1 += F(d, v1); \ 35 sum2 += F(d, v2); \ 36 sum3 += F(d, v3); \ 37 v1 += x; \ 38 v2 += x; \ 39 v3 += x; \ 40 } \ 41 } \ 42 /* floats per second */ \ 43 state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \ 44 benchmark::DoNotOptimize(sum1 + sum2 + sum3); 45 46 #define RUN_BENCHMARK_SCALAR(F, I) \ 47 constexpr size_t kNum = 1 << 12; \ 48 /* Three parallel runs, as this will run on R, G and B. */ \ 49 float sum1 = 0; \ 50 float sum2 = 0; \ 51 float sum3 = 0; \ 52 for (auto _ : state) { \ 53 float x = 1e-5; \ 54 float v1 = 1e-5; \ 55 float v2 = 1.1e-5; \ 56 float v3 = 1.2e-5; \ 57 for (size_t i = 0; i < kNum; i++) { \ 58 sum1 += F(I, v1); \ 59 sum2 += F(I, v2); \ 60 sum3 += F(I, v3); \ 61 v1 += x; \ 62 v2 += x; \ 63 v3 += x; \ 64 } \ 65 } \ 66 /* floats per second */ \ 67 state.SetItemsProcessed(kNum* state.iterations() * 3); \ 68 benchmark::DoNotOptimize(sum1 + sum2 + sum3); 69 70 HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) { 71 RUN_BENCHMARK(FastLinearToSRGB); 72 } 73 74 HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) { 75 RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay); 76 } 77 78 HWY_NOINLINE void BM_PQDFE(benchmark::State& state) { 79 TF_PQ tf_pq(10000.0); 80 RUN_BENCHMARK(tf_pq.DisplayFromEncoded); 81 } 82 83 HWY_NOINLINE void BM_PQEFD(benchmark::State& state) { 84 TF_PQ tf_pq(10000.0); 85 RUN_BENCHMARK(tf_pq.EncodedFromDisplay); 86 } 87 88 HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) { 89 RUN_BENCHMARK_SCALAR(TF_PQ_Base::DisplayFromEncoded, 10000.0); 90 } 91 92 HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) { 93 RUN_BENCHMARK_SCALAR(TF_PQ_Base::EncodedFromDisplay, 10000.0); 94 } 95 } // namespace 96 // NOLINTNEXTLINE(google-readability-namespace-comments) 97 } // namespace HWY_NAMESPACE 98 } // namespace jxl 99 HWY_AFTER_NAMESPACE(); 100 101 #if HWY_ONCE 102 namespace jxl { 103 namespace { 104 105 HWY_EXPORT(BM_FastSRGB); 106 HWY_EXPORT(BM_TFSRGB); 107 HWY_EXPORT(BM_PQDFE); 108 HWY_EXPORT(BM_PQEFD); 109 HWY_EXPORT(BM_PQSlowDFE); 110 HWY_EXPORT(BM_PQSlowEFD); 111 112 float SRGB_pow(float _, float x) { 113 return x < 0.0031308f ? 12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f; 114 } 115 116 void BM_FastSRGB(benchmark::State& state) { 117 HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state); 118 } 119 void BM_TFSRGB(benchmark::State& state) { 120 HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state); 121 } 122 void BM_PQDFE(benchmark::State& state) { 123 HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state); 124 } 125 void BM_PQEFD(benchmark::State& state) { 126 HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state); 127 } 128 void BM_PQSlowDFE(benchmark::State& state) { 129 HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state); 130 } 131 void BM_PQSlowEFD(benchmark::State& state) { 132 HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state); 133 } 134 135 void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow, 0); } 136 137 BENCHMARK(BM_FastSRGB); 138 BENCHMARK(BM_TFSRGB); 139 BENCHMARK(BM_SRGB_pow); 140 BENCHMARK(BM_PQDFE); 141 BENCHMARK(BM_PQEFD); 142 BENCHMARK(BM_PQSlowDFE); 143 BENCHMARK(BM_PQSlowEFD); 144 145 } // namespace 146 } // namespace jxl 147 #endif