tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

tf_gbench.cc (5563B)


      1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 #include "benchmark/benchmark.h"
      7 #include "lib/jxl/image_ops.h"
      8 
      9 #undef HWY_TARGET_INCLUDE
     10 #define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc"
     11 #include <hwy/foreach_target.h>
     12 #include <hwy/highway.h>
     13 
     14 #include "lib/jxl/cms/transfer_functions-inl.h"
     15 
     16 HWY_BEFORE_NAMESPACE();
     17 namespace jxl {
     18 namespace HWY_NAMESPACE {
     19 namespace {
     20 
     21 #define RUN_BENCHMARK(F)                                            \
     22  constexpr size_t kNum = 1 << 12;                                  \
     23  HWY_FULL(float) d;                                                \
     24  /* Three parallel runs, as this will run on R, G and B. */        \
     25  auto sum1 = Zero(d);                                              \
     26  auto sum2 = Zero(d);                                              \
     27  auto sum3 = Zero(d);                                              \
     28  for (auto _ : state) {                                            \
     29    auto x = Set(d, 1e-5);                                          \
     30    auto v1 = Set(d, 1e-5);                                         \
     31    auto v2 = Set(d, 1.1e-5);                                       \
     32    auto v3 = Set(d, 1.2e-5);                                       \
     33    for (size_t i = 0; i < kNum; i++) {                             \
     34      sum1 += F(d, v1);                                             \
     35      sum2 += F(d, v2);                                             \
     36      sum3 += F(d, v3);                                             \
     37      v1 += x;                                                      \
     38      v2 += x;                                                      \
     39      v3 += x;                                                      \
     40    }                                                               \
     41  }                                                                 \
     42  /* floats per second */                                           \
     43  state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \
     44  benchmark::DoNotOptimize(sum1 + sum2 + sum3);
     45 
     46 #define RUN_BENCHMARK_SCALAR(F, I)                           \
     47  constexpr size_t kNum = 1 << 12;                           \
     48  /* Three parallel runs, as this will run on R, G and B. */ \
     49  float sum1 = 0;                                            \
     50  float sum2 = 0;                                            \
     51  float sum3 = 0;                                            \
     52  for (auto _ : state) {                                     \
     53    float x = 1e-5;                                          \
     54    float v1 = 1e-5;                                         \
     55    float v2 = 1.1e-5;                                       \
     56    float v3 = 1.2e-5;                                       \
     57    for (size_t i = 0; i < kNum; i++) {                      \
     58      sum1 += F(I, v1);                                      \
     59      sum2 += F(I, v2);                                      \
     60      sum3 += F(I, v3);                                      \
     61      v1 += x;                                               \
     62      v2 += x;                                               \
     63      v3 += x;                                               \
     64    }                                                        \
     65  }                                                          \
     66  /* floats per second */                                    \
     67  state.SetItemsProcessed(kNum* state.iterations() * 3);     \
     68  benchmark::DoNotOptimize(sum1 + sum2 + sum3);
     69 
// Per-target benchmark: runs the vector benchmark body (RUN_BENCHMARK) with
// FastLinearToSRGB as the function under test. The parameter must be named
// `state` because the macro refers to it by that name.
HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) {
 RUN_BENCHMARK(FastLinearToSRGB);
}
     73 
// Per-target benchmark: runs the vector benchmark body (RUN_BENCHMARK) with
// TF_SRGB::EncodedFromDisplay as the function under test. The macro splices
// its argument textually, so `TF_SRGB()` is written out at every call site in
// the expansion.
HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) {
 RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay);
}
     77 
     78 HWY_NOINLINE void BM_PQDFE(benchmark::State& state) {
     79  TF_PQ tf_pq(10000.0);
     80  RUN_BENCHMARK(tf_pq.DisplayFromEncoded);
     81 }
     82 
     83 HWY_NOINLINE void BM_PQEFD(benchmark::State& state) {
     84  TF_PQ tf_pq(10000.0);
     85  RUN_BENCHMARK(tf_pq.EncodedFromDisplay);
     86 }
     87 
     88 HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) {
     89  RUN_BENCHMARK_SCALAR(TF_PQ_Base::DisplayFromEncoded, 10000.0);
     90 }
     91 
     92 HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) {
     93  RUN_BENCHMARK_SCALAR(TF_PQ_Base::EncodedFromDisplay, 10000.0);
     94 }
     95 }  // namespace
     96 // NOLINTNEXTLINE(google-readability-namespace-comments)
     97 }  // namespace HWY_NAMESPACE
     98 }  // namespace jxl
     99 HWY_AFTER_NAMESPACE();
    100 
    101 #if HWY_ONCE
    102 namespace jxl {
    103 namespace {
    104 
// Export each per-target benchmark so the wrappers below can reach it through
// HWY_DYNAMIC_DISPATCH.
HWY_EXPORT(BM_FastSRGB);
HWY_EXPORT(BM_TFSRGB);
HWY_EXPORT(BM_PQDFE);
HWY_EXPORT(BM_PQEFD);
HWY_EXPORT(BM_PQSlowDFE);
HWY_EXPORT(BM_PQSlowEFD);
    111 
    112 float SRGB_pow(float _, float x) {
    113  return x < 0.0031308f ? 12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f;
    114 }
    115 
    116 void BM_FastSRGB(benchmark::State& state) {
    117  HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state);
    118 }
    119 void BM_TFSRGB(benchmark::State& state) {
    120  HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state);
    121 }
    122 void BM_PQDFE(benchmark::State& state) {
    123  HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state);
    124 }
    125 void BM_PQEFD(benchmark::State& state) {
    126  HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state);
    127 }
    128 void BM_PQSlowDFE(benchmark::State& state) {
    129  HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state);
    130 }
    131 void BM_PQSlowEFD(benchmark::State& state) {
    132  HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state);
    133 }
    134 
    135 void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow, 0); }
    136 
// Register the benchmarks with Google Benchmark: the dispatched vector and
// scalar variants plus the powf-based SRGB_pow reference.
BENCHMARK(BM_FastSRGB);
BENCHMARK(BM_TFSRGB);
BENCHMARK(BM_SRGB_pow);
BENCHMARK(BM_PQDFE);
BENCHMARK(BM_PQEFD);
BENCHMARK(BM_PQSlowDFE);
BENCHMARK(BM_PQSlowEFD);
    144 
    145 }  // namespace
    146 }  // namespace jxl
    147 #endif