tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bit_pack_test.cc (9078B)


      1 // Copyright 2022 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include <stdio.h>
     17 
     18 #include <vector>
     19 
     20 #include "hwy/aligned_allocator.h"
     21 #include "hwy/base.h"
     22 #include "hwy/nanobenchmark.h"
     23 
     24 // clang-format off
     25 #undef HWY_TARGET_INCLUDE
     26 #define HWY_TARGET_INCLUDE "hwy/contrib/bit_pack/bit_pack_test.cc"  // NOLINT
     27 #include "hwy/foreach_target.h"  // IWYU pragma: keep
     28 #include "hwy/highway.h"
     29 #include "hwy/timer.h"
     30 #include "hwy/contrib/bit_pack/bit_pack-inl.h"
     31 #include "hwy/tests/test_util-inl.h"
     32 // clang-format on
     33 
     34 #ifndef HWY_BIT_PACK_BENCHMARK
     35 #define HWY_BIT_PACK_BENCHMARK 0
     36 #endif
     37 
     38 HWY_BEFORE_NAMESPACE();
     39 namespace hwy {
     40 // Used to prevent running benchmark (slow) for partial vectors and targets
     41 // except the best available. Global, not per-target, hence must be outside
     42 // HWY_NAMESPACE. Declare first because HWY_ONCE is only true after some code
     43 // has been re-included.
     44 extern size_t last_bits;
     45 extern uint64_t best_target;
     46 #if HWY_ONCE
     47 size_t last_bits = 0;
     48 uint64_t best_target = ~0ull;
     49 #endif
     50 namespace HWY_NAMESPACE {
     51 namespace {
     52 
     53 template <size_t kBits, typename T>
     54 T Random(RandomState& rng) {
     55  return ConvertScalarTo<T>(Random32(&rng) & kBits);
     56 }
     57 
     58 template <typename T>
     59 class Checker {
     60 public:
     61  explicit Checker(size_t num) { raw_.reserve(num); }
     62  void NotifyRaw(T raw) { raw_.push_back(raw); }
     63 
     64  void NotifyRawOutput(size_t bits, T raw) {
     65    if (raw_[num_verified_] != raw) {
     66      HWY_ABORT("%zu bits: pos %zu of %zu, expected %.0f actual %.0f\n", bits,
     67                num_verified_, raw_.size(),
     68                ConvertScalarTo<double>(raw_[num_verified_]),
     69                ConvertScalarTo<double>(raw));
     70    }
     71    ++num_verified_;
     72  }
     73 
     74 private:
     75  std::vector<T> raw_;
     76  size_t num_verified_ = 0;
     77 };
     78 
     79 template <template <size_t> class PackT, size_t kVectors, size_t kBits>
     80 struct TestPack {
     81  template <typename T, class D>
     82  void operator()(T /* t */, D d) {
     83    constexpr size_t kLoops = 16;  // working set slightly larger than L1
     84    const size_t N = Lanes(d);
     85    RandomState rng(N * 129);
     86    static_assert(kBits <= kVectors, "");
     87    const size_t num_per_loop = N * kVectors;
     88    const size_t num = num_per_loop * kLoops;
     89    const size_t num_packed_per_loop = N * kBits;
     90    const size_t num_packed = num_packed_per_loop * kLoops;
     91    Checker<T> checker(num);
     92    AlignedFreeUniquePtr<T[]> raw = hwy::AllocateAligned<T>(num);
     93    AlignedFreeUniquePtr<T[]> raw2 = hwy::AllocateAligned<T>(num);
     94    AlignedFreeUniquePtr<T[]> packed = hwy::AllocateAligned<T>(num_packed);
     95    HWY_ASSERT(raw && raw2 && packed);
     96 
     97    for (size_t i = 0; i < num; ++i) {
     98      raw[i] = Random<kBits, T>(rng);
     99      checker.NotifyRaw(raw[i]);
    100    }
    101 
    102    best_target = HWY_MIN(best_target, HWY_TARGET);
    103    const bool run_bench = HWY_BIT_PACK_BENCHMARK && (kBits != last_bits) &&
    104                           (HWY_TARGET == best_target);
    105    last_bits = kBits;
    106 
    107    const PackT<kBits> func;
    108 
    109    if (run_bench) {
    110      const size_t kNumInputs = 1;
    111      const size_t num_items = num * size_t(Unpredictable1());
    112      const FuncInput inputs[kNumInputs] = {num_items};
    113      Result results[kNumInputs];
    114 
    115      Params p;
    116      p.verbose = false;
    117      p.max_evals = 7;
    118      p.target_rel_mad = 0.002;
    119      const size_t num_results = MeasureClosure(
    120          [&](FuncInput) HWY_ATTR {
    121            for (size_t i = 0, pi = 0; i < num;
    122                 i += num_per_loop, pi += num_packed_per_loop) {
    123              func.Pack(d, raw.get() + i, packed.get() + pi);
    124            }
    125            T& val = packed.get()[Random32(&rng) % num_packed];
    126            T zero = static_cast<T>(Unpredictable1() - 1);
    127            val = static_cast<T>(val + zero);
    128            for (size_t i = 0, pi = 0; i < num;
    129                 i += num_per_loop, pi += num_packed_per_loop) {
    130              func.Unpack(d, packed.get() + pi, raw2.get() + i);
    131            }
    132            return raw2[Random32(&rng) % num];
    133          },
    134          inputs, kNumInputs, results, p);
    135      if (num_results != kNumInputs) {
    136        HWY_WARN("MeasureClosure failed.\n");
    137        return;
    138      }
    139      // Print throughput for pack+unpack round trip
    140      for (size_t i = 0; i < num_results; ++i) {
    141        const size_t bytes_per_element = (kBits + 7) / 8;
    142        const double bytes =
    143            static_cast<double>(results[i].input * bytes_per_element);
    144        const double seconds =
    145            results[i].ticks / platform::InvariantTicksPerSecond();
    146        printf("Bits:%2d elements:%3d GB/s:%4.1f (+/-%3.1f%%)\n",
    147               static_cast<int>(kBits), static_cast<int>(results[i].input),
    148               1E-9 * bytes / seconds, results[i].variability * 100.0);
    149      }
    150    } else {
    151      for (size_t i = 0, pi = 0; i < num;
    152           i += num_per_loop, pi += num_packed_per_loop) {
    153        func.Pack(d, raw.get() + i, packed.get() + pi);
    154      }
    155      T& val = packed.get()[Random32(&rng) % num_packed];
    156      T zero = static_cast<T>(Unpredictable1() - 1);
    157      val = static_cast<T>(val + zero);
    158      for (size_t i = 0, pi = 0; i < num;
    159           i += num_per_loop, pi += num_packed_per_loop) {
    160        func.Unpack(d, packed.get() + pi, raw2.get() + i);
    161      }
    162    }
    163 
    164    for (size_t i = 0; i < num; ++i) {
    165      checker.NotifyRawOutput(kBits, raw2[i]);
    166    }
    167  }
    168 };
    169 
    170 void TestAllPack8() {
    171  ForShrinkableVectors<TestPack<Pack8, 8, 1>>()(uint8_t());
    172  ForShrinkableVectors<TestPack<Pack8, 8, 2>>()(uint8_t());
    173  ForShrinkableVectors<TestPack<Pack8, 8, 3>>()(uint8_t());
    174  ForShrinkableVectors<TestPack<Pack8, 8, 4>>()(uint8_t());
    175  ForShrinkableVectors<TestPack<Pack8, 8, 5>>()(uint8_t());
    176  ForShrinkableVectors<TestPack<Pack8, 8, 6>>()(uint8_t());
    177  ForShrinkableVectors<TestPack<Pack8, 8, 7>>()(uint8_t());
    178  ForShrinkableVectors<TestPack<Pack8, 8, 8>>()(uint8_t());
    179 }
    180 
    181 void TestAllPack16() {
    182  ForShrinkableVectors<TestPack<Pack16, 16, 1>>()(uint16_t());
    183  ForShrinkableVectors<TestPack<Pack16, 16, 2>>()(uint16_t());
    184  ForShrinkableVectors<TestPack<Pack16, 16, 3>>()(uint16_t());
    185  ForShrinkableVectors<TestPack<Pack16, 16, 4>>()(uint16_t());
    186  ForShrinkableVectors<TestPack<Pack16, 16, 5>>()(uint16_t());
    187  ForShrinkableVectors<TestPack<Pack16, 16, 6>>()(uint16_t());
    188  ForShrinkableVectors<TestPack<Pack16, 16, 7>>()(uint16_t());
    189  ForShrinkableVectors<TestPack<Pack16, 16, 8>>()(uint16_t());
    190  ForShrinkableVectors<TestPack<Pack16, 16, 9>>()(uint16_t());
    191  ForShrinkableVectors<TestPack<Pack16, 16, 10>>()(uint16_t());
    192  ForShrinkableVectors<TestPack<Pack16, 16, 11>>()(uint16_t());
    193  ForShrinkableVectors<TestPack<Pack16, 16, 12>>()(uint16_t());
    194  ForShrinkableVectors<TestPack<Pack16, 16, 13>>()(uint16_t());
    195  ForShrinkableVectors<TestPack<Pack16, 16, 14>>()(uint16_t());
    196  ForShrinkableVectors<TestPack<Pack16, 16, 15>>()(uint16_t());
    197  ForShrinkableVectors<TestPack<Pack16, 16, 16>>()(uint16_t());
    198 }
    199 
    200 void TestAllPack32() {
    201  ForShrinkableVectors<TestPack<Pack32, 32, 1>>()(uint32_t());
    202  ForShrinkableVectors<TestPack<Pack32, 32, 2>>()(uint32_t());
    203  ForShrinkableVectors<TestPack<Pack32, 32, 6>>()(uint32_t());
    204  ForShrinkableVectors<TestPack<Pack32, 32, 11>>()(uint32_t());
    205  ForShrinkableVectors<TestPack<Pack32, 32, 16>>()(uint32_t());
    206  ForShrinkableVectors<TestPack<Pack32, 32, 31>>()(uint32_t());
    207  ForShrinkableVectors<TestPack<Pack32, 32, 32>>()(uint32_t());
    208 }
    209 
    210 void TestAllPack64() {
    211  // Fails, but only on GCC 13.
    212 #if !(HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1400 && \
    213      HWY_TARGET == HWY_RVV)
    214  ForShrinkableVectors<TestPack<Pack64, 64, 1>>()(uint64_t());
    215  ForShrinkableVectors<TestPack<Pack64, 64, 5>>()(uint64_t());
    216  ForShrinkableVectors<TestPack<Pack64, 64, 12>>()(uint64_t());
    217  ForShrinkableVectors<TestPack<Pack64, 64, 16>>()(uint64_t());
    218  ForShrinkableVectors<TestPack<Pack64, 64, 27>>()(uint64_t());
    219  ForShrinkableVectors<TestPack<Pack64, 64, 31>>()(uint64_t());
    220  ForShrinkableVectors<TestPack<Pack64, 64, 33>>()(uint64_t());
    221  ForShrinkableVectors<TestPack<Pack64, 64, 41>>()(uint64_t());
    222  ForShrinkableVectors<TestPack<Pack64, 64, 61>>()(uint64_t());
    223 #endif
    224 }
    225 
    226 }  // namespace
    227 // NOLINTNEXTLINE(google-readability-namespace-comments)
    228 }  // namespace HWY_NAMESPACE
    229 }  // namespace hwy
    230 HWY_AFTER_NAMESPACE();
    231 
    232 #if HWY_ONCE
        // Test registration and the program entry point. Guarded by HWY_ONCE
        // because this file names itself as HWY_TARGET_INCLUDE and is pulled in
        // again through hwy/foreach_target.h; presumably the guard keeps this
        // section from being emitted more than once - confirm against Highway docs.
    233 namespace hwy {
    234 namespace {
        // Opens the BitPackTest suite.
    235 HWY_BEFORE_TEST(BitPackTest);
        // One entry per TestAllPack* function defined in HWY_NAMESPACE above.
    236 HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack8);
    237 HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack16);
    238 HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack32);
    239 HWY_EXPORT_AND_TEST_P(BitPackTest, TestAllPack64);
        // Closes the suite opened by HWY_BEFORE_TEST.
    240 HWY_AFTER_TEST();
    241 }  // namespace
    242 }  // namespace hwy
        // NOTE(review): presumably expands to the test binary's main(); verify
        // in hwy/tests/hwy_gtest.h.
    243 HWY_TEST_MAIN();
    244 #endif  // HWY_ONCE