tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

perf_counters_test.cc (5746B)


      1 // Copyright 2024 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include "hwy/perf_counters.h"
     17 
     18 #include <stddef.h>
     19 #include <stdint.h>
     20 #include <stdio.h>
     21 
     22 #include <vector>
     23 
     24 #include "hwy/contrib/thread_pool/futex.h"  // NanoSleep
     25 #include "hwy/nanobenchmark.h"  // Unpredictable1
     26 #include "hwy/tests/hwy_gtest.h"
     27 #include "hwy/tests/test_util-inl.h"
     28 #include "hwy/timer.h"
     29 
     30 namespace hwy {
     31 namespace {
     32 
     33 using ::hwy::platform::PerfCounters;
     34 
     // Constructs a PerfCounters object, prints its state and every reported
     // counter to stderr, copies each reported counter into `values`, and finally
     // calls PerfCounters::StopAllAndReset().
     //
     // r:      workload result from the caller; it is only printed here (cast to
     //         int). NOTE(review): presumably passed so the caller's workload has
     //         an observable result - confirm.
     // values: output array indexed by static_cast<size_t>(Counter). Only entries
     //         visited by Foreach are written, so other entries keep whatever the
     //         caller stored; both tests in this file pass a zeroed double[64].
     35 void ReadAndPrint(uint64_t r, double* values) {
         // Buffer handed to HaveTimerStop; its contents are not read afterwards.
     36  char cpu100[100];
     37  const bool have_stop = hwy::platform::HaveTimerStop(cpu100);
     38  const uint64_t t0 = timer::Start();
     39 
         // Only the PerfCounters construction sits between t0 and t1; the elapsed
         // time is what gets printed as "overhead" below.
     40  PerfCounters counters;
         // Use timer::Stop() when available, otherwise take a second Start().
     41  const uint64_t t1 = have_stop ? timer::Stop() : timer::Start();
         // Convert the tick delta to nanoseconds.
     42  const double elapsed_ns =
     43      static_cast<double>(t1 - t0) * 1E9 / platform::InvariantTicksPerSecond();
     44  fprintf(stderr, "r: %d, any valid %d extrapolate %f, overhead %.1f ns\n",
     45          static_cast<int>(r), counters.AnyValid(), counters.MaxExtrapolate(),
     46          elapsed_ns);
     47 
         // Whenever at least one counter is valid, the extrapolation factor must
         // be at least 1.0.
     48  if (counters.AnyValid()) {
     49    HWY_ASSERT(counters.MaxExtrapolate() >= 1.0);
     50  }
     51 
         // Every counter passed to the callback must be valid; print it by name
         // and store it at the index given by its enum value.
     52  counters.Foreach([&counters, values](double val, PerfCounters::Counter c) {
     53    HWY_ASSERT(counters.IsValid(c));
     54    fprintf(stderr, "%-20s: %.3E\n", PerfCounters::Name(c), val);
     55    values[static_cast<size_t>(c)] = val;
     56  });
     57  PerfCounters::StopAllAndReset();
     58 }
     59 
     60 // Ensures a memory-intensive workload has high memory-related counters.
        // Returns early with a warning (without failing) when PerfCounters::Init()
        // or PerfCounters::StartAll() returns false.
     61 TEST(PerfCountersTest, TestMem) {
     62  RandomState rng;
     63  if (!PerfCounters::Init() || !PerfCounters::StartAll()) {
     64    HWY_WARN("Perf counters unavailable, skipping test\n");
     65    return;
     66  }
     67  // Force L3 cache misses (loads).
         // 128 * 1024 * 1024 elements of uint64_t, i.e. 1 GiB of storage.
     68  std::vector<uint64_t> big_array(128 * 1024 * 1024);
         // Write every element. NOTE(review): Unpredictable1() presumably returns 1
         // in a way the compiler cannot see, so each element would be rng() & 1 -
         // confirm in hwy/nanobenchmark.h.
     69  for (uint64_t& x : big_array) {
     70    x = rng() & static_cast<uint64_t>(hwy::Unpredictable1());
     71  }
         // Read back one element from the first 65536 entries; the value is handed
         // to ReadAndPrint, which prints it.
     72  const uint64_t r = big_array[rng() & 0xFFFF];
     73 
         // Zero-initialized; entries that ReadAndPrint does not write stay 0.0,
         // which is why most checks below also accept exactly 0.0.
     74  double values[64] = {0.0};
     75  ReadAndPrint(r, values);
     76 
     77  // Note that counters might not be available, and values differ considerably
     78  // for debug/sanitizer builds.
         // Each check is "not reported (0.0) or above a lower bound". The trailing
         // comments appear to record previously observed ranges (assumption).
     79  HWY_ASSERT(values[PerfCounters::kRefCycles] == 0.0 ||
     80             values[PerfCounters::kRefCycles] > 1E8);  // 470M..9B
     81  HWY_ASSERT(values[PerfCounters::kInstructions] == 0.0 ||
     82             values[PerfCounters::kInstructions] > 1E5);  // 1.5M..10B
     83  HWY_ASSERT(values[PerfCounters::kPageFaults] == 0.0 ||
     84             values[PerfCounters::kPageFaults] > 1);  // 4..500K
     85  HWY_ASSERT(values[PerfCounters::kBranches] == 0.0 ||
     86             values[PerfCounters::kBranches] > 1E5);           // > 900K
         // Branch mispredicts are only checked against an upper bound here.
     87  HWY_ASSERT(values[PerfCounters::kBranchMispredicts] < 1E9);  // 273K..400M
     88 
     89  HWY_ASSERT(values[PerfCounters::kL3Loads] == 0.0 ||
     90             values[PerfCounters::kL3Loads] > 10.0);  // ~90K, 50 with L4
     91  HWY_ASSERT(values[PerfCounters::kL3Stores] == 0.0 ||
     92             values[PerfCounters::kL3Stores] > 10.0);  // 9K..5M
     93 
     94  HWY_ASSERT(values[PerfCounters::kCacheRefs] == 0.0 ||
     95             values[PerfCounters::kCacheRefs] > 1E4);  // 75K..66M
     96  HWY_ASSERT(values[PerfCounters::kCacheMisses] == 0.0 ||
     97             values[PerfCounters::kCacheMisses] > 1.0);  // 10..51M
     98  HWY_ASSERT(values[PerfCounters::kBusCycles] == 0.0 ||
     99             values[PerfCounters::kBusCycles] > 1E6);  // 8M
    100 }
    101 
    102 // Ensures a branch-heavy workload has high branch-related counters and not
    103 // too high memory-related counters.
        // Returns early with a warning (without failing) when PerfCounters::Init()
        // or PerfCounters::StartAll() returns false.
    104 TEST(PerfCountersTest, RunBranches) {
    105  RandomState rng;
    106  if (!PerfCounters::Init() || !PerfCounters::StartAll()) {
    107    HWY_WARN("Perf counters unavailable, skipping test\n");
    108    return;
    109  }
    110 
    111  // Branch-heavy, non-constexpr calculation so we see changes to counters.
         // The iteration count is Unpredictable1() * 100000 plus one random bit, so
         // it is not a compile-time constant.
    112  const size_t iters =
    113      static_cast<size_t>(hwy::Unpredictable1()) * 100000 + (rng() & 1);
    114  uint64_t r = rng();
    115  for (size_t i = 0; i < iters; ++i) {
           // The branch condition depends on the bit count of a fresh random value.
    116    if (PopCount(rng()) < 36) {
    117      r += rng() & 0xFF;
    118    } else {
    119      // Entirely different operation to ensure there is a branch.
    120      r >>= 1;
    121    }
    122    // Ensure test runs long enough for counter multiplexing to happen.
           // NOTE(review): the argument is presumably in nanoseconds (100 us per
           // iteration) - confirm against NanoSleep in futex.h.
    123    NanoSleep(100 * 1000);
    124  }
    125 
         // Zero-initialized; entries that ReadAndPrint does not write stay 0.0.
    126  double values[64] = {0.0};
    127  ReadAndPrint(r, values);
    128 
    129  // Note that counters might not be available, and values differ considerably
    130  // for debug/sanitizer builds.
         // Cycle/instruction/branch counters: "not reported (0.0) or above a lower
         // bound". Trailing comments appear to record observed ranges (assumption).
    131  HWY_ASSERT(values[PerfCounters::kRefCycles] == 0.0 ||
    132             values[PerfCounters::kRefCycles] > 1E3);  // 13K..18M
    133  HWY_ASSERT(values[PerfCounters::kInstructions] == 0.0 ||
    134             values[PerfCounters::kInstructions] > 100.0);  // 900..2M
    135  HWY_ASSERT(values[PerfCounters::kBranches] == 0.0 ||
    136             values[PerfCounters::kBranches] > 100.0);  // 1K..273K
    137  HWY_ASSERT(values[PerfCounters::kBranchMispredicts] == 0 ||
    138             values[PerfCounters::kBranchMispredicts] > 10.0);  // 65..5K
    139 
         // Memory-related counters and page faults: upper bounds only, which an
         // unreported counter (0.0) also satisfies.
    140  HWY_ASSERT(values[PerfCounters::kL3Loads] < 1E8);       // 174K..12M
    141  HWY_ASSERT(values[PerfCounters::kL3Stores] < 1E7);      // 44K..1.8M
    142  HWY_ASSERT(values[PerfCounters::kCacheRefs] < 1E9);     // 5M..104M
    143  HWY_ASSERT(values[PerfCounters::kCacheMisses] < 1E8);   // 500K..10M
    144  HWY_ASSERT(values[PerfCounters::kBusCycles] < 1E11);    // 1M..10B
    145  HWY_ASSERT(values[PerfCounters::kPageFaults] < 1E4);    // 0..1.1K (in SDE)
    146 }
    147 
    148 }  // namespace
    149 }  // namespace hwy
    150 
    // NOTE(review): presumably expands to the entry point of this test binary.
    // The macro is not defined in this file (likely hwy/tests/hwy_gtest.h) -
    // confirm.
    151 HWY_TEST_MAIN();