tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

perf_counters.h (5554B)


      1 // Copyright 2024 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef HIGHWAY_HWY_PERF_COUNTERS_H_
     17 #define HIGHWAY_HWY_PERF_COUNTERS_H_
     18 
     19 // Reads OS/CPU performance counters.
     20 
     21 #include <stddef.h>
     22 
     23 #include "hwy/base.h"  // HWY_ABORT
     24 #include "hwy/bit_set.h"
     25 
     26 namespace hwy {
     27 namespace platform {
     28 
     29 // Avoid padding in case callers such as profiler.h store many instances.
     30 #pragma pack(push, 1)
     31 // Provides access to CPU/OS performance counters. Each instance has space for
     32 // multiple counter values; which counters these are may change in future.
     33 // Although counters are per-CPU, Linux accesses them via a syscall, hence we
     34 // use the monostate pattern to avoid callers having to pass around a pointer.
     35 // Note that this is not thread-safe, so the static member functions should only
     36 // be called from the main thread. The constructor reads the counter values.
     37 class PerfCounters {
     38 public:
     39  // Chosen such that this class occupies one or two cache lines.
     40  static constexpr size_t kCapacity = 14;  // Number of slots in `values_`.
     41 
     42  // Bit indices used to identify counters. The ordering is arbitrary. Some of
     43  // these counters may be 'removed' in the sense of not being visited by
     44  // `Foreach`, but their enumerators will remain. New counters may be appended.
     45  enum Counter {
     46    kRefCycles = 0,
     47    kInstructions,
     48    kBranches,
     49    kBranchMispredicts,
     50    kBusCycles,
     51    kCacheRefs,
     52    kCacheMisses,
     53    kL3Loads,
     54    kL3Stores,
     55    kPageFaults,  // SW (presumably a software event; TODO confirm)
     56    kMigrations   // SW (presumably a software event; TODO confirm)
     57  };  // BitSet64 requires these values to be less than 64.
     58 
     59  // Returns a name for `c` for user-facing messages; unused by the implementation.
     60  static inline const char* Name(Counter c) {
     61    switch (c) {
     62      case kRefCycles:
     63        return "ref_cycles";
     64      case kInstructions:
     65        return "instructions";
     66      case kBranches:
     67        return "branches";
     68      case kBranchMispredicts:
     69        return "branch_mispredicts";
     70      case kBusCycles:
     71        return "bus_cycles";
     72      case kCacheRefs:
     73        return "cache_refs";
     74      case kCacheMisses:
     75        return "cache_misses";
     76      case kL3Loads:
     77        return "l3_load";
     78      case kL3Stores:
     79        return "l3_store";
     80      case kPageFaults:
     81        return "page_fault";
     82      case kMigrations:
     83        return "migration";
     84      default:  // Value that is not one of the enumerators above.
     85        HWY_UNREACHABLE;
     86    }
     87  }
     88 
     89  // Returns false if counters are unavailable. Must be called at least once
     90  // before `StartAll`; it is separate to reduce the overhead of repeatedly
     91  // stopping/starting counters.
     92  HWY_DLLEXPORT static bool Init();
     93 
     94  // Returns false if counters are unavailable, otherwise starts them. Note that
     95  // they default to stopped. Unless this is called, the values read may be 0.
     96  HWY_DLLEXPORT static bool StartAll();
     97 
     98  // Stops and zeros all counters. This is not necessary if users subtract the
     99  // previous counter values, but can increase precision because floating-point
    100  // has more precision near zero.
    101  HWY_DLLEXPORT static void StopAllAndReset();
    102 
    103  // Reads the current (extrapolated, in case of multiplexing) counter values.
    104  HWY_DLLEXPORT PerfCounters();
    105 
    106  // Returns whether any counters were successfully read.
    107  bool AnyValid() const { return valid_.Any(); }
    108 
    109  // Returns whether the given counter was successfully read.
    110  bool IsValid(Counter c) const {
    111    const size_t bit_idx = static_cast<size_t>(c);  // Enumerator = bit index.
    112    return valid_.Get(bit_idx);
    113  }
    114 
    115  // Returns the maximum extrapolation factor for any counter, which is the
    116  // total time between `StartAll` and now or the last `StopAllAndReset`,
    117  // divided by the time that the counter was actually running. This
    118  // approximates the number of counter groups that the CPU multiplexes onto the
    119  // actual counter hardware. It is only meaningful if AnyValid().
    120  double MaxExtrapolate() const { return max_extrapolate_; }
    121 
    122  // Returns the value of the given counter, or zero if it is not valid.
    123  double Get(Counter c) const {
    124    return IsValid(c) ? values_[IndexForCounter(c)] : 0.0;
    125  }
    126 
    127  // For each valid counter in increasing numerical order, calls `visitor` with
    128  // the value (a non-const lvalue, as this function is non-const) and `Counter`.
    129  template <class Visitor>
    130  void Foreach(const Visitor& visitor) {
    131    valid_.Foreach([&](size_t bit_idx) {
    132      const Counter c = static_cast<Counter>(bit_idx);
    133      visitor(values_[IndexForCounter(c)], c);
    134    });
    135  }
    136 
    137 private:
    138  // Index within `values_` for a given counter.
    139  HWY_DLLEXPORT static size_t IndexForCounter(Counter c);
    140 
    141  BitSet64 valid_;  // Bit `c` is set if `Counter` c was successfully read.
    142  double max_extrapolate_;  // Returned by `MaxExtrapolate`.
    143  // Floating-point because these are extrapolated (multiplexing). It would be
    144  // nice for this to fit in one cache line to reduce the cost of reading
    145  // counters in profiler.h, but some of the values are too large for float and
    146  // we want more than 8 counters. Ensure all values are sums, not ratios, so
    147  // that profiler.h can add/subtract them. These are contiguous in memory, in
    148  // the order that counters were initialized.
    149  double values_[kCapacity];  // Indexed by `IndexForCounter`.
    150 };
    151 #pragma pack(pop)
    152 
    153 }  // namespace platform
    154 }  // namespace hwy
    155 
    156 #endif  // HIGHWAY_HWY_PERF_COUNTERS_H_