tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

BaseProfilerState.h (20892B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // The Gecko Profiler is an always-on profiler that takes fast and low overhead
      8 // samples of the program execution using only userspace functionality for
      9 // portability. The goal of this module is to provide performance data in a
     10 // generic cross-platform way without requiring custom tools or kernel support.
     11 //
     12 // Samples are collected to form a timeline with optional timeline event
     13 // (markers) used for filtering. The samples include both native stacks and
     14 // platform-independent "label stack" frames.
     15 
     16 #ifndef BaseProfilerState_h
     17 #define BaseProfilerState_h
     18 
     19 // This header contains most functions that give information about the Base
     20 // Profiler: Whether it is active or not, paused, the selected features, and
     21 // some generic process and thread information.
     22 // It is safe to include unconditionally, but uses of structs and functions must
     23 // be guarded by `#ifdef MOZ_GECKO_PROFILER`.
     24 
     25 #include "mozilla/BaseProfilerUtils.h"
     26 
     27 #ifndef MOZ_GECKO_PROFILER
     28 
     29 #  define AUTO_PROFILER_STATS(name)
     30 
     31 namespace mozilla::baseprofiler {
     32 
     33 [[nodiscard]] inline bool profiler_is_active() { return false; }
     34 [[nodiscard]] inline bool profiler_is_active_and_unpaused() { return false; }
     35 
     36 }  // namespace mozilla::baseprofiler
     37 
     38 #else  // !MOZ_GECKO_PROFILER
     39 
     40 #  include "mozilla/Atomics.h"
     41 #  include "mozilla/Maybe.h"
     42 
     43 #  include <stdint.h>
     44 
     45 // Uncomment the following line to display profiler runtime statistics at
     46 // shutdown.
     47 // #  define PROFILER_RUNTIME_STATS
     48 
     49 #  ifdef PROFILER_RUNTIME_STATS
     50 #    include "mozilla/TimeStamp.h"
     51 #  endif
     52 
     53 namespace mozilla::baseprofiler {
     54 
     55 #  ifdef PROFILER_RUNTIME_STATS
     56 // This class gathers durations and displays some basic stats when destroyed.
     57 // It is intended to be used as a static variable (see `AUTO_PROFILER_STATS`
     58 // below), to display stats at the end of the program.
     59 class StaticBaseProfilerStats {
     60 public:
     61  explicit StaticBaseProfilerStats(const char* aName) : mName(aName) {}
     62 
     63  ~StaticBaseProfilerStats() {
     64    // Using unsigned long long for computations and printfs.
     65    using ULL = unsigned long long;
     66    ULL n = static_cast<ULL>(mNumberDurations);
     67    if (n != 0) {
     68      ULL sumNs = static_cast<ULL>(mSumDurationsNs);
     69      printf(
     70          "[%d] Profiler stats `%s`: %llu ns / %llu = %llu ns, max %llu ns\n",
     71          int(profiler_current_process_id().ToNumber()), mName, sumNs, n,
     72          sumNs / n, static_cast<ULL>(mLongestDurationNs));
     73    } else {
     74      printf("[%d] Profiler stats `%s`: (nothing)\n",
     75             int(profiler_current_process_id().ToNumber()), mName);
     76    }
     77  }
     78 
     79  void AddDurationFrom(TimeStamp aStart) {
     80    DurationNs duration = static_cast<DurationNs>(
     81        (TimeStamp::Now() - aStart).ToMicroseconds() * 1000 + 0.5);
     82    mSumDurationsNs += duration;
     83    ++mNumberDurations;
     84    // Update mLongestDurationNs if this one is longer.
     85    for (;;) {
     86      DurationNs longest = mLongestDurationNs;
     87      if (MOZ_LIKELY(longest >= duration)) {
     88        // This duration is not the longest, nothing to do.
     89        break;
     90      }
     91      if (MOZ_LIKELY(mLongestDurationNs.compareExchange(longest, duration))) {
     92        // Successfully updated `mLongestDurationNs` with the new value.
     93        break;
     94      }
     95      // Otherwise someone else just updated `mLongestDurationNs`, we need to
     96      // try again by looping.
     97    }
     98  }
     99 
    100 private:
    101  using DurationNs = uint64_t;
    102  using Count = uint32_t;
    103 
    104  Atomic<DurationNs> mSumDurationsNs{0};
    105  Atomic<DurationNs> mLongestDurationNs{0};
    106  Atomic<Count> mNumberDurations{0};
    107  const char* mName;
    108 };
    109 
    110 // RAII object that measure its scoped lifetime duration and reports it to a
    111 // `StaticBaseProfilerStats`.
    112 class MOZ_RAII AutoProfilerStats {
    113 public:
    114  explicit AutoProfilerStats(StaticBaseProfilerStats& aStats)
    115      : mStats(aStats), mStart(TimeStamp::Now()) {}
    116 
    117  ~AutoProfilerStats() { mStats.AddDurationFrom(mStart); }
    118 
    119 private:
    120  StaticBaseProfilerStats& mStats;
    121  TimeStamp mStart;
    122 };
    123 
// Collects basic duration statistics for the code between this macro and the
// end of its enclosing scope. It declares a function-local static
// `StaticBaseProfilerStats` named `sStat<name>` (constructed with the
// stringified `name`) and an `AutoProfilerStats` named `autoStat<name>` that
// reports to it.
// `name` is pasted into both variable names, so it must be a valid identifier
// fragment. The same name may be used at another location, but each location
// has its own static object and their stats are not combined; use different
// names if the locations should be distinguishable in the output.
#    define AUTO_PROFILER_STATS(name)                                      \
      static ::mozilla::baseprofiler::StaticBaseProfilerStats sStat##name( \
          #name);                                                          \
      ::mozilla::baseprofiler::AutoProfilerStats autoStat##name(sStat##name);
    134 
    135 #  else  // PROFILER_RUNTIME_STATS
    136 
// PROFILER_RUNTIME_STATS is not defined: the macro expands to nothing, so its
// uses need no guarding.
#    define AUTO_PROFILER_STATS(name)
    138 
    139 #  endif  // PROFILER_RUNTIME_STATS else
    140 
    141 //---------------------------------------------------------------------------
    142 // Profiler features
    143 //---------------------------------------------------------------------------
    144 
// Help text for the "power" feature: POWER_HELP is the description given to
// the `Power` entry of BASE_PROFILER_FOR_EACH_FEATURE. The wording is chosen
// at compile time from the platform macros tested below, falling back to a
// "not supported" message when none of them match.
#  if defined(__APPLE__) && defined(__aarch64__)
#    define POWER_HELP "Sample per process power use"
#  elif defined(__APPLE__) && defined(__x86_64__)
#    define POWER_HELP \
      "Record the power used by the entire system with each sample."
#  elif defined(__linux__) && defined(__x86_64__)
#    define POWER_HELP                                                \
      "Record the power used by the entire system with each sample. " \
      "Only available with Intel CPUs and requires setting "          \
      "the sysctl kernel.perf_event_paranoid to 0."
#  elif defined(_MSC_VER)
#    define POWER_HELP                                                       \
      "Record the value of every energy meter available on the system with " \
      "each sample. Only available on Windows 11 with Intel CPUs."
#  else
#    define POWER_HELP "Not supported on this platform."
#  endif
    162 
    163 // Higher-order macro containing all the feature info in one place. Define
    164 // |MACRO| appropriately to extract the relevant parts. Note that the number
    165 // values are used internally only and so can be changed without consequence.
    166 // Any changes to this list should also be applied to the feature list in
    167 // toolkit/components/extensions/schemas/geckoProfiler.json.
    168 // *** Synchronize with lists in ProfilerState.h and geckoProfiler.json ***
    169 #  define BASE_PROFILER_FOR_EACH_FEATURE(MACRO)                              \
    170    MACRO(0, "java", Java, "Profile Java code, Android only")                \
    171                                                                             \
    172    MACRO(1, "js", JS,                                                       \
    173          "Get the JS engine to expose the JS stack to the profiler")        \
    174                                                                             \
    175    MACRO(2, "mainthreadio", MainThreadIO, "Add main thread file I/O")       \
    176                                                                             \
    177    MACRO(3, "fileio", FileIO,                                               \
    178          "Add file I/O from all profiled threads, implies mainthreadio")    \
    179                                                                             \
    180    MACRO(4, "fileioall", FileIOAll,                                         \
    181          "Add file I/O from all threads, implies fileio")                   \
    182                                                                             \
    183    MACRO(5, "nomarkerstacks", NoMarkerStacks,                               \
    184          "Markers do not capture stacks, to reduce overhead")               \
    185                                                                             \
    186    MACRO(6, "screenshots", Screenshots,                                     \
    187          "Take a snapshot of the window on every composition")              \
    188                                                                             \
    189    MACRO(7, "seqstyle", SequentialStyle,                                    \
    190          "Disable parallel traversal in styling")                           \
    191                                                                             \
    192    MACRO(8, "stackwalk", StackWalk,                                         \
    193          "Walk the C++ stack, not available on all platforms")              \
    194                                                                             \
    195    MACRO(9, "jsallocations", JSAllocations,                                 \
    196          "Have the JavaScript engine track allocations")                    \
    197                                                                             \
    198    MACRO(10, "nostacksampling", NoStackSampling,                            \
    199          "Disable all stack sampling: Cancels \"js\", \"stackwalk\" and "   \
    200          "labels")                                                          \
    201                                                                             \
    202    MACRO(11, "nativeallocations", NativeAllocations,                        \
    203          "Collect the stacks from a smaller subset of all native "          \
    204          "allocations, biasing towards collecting larger allocations")      \
    205                                                                             \
    206    MACRO(12, "ipcmessages", IPCMessages,                                    \
    207          "Have the IPC layer track cross-process messages")                 \
    208                                                                             \
    209    MACRO(13, "audiocallbacktracing", AudioCallbackTracing,                  \
    210          "Audio callback tracing")                                          \
    211                                                                             \
    212    MACRO(14, "cpu", CPUUtilization, "CPU utilization")                      \
    213                                                                             \
    214    MACRO(15, "notimerresolutionchange", NoTimerResolutionChange,            \
    215          "Do not adjust the timer resolution for fast sampling, so that "   \
    216          "other Firefox timers do not get affected")                        \
    217                                                                             \
    218    MACRO(16, "cpuallthreads", CPUAllThreads,                                \
    219          "Sample the CPU utilization of all registered threads")            \
    220                                                                             \
    221    MACRO(17, "samplingallthreads", SamplingAllThreads,                      \
    222          "Sample the stacks of all registered threads")                     \
    223                                                                             \
    224    MACRO(18, "markersallthreads", MarkersAllThreads,                        \
    225          "Record markers from all registered threads")                      \
    226                                                                             \
    227    MACRO(19, "unregisteredthreads", UnregisteredThreads,                    \
    228          "Discover and profile unregistered threads -- beware: expensive!") \
    229                                                                             \
    230    MACRO(20, "processcpu", ProcessCPU,                                      \
    231          "Sample the CPU utilization of each process")                      \
    232                                                                             \
    233    MACRO(21, "power", Power, POWER_HELP)                                    \
    234                                                                             \
    235    MACRO(22, "cpufreq", CPUFrequency,                                       \
    236          "Record the clock frequency of "                                   \
    237          "every CPU core for every profiler sample.")                       \
    238                                                                             \
    239    MACRO(23, "bandwidth", Bandwidth,                                        \
    240          "Record the network bandwidth used for every profiler sample.")    \
    241                                                                             \
    242    MACRO(24, "memory", Memory,                                              \
    243          "Track the memory allocations and deallocations per process over " \
    244          "time.")                                                           \
    245                                                                             \
    246    MACRO(25, "tracing", Tracing,                                            \
    247          "Instead of sampling periodically, captures information about "    \
    248          "every function executed for the duration (JS only)")              \
    249                                                                             \
    250    MACRO(26, "sandbox", Sandbox,                                            \
    251          "Report sandbox syscalls and logs in the "                         \
    252          "profiler.")                                                       \
    253                                                                             \
    254    MACRO(27, "flows", Flows,                                                \
    255          "Include all flow-related markers. These markers show the program" \
    256          "better but can cause more overhead in some places than normal.")  \
    257                                                                             \
    258    MACRO(28, "jssources", JSSources,                                        \
    259          "Collect JavaScript source code information for profiled scripts.")
    260 
    261 // *** Synchronize with lists in ProfilerState.h and geckoProfiler.json ***
    262 
    263 struct ProfilerFeature {
    264 #  define DECLARE(n_, str_, Name_, desc_)                                \
    265    static constexpr uint32_t Name_ = (1u << n_);                        \
    266    [[nodiscard]] static constexpr bool Has##Name_(uint32_t aFeatures) { \
    267      return aFeatures & Name_;                                          \
    268    }                                                                    \
    269    static constexpr void Set##Name_(uint32_t& aFeatures) {              \
    270      aFeatures |= Name_;                                                \
    271    }                                                                    \
    272    static constexpr void Clear##Name_(uint32_t& aFeatures) {            \
    273      aFeatures &= ~Name_;                                               \
    274    }
    275 
    276  // Define a bitfield constant, a getter, and two setters for each feature.
    277  BASE_PROFILER_FOR_EACH_FEATURE(DECLARE)
    278 
    279 #  undef DECLARE
    280 };
    281 
    282 namespace detail {
    283 
    284 // RacyFeatures is only defined in this header file so that its methods can
    285 // be inlined into profiler_is_active(). Please do not use anything from the
    286 // detail namespace outside the profiler.
    287 
    288 // Within the profiler's code, the preferred way to check profiler activeness
    289 // and features is via ActivePS(). However, that requires locking gPSMutex.
    290 // There are some hot operations where absolute precision isn't required, so we
    291 // duplicate the activeness/feature state in a lock-free manner in this class.
    292 class RacyFeatures {
    293 public:
    294  MFBT_API static void SetActive(uint32_t aFeatures);
    295 
    296  MFBT_API static void SetInactive();
    297 
    298  MFBT_API static void SetPaused();
    299 
    300  MFBT_API static void SetUnpaused();
    301 
    302  MFBT_API static void SetSamplingPaused();
    303 
    304  MFBT_API static void SetSamplingUnpaused();
    305 
    306  [[nodiscard]] MFBT_API static mozilla::Maybe<uint32_t> FeaturesIfActive() {
    307    if (uint32_t af = sActiveAndFeatures; af & Active) {
    308      // Active, remove the Active&Paused bits to get all features.
    309      return Some(af & ~(Active | Paused | SamplingPaused));
    310    }
    311    return Nothing();
    312  }
    313 
    314  [[nodiscard]] MFBT_API static bool IsActive();
    315 
    316  [[nodiscard]] MFBT_API static bool IsActiveWithFeature(uint32_t aFeature);
    317 
    318  [[nodiscard]] MFBT_API static bool IsActiveWithoutFeature(uint32_t aFeature);
    319 
    320  // True if profiler is active, and not fully paused.
    321  // Note that periodic sampling *could* be paused!
    322  [[nodiscard]] MFBT_API static bool IsActiveAndUnpaused();
    323 
    324  // True if profiler is active, and sampling is not paused (though generic
    325  // `SetPaused()` or specific `SetSamplingPaused()`).
    326  [[nodiscard]] MFBT_API static bool IsActiveAndSamplingUnpaused();
    327 
    328 private:
    329  static constexpr uint32_t Active = 1u << 31;
    330  static constexpr uint32_t Paused = 1u << 30;
    331  static constexpr uint32_t SamplingPaused = 1u << 29;
    332 
    333 // Ensure Active/Paused don't overlap with any of the feature bits.
    334 #  define NO_OVERLAP(n_, str_, Name_, desc_)                \
    335    static_assert(ProfilerFeature::Name_ != SamplingPaused, \
    336                  "bad feature value");
    337 
    338  BASE_PROFILER_FOR_EACH_FEATURE(NO_OVERLAP);
    339 
    340 #  undef NO_OVERLAP
    341 
    342  // We combine the active bit with the feature bits so they can be read or
    343  // written in a single atomic operation.
    344  // TODO: Could this be MFBT_DATA for better inlining optimization?
    345  MFBT_DATA static Atomic<uint32_t, MemoryOrdering::Relaxed> sActiveAndFeatures;
    346 };
    347 
    348 MFBT_API bool IsThreadBeingProfiled();
    349 
    350 }  // namespace detail
    351 
    352 //---------------------------------------------------------------------------
    353 // Get information from the profiler
    354 //---------------------------------------------------------------------------
    355 
    356 // Is the profiler active? Note: the return value of this function can become
    357 // immediately out-of-date. E.g. the profile might be active but then
    358 // profiler_stop() is called immediately afterward. One common and reasonable
    359 // pattern of usage is the following:
    360 //
    361 //   if (profiler_is_active()) {
    362 //     ExpensiveData expensiveData = CreateExpensiveData();
    363 //     PROFILER_OPERATION(expensiveData);
    364 //   }
    365 //
    366 // where PROFILER_OPERATION is a no-op if the profiler is inactive. In this
    367 // case the profiler_is_active() check is just an optimization -- it prevents
    368 // us calling CreateExpensiveData() unnecessarily in most cases, but the
    369 // expensive data will end up being created but not used if another thread
    370 // stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION
    371 // calls.
    372 [[nodiscard]] inline bool profiler_is_active() {
    373  return baseprofiler::detail::RacyFeatures::IsActive();
    374 }
    375 
    376 // Same as profiler_is_active(), but also checks if the profiler is not paused.
    377 [[nodiscard]] inline bool profiler_is_active_and_unpaused() {
    378  return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused();
    379 }
    380 
    381 // Is the profiler active and unpaused, and is the current thread being
    382 // profiled? (Same caveats and recommented usage as profiler_is_active().)
    383 [[nodiscard]] inline bool profiler_thread_is_being_profiled() {
    384  return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused() &&
    385         baseprofiler::detail::IsThreadBeingProfiled();
    386 }
    387 
    388 // Is the profiler active and paused? Returns false if the profiler is inactive.
    389 [[nodiscard]] MFBT_API bool profiler_is_paused();
    390 
    391 // Is the profiler active and sampling is paused? Returns false if the profiler
    392 // is inactive.
    393 [[nodiscard]] MFBT_API bool profiler_is_sampling_paused();
    394 
    395 // Is the current thread sleeping?
    396 [[nodiscard]] MFBT_API bool profiler_thread_is_sleeping();
    397 
    398 // Get all the features supported by the profiler that are accepted by
    399 // profiler_start(). The result is the same whether the profiler is active or
    400 // not.
    401 [[nodiscard]] MFBT_API uint32_t profiler_get_available_features();
    402 
    403 // Returns the full feature set if the profiler is active.
    404 // Note: the return value can become immediately out-of-date, much like the
    405 // return value of profiler_is_active().
    406 [[nodiscard]] inline mozilla::Maybe<uint32_t> profiler_features_if_active() {
    407  return baseprofiler::detail::RacyFeatures::FeaturesIfActive();
    408 }
    409 
    410 // Check if a profiler feature (specified via the ProfilerFeature type) is
    411 // active. Returns false if the profiler is inactive. Note: the return value
    412 // can become immediately out-of-date, much like the return value of
    413 // profiler_is_active().
    414 [[nodiscard]] MFBT_API bool profiler_feature_active(uint32_t aFeature);
    415 
    416 // Check if the profiler is active without a feature (specified via the
    417 // ProfilerFeature type). Note: the return value can become immediately
    418 // out-of-date, much like the return value of profiler_is_active().
    419 [[nodiscard]] MFBT_API bool profiler_active_without_feature(uint32_t aFeature);
    420 
    421 // Returns true if any of the profiler mutexes are currently locked *on the
    422 // current thread*. This may be used by re-entrant code that may call profiler
    423 // functions while the same or a different profiler mutex is locked, which could
    424 // deadlock.
    425 [[nodiscard]] bool profiler_is_locked_on_current_thread();
    426 
    427 }  // namespace mozilla::baseprofiler
    428 
    429 #endif  // !MOZ_GECKO_PROFILER
    430 
    431 #endif  // BaseProfilerState_h