BaseProfilerState.h (20892B)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// The Gecko Profiler is an always-on profiler that takes fast and low overhead
// samples of the program execution using only userspace functionality for
// portability. The goal of this module is to provide performance data in a
// generic cross-platform way without requiring custom tools or kernel support.
//
// Samples are collected to form a timeline with optional timeline event
// (markers) used for filtering. The samples include both native stacks and
// platform-independent "label stack" frames.

#ifndef BaseProfilerState_h
#define BaseProfilerState_h

// This header contains most functions that give information about the Base
// Profiler: Whether it is active or not, paused, the selected features, and
// some generic process and thread information.
// It is safe to include unconditionally, but uses of structs and functions must
// be guarded by `#ifdef MOZ_GECKO_PROFILER`.

#include "mozilla/BaseProfilerUtils.h"

#ifndef MOZ_GECKO_PROFILER

// Profiler compiled out: the statistics macro expands to nothing, so call
// sites do not need their own guard.
#  define AUTO_PROFILER_STATS(name)

namespace mozilla::baseprofiler {

// Stubs for builds without MOZ_GECKO_PROFILER: they unconditionally report
// "inactive". These two are the only query functions defined in this branch;
// everything else in this header exists only when MOZ_GECKO_PROFILER is
// defined.
[[nodiscard]] inline bool profiler_is_active() { return false; }
[[nodiscard]] inline bool profiler_is_active_and_unpaused() { return false; }

}  // namespace mozilla::baseprofiler

#else  // !MOZ_GECKO_PROFILER

#  include "mozilla/Atomics.h"
#  include "mozilla/Maybe.h"

#  include <stdint.h>

// Uncomment the following line to display profiler runtime statistics at
// shutdown.
47 // # define PROFILER_RUNTIME_STATS 48 49 # ifdef PROFILER_RUNTIME_STATS 50 # include "mozilla/TimeStamp.h" 51 # endif 52 53 namespace mozilla::baseprofiler { 54 55 # ifdef PROFILER_RUNTIME_STATS 56 // This class gathers durations and displays some basic stats when destroyed. 57 // It is intended to be used as a static variable (see `AUTO_PROFILER_STATS` 58 // below), to display stats at the end of the program. 59 class StaticBaseProfilerStats { 60 public: 61 explicit StaticBaseProfilerStats(const char* aName) : mName(aName) {} 62 63 ~StaticBaseProfilerStats() { 64 // Using unsigned long long for computations and printfs. 65 using ULL = unsigned long long; 66 ULL n = static_cast<ULL>(mNumberDurations); 67 if (n != 0) { 68 ULL sumNs = static_cast<ULL>(mSumDurationsNs); 69 printf( 70 "[%d] Profiler stats `%s`: %llu ns / %llu = %llu ns, max %llu ns\n", 71 int(profiler_current_process_id().ToNumber()), mName, sumNs, n, 72 sumNs / n, static_cast<ULL>(mLongestDurationNs)); 73 } else { 74 printf("[%d] Profiler stats `%s`: (nothing)\n", 75 int(profiler_current_process_id().ToNumber()), mName); 76 } 77 } 78 79 void AddDurationFrom(TimeStamp aStart) { 80 DurationNs duration = static_cast<DurationNs>( 81 (TimeStamp::Now() - aStart).ToMicroseconds() * 1000 + 0.5); 82 mSumDurationsNs += duration; 83 ++mNumberDurations; 84 // Update mLongestDurationNs if this one is longer. 85 for (;;) { 86 DurationNs longest = mLongestDurationNs; 87 if (MOZ_LIKELY(longest >= duration)) { 88 // This duration is not the longest, nothing to do. 89 break; 90 } 91 if (MOZ_LIKELY(mLongestDurationNs.compareExchange(longest, duration))) { 92 // Successfully updated `mLongestDurationNs` with the new value. 93 break; 94 } 95 // Otherwise someone else just updated `mLongestDurationNs`, we need to 96 // try again by looping. 
97 } 98 } 99 100 private: 101 using DurationNs = uint64_t; 102 using Count = uint32_t; 103 104 Atomic<DurationNs> mSumDurationsNs{0}; 105 Atomic<DurationNs> mLongestDurationNs{0}; 106 Atomic<Count> mNumberDurations{0}; 107 const char* mName; 108 }; 109 110 // RAII object that measure its scoped lifetime duration and reports it to a 111 // `StaticBaseProfilerStats`. 112 class MOZ_RAII AutoProfilerStats { 113 public: 114 explicit AutoProfilerStats(StaticBaseProfilerStats& aStats) 115 : mStats(aStats), mStart(TimeStamp::Now()) {} 116 117 ~AutoProfilerStats() { mStats.AddDurationFrom(mStart); } 118 119 private: 120 StaticBaseProfilerStats& mStats; 121 TimeStamp mStart; 122 }; 123 124 // Macro that should be used to collect basic statistics from measurements of 125 // block durations, from where this macro is, until the end of its enclosing 126 // scope. The name is used in the static variable name and when displaying stats 127 // at the end of the program; Another location could use the same name but their 128 // stats will not be combined, so use different name if these locations should 129 // be distinguished. 130 # define AUTO_PROFILER_STATS(name) \ 131 static ::mozilla::baseprofiler::StaticBaseProfilerStats sStat##name( \ 132 #name); \ 133 ::mozilla::baseprofiler::AutoProfilerStats autoStat##name(sStat##name); 134 135 # else // PROFILER_RUNTIME_STATS 136 137 # define AUTO_PROFILER_STATS(name) 138 139 # endif // PROFILER_RUNTIME_STATS else 140 141 //--------------------------------------------------------------------------- 142 // Profiler features 143 //--------------------------------------------------------------------------- 144 145 # if defined(__APPLE__) && defined(__aarch64__) 146 # define POWER_HELP "Sample per process power use" 147 # elif defined(__APPLE__) && defined(__x86_64__) 148 # define POWER_HELP \ 149 "Record the power used by the entire system with each sample." 
150 # elif defined(__linux__) && defined(__x86_64__) 151 # define POWER_HELP \ 152 "Record the power used by the entire system with each sample. " \ 153 "Only available with Intel CPUs and requires setting " \ 154 "the sysctl kernel.perf_event_paranoid to 0." 155 # elif defined(_MSC_VER) 156 # define POWER_HELP \ 157 "Record the value of every energy meter available on the system with " \ 158 "each sample. Only available on Windows 11 with Intel CPUs." 159 # else 160 # define POWER_HELP "Not supported on this platform." 161 # endif 162 163 // Higher-order macro containing all the feature info in one place. Define 164 // |MACRO| appropriately to extract the relevant parts. Note that the number 165 // values are used internally only and so can be changed without consequence. 166 // Any changes to this list should also be applied to the feature list in 167 // toolkit/components/extensions/schemas/geckoProfiler.json. 168 // *** Synchronize with lists in ProfilerState.h and geckoProfiler.json *** 169 # define BASE_PROFILER_FOR_EACH_FEATURE(MACRO) \ 170 MACRO(0, "java", Java, "Profile Java code, Android only") \ 171 \ 172 MACRO(1, "js", JS, \ 173 "Get the JS engine to expose the JS stack to the profiler") \ 174 \ 175 MACRO(2, "mainthreadio", MainThreadIO, "Add main thread file I/O") \ 176 \ 177 MACRO(3, "fileio", FileIO, \ 178 "Add file I/O from all profiled threads, implies mainthreadio") \ 179 \ 180 MACRO(4, "fileioall", FileIOAll, \ 181 "Add file I/O from all threads, implies fileio") \ 182 \ 183 MACRO(5, "nomarkerstacks", NoMarkerStacks, \ 184 "Markers do not capture stacks, to reduce overhead") \ 185 \ 186 MACRO(6, "screenshots", Screenshots, \ 187 "Take a snapshot of the window on every composition") \ 188 \ 189 MACRO(7, "seqstyle", SequentialStyle, \ 190 "Disable parallel traversal in styling") \ 191 \ 192 MACRO(8, "stackwalk", StackWalk, \ 193 "Walk the C++ stack, not available on all platforms") \ 194 \ 195 MACRO(9, "jsallocations", JSAllocations, \ 196 "Have 
the JavaScript engine track allocations") \ 197 \ 198 MACRO(10, "nostacksampling", NoStackSampling, \ 199 "Disable all stack sampling: Cancels \"js\", \"stackwalk\" and " \ 200 "labels") \ 201 \ 202 MACRO(11, "nativeallocations", NativeAllocations, \ 203 "Collect the stacks from a smaller subset of all native " \ 204 "allocations, biasing towards collecting larger allocations") \ 205 \ 206 MACRO(12, "ipcmessages", IPCMessages, \ 207 "Have the IPC layer track cross-process messages") \ 208 \ 209 MACRO(13, "audiocallbacktracing", AudioCallbackTracing, \ 210 "Audio callback tracing") \ 211 \ 212 MACRO(14, "cpu", CPUUtilization, "CPU utilization") \ 213 \ 214 MACRO(15, "notimerresolutionchange", NoTimerResolutionChange, \ 215 "Do not adjust the timer resolution for fast sampling, so that " \ 216 "other Firefox timers do not get affected") \ 217 \ 218 MACRO(16, "cpuallthreads", CPUAllThreads, \ 219 "Sample the CPU utilization of all registered threads") \ 220 \ 221 MACRO(17, "samplingallthreads", SamplingAllThreads, \ 222 "Sample the stacks of all registered threads") \ 223 \ 224 MACRO(18, "markersallthreads", MarkersAllThreads, \ 225 "Record markers from all registered threads") \ 226 \ 227 MACRO(19, "unregisteredthreads", UnregisteredThreads, \ 228 "Discover and profile unregistered threads -- beware: expensive!") \ 229 \ 230 MACRO(20, "processcpu", ProcessCPU, \ 231 "Sample the CPU utilization of each process") \ 232 \ 233 MACRO(21, "power", Power, POWER_HELP) \ 234 \ 235 MACRO(22, "cpufreq", CPUFrequency, \ 236 "Record the clock frequency of " \ 237 "every CPU core for every profiler sample.") \ 238 \ 239 MACRO(23, "bandwidth", Bandwidth, \ 240 "Record the network bandwidth used for every profiler sample.") \ 241 \ 242 MACRO(24, "memory", Memory, \ 243 "Track the memory allocations and deallocations per process over " \ 244 "time.") \ 245 \ 246 MACRO(25, "tracing", Tracing, \ 247 "Instead of sampling periodically, captures information about " \ 248 "every function 
executed for the duration (JS only)") \ 249 \ 250 MACRO(26, "sandbox", Sandbox, \ 251 "Report sandbox syscalls and logs in the " \ 252 "profiler.") \ 253 \ 254 MACRO(27, "flows", Flows, \ 255 "Include all flow-related markers. These markers show the program" \ 256 "better but can cause more overhead in some places than normal.") \ 257 \ 258 MACRO(28, "jssources", JSSources, \ 259 "Collect JavaScript source code information for profiled scripts.") 260 261 // *** Synchronize with lists in ProfilerState.h and geckoProfiler.json *** 262 263 struct ProfilerFeature { 264 # define DECLARE(n_, str_, Name_, desc_) \ 265 static constexpr uint32_t Name_ = (1u << n_); \ 266 [[nodiscard]] static constexpr bool Has##Name_(uint32_t aFeatures) { \ 267 return aFeatures & Name_; \ 268 } \ 269 static constexpr void Set##Name_(uint32_t& aFeatures) { \ 270 aFeatures |= Name_; \ 271 } \ 272 static constexpr void Clear##Name_(uint32_t& aFeatures) { \ 273 aFeatures &= ~Name_; \ 274 } 275 276 // Define a bitfield constant, a getter, and two setters for each feature. 277 BASE_PROFILER_FOR_EACH_FEATURE(DECLARE) 278 279 # undef DECLARE 280 }; 281 282 namespace detail { 283 284 // RacyFeatures is only defined in this header file so that its methods can 285 // be inlined into profiler_is_active(). Please do not use anything from the 286 // detail namespace outside the profiler. 287 288 // Within the profiler's code, the preferred way to check profiler activeness 289 // and features is via ActivePS(). However, that requires locking gPSMutex. 290 // There are some hot operations where absolute precision isn't required, so we 291 // duplicate the activeness/feature state in a lock-free manner in this class. 
292 class RacyFeatures { 293 public: 294 MFBT_API static void SetActive(uint32_t aFeatures); 295 296 MFBT_API static void SetInactive(); 297 298 MFBT_API static void SetPaused(); 299 300 MFBT_API static void SetUnpaused(); 301 302 MFBT_API static void SetSamplingPaused(); 303 304 MFBT_API static void SetSamplingUnpaused(); 305 306 [[nodiscard]] MFBT_API static mozilla::Maybe<uint32_t> FeaturesIfActive() { 307 if (uint32_t af = sActiveAndFeatures; af & Active) { 308 // Active, remove the Active&Paused bits to get all features. 309 return Some(af & ~(Active | Paused | SamplingPaused)); 310 } 311 return Nothing(); 312 } 313 314 [[nodiscard]] MFBT_API static bool IsActive(); 315 316 [[nodiscard]] MFBT_API static bool IsActiveWithFeature(uint32_t aFeature); 317 318 [[nodiscard]] MFBT_API static bool IsActiveWithoutFeature(uint32_t aFeature); 319 320 // True if profiler is active, and not fully paused. 321 // Note that periodic sampling *could* be paused! 322 [[nodiscard]] MFBT_API static bool IsActiveAndUnpaused(); 323 324 // True if profiler is active, and sampling is not paused (though generic 325 // `SetPaused()` or specific `SetSamplingPaused()`). 326 [[nodiscard]] MFBT_API static bool IsActiveAndSamplingUnpaused(); 327 328 private: 329 static constexpr uint32_t Active = 1u << 31; 330 static constexpr uint32_t Paused = 1u << 30; 331 static constexpr uint32_t SamplingPaused = 1u << 29; 332 333 // Ensure Active/Paused don't overlap with any of the feature bits. 334 # define NO_OVERLAP(n_, str_, Name_, desc_) \ 335 static_assert(ProfilerFeature::Name_ != SamplingPaused, \ 336 "bad feature value"); 337 338 BASE_PROFILER_FOR_EACH_FEATURE(NO_OVERLAP); 339 340 # undef NO_OVERLAP 341 342 // We combine the active bit with the feature bits so they can be read or 343 // written in a single atomic operation. 344 // TODO: Could this be MFBT_DATA for better inlining optimization? 
345 MFBT_DATA static Atomic<uint32_t, MemoryOrdering::Relaxed> sActiveAndFeatures; 346 }; 347 348 MFBT_API bool IsThreadBeingProfiled(); 349 350 } // namespace detail 351 352 //--------------------------------------------------------------------------- 353 // Get information from the profiler 354 //--------------------------------------------------------------------------- 355 356 // Is the profiler active? Note: the return value of this function can become 357 // immediately out-of-date. E.g. the profile might be active but then 358 // profiler_stop() is called immediately afterward. One common and reasonable 359 // pattern of usage is the following: 360 // 361 // if (profiler_is_active()) { 362 // ExpensiveData expensiveData = CreateExpensiveData(); 363 // PROFILER_OPERATION(expensiveData); 364 // } 365 // 366 // where PROFILER_OPERATION is a no-op if the profiler is inactive. In this 367 // case the profiler_is_active() check is just an optimization -- it prevents 368 // us calling CreateExpensiveData() unnecessarily in most cases, but the 369 // expensive data will end up being created but not used if another thread 370 // stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION 371 // calls. 372 [[nodiscard]] inline bool profiler_is_active() { 373 return baseprofiler::detail::RacyFeatures::IsActive(); 374 } 375 376 // Same as profiler_is_active(), but also checks if the profiler is not paused. 377 [[nodiscard]] inline bool profiler_is_active_and_unpaused() { 378 return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused(); 379 } 380 381 // Is the profiler active and unpaused, and is the current thread being 382 // profiled? (Same caveats and recommented usage as profiler_is_active().) 383 [[nodiscard]] inline bool profiler_thread_is_being_profiled() { 384 return baseprofiler::detail::RacyFeatures::IsActiveAndUnpaused() && 385 baseprofiler::detail::IsThreadBeingProfiled(); 386 } 387 388 // Is the profiler active and paused? 
Returns false if the profiler is inactive. 389 [[nodiscard]] MFBT_API bool profiler_is_paused(); 390 391 // Is the profiler active and sampling is paused? Returns false if the profiler 392 // is inactive. 393 [[nodiscard]] MFBT_API bool profiler_is_sampling_paused(); 394 395 // Is the current thread sleeping? 396 [[nodiscard]] MFBT_API bool profiler_thread_is_sleeping(); 397 398 // Get all the features supported by the profiler that are accepted by 399 // profiler_start(). The result is the same whether the profiler is active or 400 // not. 401 [[nodiscard]] MFBT_API uint32_t profiler_get_available_features(); 402 403 // Returns the full feature set if the profiler is active. 404 // Note: the return value can become immediately out-of-date, much like the 405 // return value of profiler_is_active(). 406 [[nodiscard]] inline mozilla::Maybe<uint32_t> profiler_features_if_active() { 407 return baseprofiler::detail::RacyFeatures::FeaturesIfActive(); 408 } 409 410 // Check if a profiler feature (specified via the ProfilerFeature type) is 411 // active. Returns false if the profiler is inactive. Note: the return value 412 // can become immediately out-of-date, much like the return value of 413 // profiler_is_active(). 414 [[nodiscard]] MFBT_API bool profiler_feature_active(uint32_t aFeature); 415 416 // Check if the profiler is active without a feature (specified via the 417 // ProfilerFeature type). Note: the return value can become immediately 418 // out-of-date, much like the return value of profiler_is_active(). 419 [[nodiscard]] MFBT_API bool profiler_active_without_feature(uint32_t aFeature); 420 421 // Returns true if any of the profiler mutexes are currently locked *on the 422 // current thread*. This may be used by re-entrant code that may call profiler 423 // functions while the same of a different profiler mutex is locked, which could 424 // deadlock. 
425 [[nodiscard]] bool profiler_is_locked_on_current_thread(); 426 427 } // namespace mozilla::baseprofiler 428 429 #endif // !MOZ_GECKO_PROFILER 430 431 #endif // BaseProfilerState_h