tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

profiler.cc (5459B)


      1 // Copyright 2025 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include "hwy/profiler.h"
     17 
     18 #include "hwy/highway_export.h"  // HWY_DLLEXPORT
     19 
     20 #if PROFILER_ENABLED
     21 
     22 #include <stddef.h>
     23 #include <stdint.h>
     24 #include <stdio.h>
     25 
     26 #include "hwy/base.h"
     27 #include "hwy/robust_statistics.h"
     28 #include "hwy/timer.h"
     29 
     30 #endif  // PROFILER_ENABLED
     31 
     32 namespace hwy {
     33 
     34 #if PROFILER_ENABLED
     35 
     36 static constexpr bool kPrintOverhead = true;
     37 
     38 // Must zero-init because `ThreadFunc` calls `SetGlobalIdx()` potentially after
     39 // this is first used in the `pool::Worker` ctor.
     40 /*static*/ thread_local size_t Profiler::s_global_idx = 0;
     41 
     42 // Detects duration of a zero-length zone: timer plus packet overhead.
     43 static uint64_t DetectSelfOverhead(Profiler& profiler, size_t global_idx) {
     44  static const profiler::ZoneHandle zone = profiler.AddZone("DetectSelf");
     45  profiler::Results results;
     46  const size_t kNumSamples = 25;
     47  uint32_t samples[kNumSamples];
     48  for (size_t idx_sample = 0; idx_sample < kNumSamples; ++idx_sample) {
     49    // Enough for stable measurements, but only about 50 ms startup cost.
     50    const size_t kNumDurations = 700;
     51    uint32_t durations[kNumDurations];
     52    for (size_t idx_duration = 0; idx_duration < kNumDurations;
     53         ++idx_duration) {
     54      {
     55        PROFILER_ZONE3(profiler, global_idx, zone);
     56      }
     57      durations[idx_duration] =
     58          static_cast<uint32_t>(profiler.GetFirstDurationAndReset(global_idx));
     59    }
     60    samples[idx_sample] = robust_statistics::Mode(durations, kNumDurations);
     61  }
     62  return robust_statistics::Mode(samples, kNumSamples);
     63 }
     64 
     65 // Detects average duration of a zero-length zone, after deducting self
     66 // overhead. This accounts for the delay before/after capturing start/end
     67 // timestamps, for example due to fence instructions in timer::Start/Stop.
     68 static uint64_t DetectChildOverhead(Profiler& profiler, size_t global_idx,
     69                                    uint64_t self_overhead) {
     70  static const profiler::ZoneHandle zone = profiler.AddZone("DetectChild");
     71  // Enough for stable measurements, but only about 50 ms startup cost.
     72  const size_t kMaxSamples = 30;
     73  uint32_t samples[kMaxSamples];
     74  size_t num_samples = 0;
     75  // Upper bound because timer resolution might be too coarse to get nonzero.
     76  for (size_t s = 0; s < 2 * kMaxSamples && num_samples < kMaxSamples; ++s) {
     77    const size_t kNumDurations = 50;
     78    uint32_t durations[kNumDurations];
     79    for (size_t d = 0; d < kNumDurations; ++d) {
     80      constexpr size_t kReps = 500;
     81      HWY_FENCE;
     82      const uint64_t t0 = timer::Start();
     83      for (size_t r = 0; r < kReps; ++r) {
     84        PROFILER_ZONE3(profiler, global_idx, zone);
     85      }
     86      const uint64_t t1 = timer::Stop();
     87      HWY_FENCE;
     88      // We are measuring the total, not individual zone durations, to include
     89      // cross-zone overhead.
     90      (void)profiler.GetFirstDurationAndReset(global_idx);
     91 
     92      const uint64_t avg_duration = (t1 - t0 + kReps / 2) / kReps;
     93      durations[d] = static_cast<uint32_t>(
     94          profiler::PerWorker::ClampedSubtract(avg_duration, self_overhead));
     95    }
     96    samples[num_samples] = robust_statistics::Mode(durations, kNumDurations);
     97    // Overhead is nonzero, but we often measure zero; skip them to prevent
     98    // getting a zero result.
     99    num_samples += (samples[num_samples] != 0);
    100  }
    101  return num_samples == 0 ? 0 : robust_statistics::Mode(samples, num_samples);
    102 }
    103 
    104 Profiler::Profiler() {
    105  const uint64_t t0 = timer::Start();
    106 
    107  char cpu[100];
    108  if (HWY_UNLIKELY(!platform::HaveTimerStop(cpu))) {
    109    HWY_ABORT("CPU %s is too old for PROFILER_ENABLED=1, exiting", cpu);
    110  }
    111 
    112  // `ThreadPool` calls `Profiler::Get()` before it creates threads, hence this
    113  // is guaranteed to be running on the main thread.
    114  constexpr size_t kMain = 0;
    115  // Must be called before any use of `PROFILER_ZONE*/PROFILER_FUNC*`. This runs
    116  // only once because `Profiler` is a singleton.
    117  ReserveWorker(kMain);
    118  SetGlobalIdx(kMain);
    119 
    120  profiler::Overheads overheads;
    121  // WARNING: must pass in `*this` and use `PROFILER_ZONE3` to avoid calling
    122  // `Profiler::Get()`, because that would re-enter the magic static init.
    123  overheads.self = DetectSelfOverhead(*this, kMain);
    124  overheads.child = DetectChildOverhead(*this, kMain, overheads.self);
    125  for (size_t worker = 0; worker < profiler::kMaxWorkers; ++worker) {
    126    workers_[worker].SetOverheads(overheads);
    127  }
    128 
    129  HWY_IF_CONSTEXPR(kPrintOverhead) {
    130    printf("Self overhead: %.0f; child: %.0f; elapsed %.1f ms\n",
    131           static_cast<double>(overheads.self),
    132           static_cast<double>(overheads.child),
    133           static_cast<double>(timer::Stop() - t0) /
    134               platform::InvariantTicksPerSecond() * 1E3);
    135  }
    136 }
    137 
    138 #endif  // PROFILER_ENABLED
    139 
    140 // Even if disabled, we want to export the symbol.
    141 HWY_DLLEXPORT Profiler& Profiler::Get() {
    142  static Profiler* profiler = new Profiler();
    143  return *profiler;
    144 }
    145 
    146 }  // namespace hwy