tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

BaseProfilingStack.h (20214B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef BaseProfilingStack_h
      8 #define BaseProfilingStack_h
      9 
     10 #ifndef MOZ_GECKO_PROFILER
     11 #  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
     12 #endif
     13 
     14 #include "mozilla/Assertions.h"
     15 #include "mozilla/Atomics.h"
     16 #include "mozilla/BaseProfilingCategory.h"
     17 
     18 #include <stdint.h>
     19 
     20 // This file defines the classes ProfilingStack and ProfilingStackFrame.
     21 // The ProfilingStack manages an array of ProfilingStackFrames.
     22 // It keeps track of the "label stack" and the JS interpreter stack.
     23 // The two stack types are interleaved.
     24 //
     25 // Usage:
     26 //
     27 //  ProfilingStack* profilingStack = ...;
     28 //
     29 //  // For label frames:
     30 //  profilingStack->pushLabelFrame(...);
     31 //  // Execute some code. When finished, pop the frame:
     32 //  profilingStack->pop();
     33 //
     34 //  // For JS stack frames:
     35 //  profilingStack->pushJSFrame(...);
     36 //  // Execute some code. When finished, pop the frame:
     37 //  profilingStack->pop();
     38 //
     39 //
     40 // Concurrency considerations
     41 //
     42 // A thread's profiling stack (and the frames inside it) is only modified by
     43 // that thread. However, the profiling stack can be *read* by a different
     44 // thread, the sampler thread: Whenever the profiler wants to sample a given
     45 // thread A, the following happens:
     46 //  (1) Thread A is suspended.
     47 //  (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
     48 //      including all ProfilingStackFrames that are currently in that stack
     49 //      (profilingStack->frames[0..profilingStack->stackSize()]).
     50 //  (3) Thread A is resumed.
     51 //
     52 // Thread suspension is achieved using platform-specific APIs; refer to each
     53 // platform's Sampler::SuspendAndSampleAndResumeThread implementation in
     54 // platform-*.cpp for details.
     55 //
     56 // When the thread is suspended, the values in profilingStack->stackPointer and
     57 // in the stack frame range
     58 // profilingStack->frames[0..profilingStack->stackPointer] need to be in a
     59 // consistent state, so that thread S does not read partially-constructed stack
     60 // frames. More specifically, we have two requirements:
     61 //  (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
     62 //      data needs to be put in place *before* the stackPointer is incremented,
     63 //      and the compiler + CPU need to know that this order matters.
     64 //  (2) When popping a frame from the stack and then preparing the
     65 //      ProfilingStackFrame data for the next frame that is about to be pushed,
     66 //      the decrement of the stackPointer in pop() needs to happen *before* the
     67 //      ProfilingStackFrame for the new frame is being populated, and the
     68 //      compiler + CPU need to know that this order matters.
     69 //
     70 // We can express the relevance of these orderings in multiple ways.
     71 // Option A is to make stackPointer an atomic with SequentiallyConsistent
     72 // memory ordering. This would ensure that no writes in thread A would be
     73 // reordered across any writes to stackPointer, which satisfies requirements
     74 // (1) and (2) at the same time. Option A is the simplest.
     75 // Option B is to use ReleaseAcquire memory ordering both for writes to
     76 // stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
     77 // ensure that all writes that happened *before this write in program order* are
     78 // not reordered to happen after this write. ReleaseAcquire ordering places no
     79 // requirements on the ordering of writes that happen *after* this write in
     80 // program order.
     81 // Using release-stores for writes to stackPointer expresses requirement (1),
     82 // and using release-stores for writes to the ProfilingStackFrame fields
     83 // expresses requirement (2).
     84 //
     85 // Option B is more complicated than option A, but has much better performance
     86 // on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
     87 // from option A to option B reduced the overhead of pushing+popping a
     88 // ProfilingStackFrame by 10 nanoseconds.
     89 // On x86/64, release-stores require no explicit hardware barriers or lock
     90 // instructions.
     91 // On ARM/64, option B may be slower than option A, because the compiler will
     92 // generate hardware barriers for every single release-store instead of just
     93 // for the writes to stackPointer. However, the actual performance impact of
     94 // this has not yet been measured on ARM, so we're currently using option B
     95 // everywhere. This is something that we may want to change in the future once
     96 // we've done measurements.
     97 
     98 namespace mozilla {
     99 namespace baseprofiler {
    100 
    101 // A call stack can be specified to the JS engine such that all JS entry/exits
    102 // to functions push/pop a stack frame to/from the specified stack.
    103 //
    104 // For more detailed information, see vm/GeckoProfiler.h.
    105 //
    106 class ProfilingStackFrame {
    107  // A ProfilingStackFrame represents either a label frame or a JS frame.
    108 
    109  // WARNING WARNING WARNING
    110  //
    111  // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
    112  // that writes to these fields are release-writes, which ensures that
    113  // earlier writes in this thread don't get reordered after the writes to
    114  // these fields. In particular, the decrement of the stack pointer in
    115  // ProfilingStack::pop() is a write that *must* happen before the values in
    116  // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
    117  // see an inconsistent state where the stack pointer still points to a
    118  // ProfilingStackFrame which has already been popped off the stack and whose
    119  // fields have now been partially repopulated with new values.
    120  // See the "Concurrency considerations" paragraph at the top of this file
    121  // for more details.
    122 
    123  // Descriptive label for this stack frame. Must be a static string! Can be
    124  // an empty string, but not a null pointer.
    125  Atomic<const char*, ReleaseAcquire> label_;
    126 
    127  // An additional descriptive string of this frame which is combined with
    128  // |label_| in profiler output. Need not be (and usually isn't) static. Can
    129  // be null.
    130  Atomic<const char*, ReleaseAcquire> dynamicString_;
    131 
    132  // Stack pointer for non-JS stack frames, the script pointer otherwise.
    133  Atomic<void*, ReleaseAcquire> spOrScript;
    134 
    135  // ID of the JS Realm for JS stack frames.
    136  // Must not be used on non-JS frames; it'll contain either the default 0,
    137  // or a leftover value from a previous JS stack frame that was using this
    138  // ProfilingStackFrame object.
    139  mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_;
    140 
    141  // The bytecode offset for JS stack frames.
    142  // Must not be used on non-JS frames; it'll contain either the default 0,
    143  // or a leftover value from a previous JS stack frame that was using this
    144  // ProfilingStackFrame object.
    145  Atomic<int32_t, ReleaseAcquire> pcOffsetIfJS_;
    146 
    147  // Bits 0...8 hold the Flags. Bits 9...31 hold the category pair.
    148  Atomic<uint32_t, ReleaseAcquire> flagsAndCategoryPair_;
    149 
    150 public:
    151  ProfilingStackFrame() = default;
    152  ProfilingStackFrame& operator=(const ProfilingStackFrame& other) {
    153    label_ = other.label();
    154    dynamicString_ = other.dynamicString();
    155    void* spScript = other.spOrScript;
    156    spOrScript = spScript;
    157    int32_t offsetIfJS = other.pcOffsetIfJS_;
    158    pcOffsetIfJS_ = offsetIfJS;
    159    int64_t realmID = other.realmID_;
    160    realmID_ = realmID;
    161    uint32_t flagsAndCategory = other.flagsAndCategoryPair_;
    162    flagsAndCategoryPair_ = flagsAndCategory;
    163    return *this;
    164  }
    165 
    166  // Reserve up to 16 bits for flags, and 16 for category pair.
    167  enum class Flags : uint32_t {
    168    // The first three flags describe the kind of the frame and are
    169    // mutually exclusive. (We still give them individual bits for
    170    // simplicity.)
    171 
    172    // A regular label frame. These usually come from AutoProfilerLabel.
    173    IS_LABEL_FRAME = 1 << 0,
    174 
    175    // A special frame indicating the start of a run of JS profiling stack
    176    // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp
    177    // field. These frames are needed to get correct ordering between JS
    178    // and LABEL frames because JS frames don't carry sp information.
    179    // SP is short for "stack pointer".
    180    IS_SP_MARKER_FRAME = 1 << 1,
    181 
    182    // A JS frame.
    183    IS_JS_FRAME = 1 << 2,
    184 
    185    // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME
    186    // frames can have this flag set and unset during their lifetime.
    187    // JS_OSR frames are ignored.
    188    JS_OSR = 1 << 3,
    189 
    190    // The next three are mutually exclusive.
    191    // By default, for profiling stack frames that have both a label and a
    192    // dynamic string, the two strings are combined into one string of the
    193    // form "<label> <dynamicString>" during JSON serialization. The
    194    // following flags can be used to change this preset.
    195    STRING_TEMPLATE_METHOD = 1 << 4,  // "<label>.<dynamicString>"
    196    STRING_TEMPLATE_GETTER = 1 << 5,  // "get <label>.<dynamicString>"
    197    STRING_TEMPLATE_SETTER = 1 << 6,  // "set <label>.<dynamicString>"
    198 
    199    // If set, causes this stack frame to be marked as "relevantForJS" in
    200    // the profile JSON, which will make it show up in the "JS only" call
    201    // tree view.
    202    RELEVANT_FOR_JS = 1 << 7,
    203 
    204    // If set, causes the label on this ProfilingStackFrame to be ignored
    205    // and to be replaced by the subcategory's label.
    206    LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8,
    207 
    208    // Frame dynamic string does not contain user data.
    209    NONSENSITIVE = 1 << 9,
    210 
    211    // A JS Baseline Interpreter frame.
    212    IS_BLINTERP_FRAME = 1 << 10,
    213 
    214    FLAGS_BITCOUNT = 16,
    215    FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1
    216  };
    217 
    218  static_assert(
    219      uint32_t(ProfilingCategoryPair::LAST) <=
    220          (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)),
    221      "Too many category pairs to fit into u32 with together with the "
    222      "reserved bits for the flags");
    223 
    224  bool isLabelFrame() const {
    225    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME);
    226  }
    227 
    228  bool isSpMarkerFrame() const {
    229    return uint32_t(flagsAndCategoryPair_) &
    230           uint32_t(Flags::IS_SP_MARKER_FRAME);
    231  }
    232 
    233  bool isJsFrame() const {
    234    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME);
    235  }
    236 
    237  bool isOSRFrame() const {
    238    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR);
    239  }
    240 
    241  void setIsOSRFrame(bool isOSR) {
    242    if (isOSR) {
    243      flagsAndCategoryPair_ =
    244          uint32_t(flagsAndCategoryPair_) | uint32_t(Flags::JS_OSR);
    245    } else {
    246      flagsAndCategoryPair_ =
    247          uint32_t(flagsAndCategoryPair_) & ~uint32_t(Flags::JS_OSR);
    248    }
    249  }
    250 
    251  const char* label() const {
    252    uint32_t flagsAndCategoryPair = flagsAndCategoryPair_;
    253    if (flagsAndCategoryPair &
    254        uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) {
    255      auto categoryPair = ProfilingCategoryPair(
    256          flagsAndCategoryPair >> uint32_t(Flags::FLAGS_BITCOUNT));
    257      return GetProfilingCategoryPairInfo(categoryPair).mLabel;
    258    }
    259    return label_;
    260  }
    261 
    262  const char* dynamicString() const { return dynamicString_; }
    263 
    264  void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
    265                      ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {
    266    label_ = aLabel;
    267    dynamicString_ = aDynamicString;
    268    spOrScript = sp;
    269    // pcOffsetIfJS_ is not set and must not be used on label frames.
    270    flagsAndCategoryPair_ =
    271        uint32_t(Flags::IS_LABEL_FRAME) |
    272        (uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | aFlags;
    273    MOZ_ASSERT(isLabelFrame());
    274  }
    275 
    276  void initSpMarkerFrame(void* sp) {
    277    label_ = "";
    278    dynamicString_ = nullptr;
    279    spOrScript = sp;
    280    // pcOffsetIfJS_ is not set and must not be used on sp marker frames.
    281    flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) |
    282                            (uint32_t(ProfilingCategoryPair::OTHER)
    283                             << uint32_t(Flags::FLAGS_BITCOUNT));
    284    MOZ_ASSERT(isSpMarkerFrame());
    285  }
    286 
    287  void initJsFrame(const char* aLabel, const char* aDynamicString,
    288                   void* /* JSScript* */ aScript, int32_t aOffset,
    289                   uint64_t aRealmID) {
    290    label_ = aLabel;
    291    dynamicString_ = aDynamicString;
    292    spOrScript = aScript;
    293    pcOffsetIfJS_ = aOffset;
    294    realmID_ = aRealmID;
    295    flagsAndCategoryPair_ =
    296        uint32_t(Flags::IS_JS_FRAME) | (uint32_t(ProfilingCategoryPair::JS)
    297                                        << uint32_t(Flags::FLAGS_BITCOUNT));
    298    MOZ_ASSERT(isJsFrame());
    299  }
    300 
    301  uint32_t flags() const {
    302    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK);
    303  }
    304 
    305  ProfilingCategoryPair categoryPair() const {
    306    return ProfilingCategoryPair(flagsAndCategoryPair_ >>
    307                                 uint32_t(Flags::FLAGS_BITCOUNT));
    308  }
    309 
    310  uint64_t realmID() const { return realmID_; }
    311 
    312  void* stackAddress() const {
    313    MOZ_ASSERT(!isJsFrame());
    314    return spOrScript;
    315  }
    316 
    317  // Note that the pointer returned might be invalid.
    318  void* rawScript() const {
    319    MOZ_ASSERT(isJsFrame());
    320    return spOrScript;
    321  }
    322  void setRawScript(void* aScript) {
    323    MOZ_ASSERT(isJsFrame());
    324    spOrScript = aScript;
    325  }
    326 
    327  int32_t pcOffset() const {
    328    MOZ_ASSERT(isJsFrame());
    329    return pcOffsetIfJS_;
    330  }
    331 
    332  void setPCOffset(int32_t aOffset) {
    333    MOZ_ASSERT(isJsFrame());
    334    pcOffsetIfJS_ = aOffset;
    335  }
    336 
    337  // The offset of a pc into a script's code can actually be 0, so to
    338  // signify a nullptr pc, use a -1 index. This is checked against in
    339  // pc() and setPC() to set/get the right pc.
    340  static const int32_t NullPCOffset = -1;
    341 };
    342 
// Each thread has its own ProfilingStack. That thread modifies the
// ProfilingStack, pushing and popping elements as necessary.
//
// The ProfilingStack is also read periodically by the profiler's sampler
// thread. This happens only when the thread that owns the ProfilingStack is
// suspended. So there are no genuine parallel accesses.
//
// However, it is possible for pushing/popping to be interrupted by a periodic
// sample. Because of this, we need pushing/popping to be effectively atomic.
//
// - When pushing a new frame, we increment the stack pointer -- making the new
//   frame visible to the sampler thread -- only after the new frame has been
//   fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
//   the increment is a release-store, which ensures that this store is not
//   reordered before the writes of the frame.
//
// - When popping an old frame, the only operation is the decrementing of the
//   stack pointer, which is obviously atomic.
//
class ProfilingStack final {
 public:
  ProfilingStack() = default;

  MFBT_API ~ProfilingStack();

  // Push a label frame on top of the stack, growing the frame buffer first if
  // necessary. Safe with respect to a concurrently-sampling profiler thread;
  // see the comments inside for the required ordering.
  void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
                      ProfilingCategoryPair categoryPair, uint32_t flags = 0) {
    // This thread is the only one that ever changes the value of
    // stackPointer.
    // Store the value of the atomic in a non-atomic local variable so that
    // the compiler won't generate two separate loads from the atomic for
    // the size check and the frames[] array indexing operation.
    uint32_t stackPointerVal = stackPointer;

    if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
      ensureCapacitySlow();
    }
    frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
                                           categoryPair, flags);

    // This must happen at the end! The compiler will not reorder this
    // update because stackPointer is Atomic<..., ReleaseAcquire>, so
    // the writes above will not be reordered below the stackPointer store.
    // Do the read and the write as two separate statements, in order to
    // make it clear that we don't need an atomic increment, which would be
    // more expensive on x86 than the separate operations done here.
    // However, don't use stackPointerVal here; instead, allow the compiler
    // to turn this store into a non-atomic increment instruction which
    // takes up less code size.
    stackPointer = stackPointer + 1;
  }

  // Push an sp marker frame; see Flags::IS_SP_MARKER_FRAME for what these
  // frames are for.
  void pushSpMarkerFrame(void* sp) {
    uint32_t oldStackPointer = stackPointer;

    if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
      ensureCapacitySlow();
    }
    frames[oldStackPointer].initSpMarkerFrame(sp);

    // This must happen at the end, see the comment in pushLabelFrame.
    stackPointer = oldStackPointer + 1;
  }

  // Push a JS frame for the given script, bytecode offset and realm.
  void pushJsOffsetFrame(const char* label, const char* dynamicString,
                         void* script, int32_t offset, uint64_t aRealmID) {
    // This thread is the only one that ever changes the value of
    // stackPointer. Only load the atomic once.
    uint32_t oldStackPointer = stackPointer;

    if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
      ensureCapacitySlow();
    }
    frames[oldStackPointer].initJsFrame(label, dynamicString, script, offset,
                                        aRealmID);

    // This must happen at the end, see the comment in pushLabelFrame.
    stackPointer = stackPointer + 1;
  }

  // Pop the topmost frame. The only write is the stack pointer decrement,
  // which is atomic on its own, so no further ordering is needed here.
  void pop() {
    MOZ_ASSERT(stackPointer > 0);
    // Do the read and the write as two separate statements, in order to
    // make it clear that we don't need an atomic decrement, which would be
    // more expensive on x86 than the separate operations done here.
    // This thread is the only one that ever changes the value of
    // stackPointer.
    uint32_t oldStackPointer = stackPointer;
    stackPointer = oldStackPointer - 1;
  }

  // Number of frames currently on the stack; see the stackPointer comment
  // below for why this may exceed stackCapacity().
  uint32_t stackSize() const { return stackPointer; }
  uint32_t stackCapacity() const { return capacity; }

 private:
  // Out of line path for expanding the buffer, since otherwise this would get
  // inlined in every DOM WebIDL call.
  MFBT_API MOZ_COLD void ensureCapacitySlow();

  // No copying.
  ProfilingStack(const ProfilingStack&) = delete;
  void operator=(const ProfilingStack&) = delete;

  // No moving either.
  ProfilingStack(ProfilingStack&&) = delete;
  void operator=(ProfilingStack&&) = delete;

  // Number of ProfilingStackFrame slots allocated in |frames|.
  uint32_t capacity = 0;

 public:
  // The pointer to the stack frames, this is read from the profiler thread and
  // written from the current thread.
  //
  // This is effectively a unique pointer.
  Atomic<ProfilingStackFrame*, SequentiallyConsistent> frames{nullptr};

  // This may exceed the capacity, so instead use the stackSize() method to
  // determine the number of valid frames in stackFrames. When this is less
  // than stackCapacity(), it refers to the first free stackframe past the top
  // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack
  // frame).
  //
  // WARNING WARNING WARNING
  //
  // This is an atomic variable that uses ReleaseAcquire memory ordering.
  // See the "Concurrency considerations" paragraph at the top of this file
  // for more details.
  Atomic<uint32_t, ReleaseAcquire> stackPointer{0};
};
    472 
    473 class AutoGeckoProfilerEntry;
    474 class GeckoProfilerEntryMarker;
    475 class GeckoProfilerBaselineOSRMarker;
    476 
    477 class GeckoProfilerThread {
    478  friend class AutoGeckoProfilerEntry;
    479  friend class GeckoProfilerEntryMarker;
    480  friend class GeckoProfilerBaselineOSRMarker;
    481 
    482  ProfilingStack* profilingStack_;
    483 
    484  // Same as profilingStack_ if the profiler is currently active, otherwise
    485  // null.
    486  ProfilingStack* profilingStackIfEnabled_;
    487 
    488 public:
    489  MFBT_API GeckoProfilerThread();
    490 
    491  uint32_t stackPointer() {
    492    MOZ_ASSERT(infraInstalled());
    493    return profilingStack_->stackPointer;
    494  }
    495  ProfilingStackFrame* stack() { return profilingStack_->frames; }
    496  ProfilingStack* getProfilingStack() { return profilingStack_; }
    497  ProfilingStack* getProfilingStackIfEnabled() {
    498    return profilingStackIfEnabled_;
    499  }
    500 
    501  /*
    502   * True if the profiler infrastructure is setup.  Should be true in builds
    503   * that include profiler support except during early startup or late
    504   * shutdown.  Unrelated to the presence of the Gecko Profiler addon.
    505   */
    506  bool infraInstalled() { return profilingStack_ != nullptr; }
    507 
    508  MFBT_API void setProfilingStack(ProfilingStack* profilingStack, bool enabled);
    509  void enable(bool enable) {
    510    profilingStackIfEnabled_ = enable ? profilingStack_ : nullptr;
    511  }
    512 };
    513 
    514 }  // namespace baseprofiler
    515 }  // namespace mozilla
    516 
    517 #endif /* BaseProfilingStack_h */