tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit ee3f35799ff58929360b98c252c454c6b1a2a798
parent 53e770b01f1a855b61cc0a681aed10ee7afc8ae1
Author: Atila Butkovits <abutkovits@mozilla.com>
Date:   Tue, 11 Nov 2025 11:54:31 +0200

Revert "Bug 1995254 - Always rely on QueryPerformanceCounter for Windows TimeStamp. r=glandium,profiler-reviewers,mstange" for causing bustages at TimeStamp_windows.cpp.

This reverts commit a2598ac83898413b2019b523a97bbf19ec91ad2a.

Diffstat:
M dom/midi/midir_impl/src/lib.rs | 12 ++++++++++++
M dom/performance/Performance.cpp | 4 ++--
M ipc/glue/IPCMessageUtilsSpecializations.h | 24 ++++++++++++++++++++++++
M js/src/jit/PerfSpewer.cpp | 2 +-
M mozglue/baseprofiler/core/platform.cpp | 7 ++++---
M mozglue/misc/TimeStamp.h | 63 ++++++++++++++++++++++++++++++++++++++++++++-------------------
M mozglue/misc/TimeStamp_windows.cpp | 528 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
A mozglue/misc/TimeStamp_windows.h | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M mozglue/misc/moz.build | 1 +
M tools/profiler/core/platform.cpp | 7 ++++---
M tools/profiler/public/ETWTools.h | 16 +++++++++++-----
11 files changed, 698 insertions(+), 79 deletions(-)

diff --git a/dom/midi/midir_impl/src/lib.rs b/dom/midi/midir_impl/src/lib.rs @@ -14,6 +14,18 @@ use uuid::Uuid; * You can obtain one at http://mozilla.org/MPL/2.0/. */ extern crate midir; +#[cfg(target_os = "windows")] +#[repr(C)] +#[derive(Clone, Copy)] +pub struct GeckoTimeStamp { + gtc: u64, + qpc: u64, + + is_null: u8, + has_qpc: u8, +} + +#[cfg(not(target_os = "windows"))] #[repr(C)] #[derive(Clone, Copy)] pub struct GeckoTimeStamp { diff --git a/dom/performance/Performance.cpp b/dom/performance/Performance.cpp @@ -708,8 +708,8 @@ void Performance::MaybeEmitExternalProfilerMarker( uint64_t rawStart = startTimeStamp.RawClockMonotonicNanosecondsSinceBoot(); uint64_t rawEnd = endTimeStamp.RawClockMonotonicNanosecondsSinceBoot(); #elif XP_WIN - uint64_t rawStart = startTimeStamp.RawQueryPerformanceCounterValue(); - uint64_t rawEnd = endTimeStamp.RawQueryPerformanceCounterValue(); + uint64_t rawStart = startTimeStamp.RawQueryPerformanceCounterValue().value(); + uint64_t rawEnd = endTimeStamp.RawQueryPerformanceCounterValue().value(); #elif XP_MACOSX uint64_t rawStart = startTimeStamp.RawMachAbsoluteTimeNanoseconds(); uint64_t rawEnd = endTimeStamp.RawMachAbsoluteTimeNanoseconds(); diff --git a/ipc/glue/IPCMessageUtilsSpecializations.h b/ipc/glue/IPCMessageUtilsSpecializations.h @@ -27,6 +27,9 @@ #include "mozilla/IntegerRange.h" #include "mozilla/Maybe.h" #include "mozilla/TimeStamp.h" +#ifdef XP_WIN +# include "mozilla/TimeStamp_windows.h" +#endif #include "mozilla/UniquePtr.h" #include "mozilla/Vector.h" @@ -432,6 +435,27 @@ struct ParamTraits<mozilla::TimeStamp> { }; }; +#ifdef XP_WIN + +template <> +struct ParamTraits<mozilla::TimeStampValue> { + typedef mozilla::TimeStampValue paramType; + static void Write(MessageWriter* aWriter, const paramType& aParam) { + WriteParam(aWriter, aParam.mGTC); + WriteParam(aWriter, aParam.mQPC); + WriteParam(aWriter, aParam.mIsNull); + WriteParam(aWriter, aParam.mHasQPC); + } + static bool Read(MessageReader* aReader, 
paramType* aResult) { + return (ReadParam(aReader, &aResult->mGTC) && + ReadParam(aReader, &aResult->mQPC) && + ReadParam(aReader, &aResult->mIsNull) && + ReadParam(aReader, &aResult->mHasQPC)); + } +}; + +#endif + template <> struct ParamTraits<mozilla::dom::ipc::StructuredCloneData> { typedef mozilla::dom::ipc::StructuredCloneData paramType; diff --git a/js/src/jit/PerfSpewer.cpp b/js/src/jit/PerfSpewer.cpp @@ -122,7 +122,7 @@ static uint64_t GetMonotonicTimestamp() { # ifdef XP_LINUX return TimeStamp::Now().RawClockMonotonicNanosecondsSinceBoot(); # elif XP_WIN - return TimeStamp::Now().RawQueryPerformanceCounterValue(); + return TimeStamp::Now().RawQueryPerformanceCounterValue().value(); # elif XP_DARWIN return TimeStamp::Now().RawMachAbsoluteTimeNanoseconds(); # else diff --git a/mozglue/baseprofiler/core/platform.cpp b/mozglue/baseprofiler/core/platform.cpp @@ -1618,9 +1618,10 @@ static void MaybeWriteRawStartTimeValue(SpliceableJSONWriter& aWriter, #endif #ifdef XP_WIN - uint64_t startTimeQPC = aStartTime.RawQueryPerformanceCounterValue(); - aWriter.DoubleProperty("startTimeAsQueryPerformanceCounterValue", - static_cast<double>(startTimeQPC)); + Maybe<uint64_t> startTimeQPC = aStartTime.RawQueryPerformanceCounterValue(); + if (startTimeQPC) + aWriter.DoubleProperty("startTimeAsQueryPerformanceCounterValue", + static_cast<double>(*startTimeQPC)); #endif } diff --git a/mozglue/misc/TimeStamp.h b/mozglue/misc/TimeStamp.h @@ -21,9 +21,19 @@ template <typename T> struct ParamTraits; } // namespace IPC +#ifdef XP_WIN +// defines TimeStampValue as a complex value keeping both +// GetTickCount and QueryPerformanceCounter values +# include "TimeStamp_windows.h" + +# include "mozilla/Maybe.h" // For TimeStamp::RawQueryPerformanceCounterValue +#endif + namespace mozilla { -using TimeStampValue = uint64_t; +#ifndef XP_WIN +typedef uint64_t TimeStampValue; +#endif class TimeStamp; class TimeStampTests; @@ -43,7 +53,11 @@ class BaseTimeDurationPlatformUtils { * Instances 
of this class represent the length of an interval of time. * Negative durations are allowed, meaning the end is before the start. * - * Internally the duration is stored as a system-dependent unit. + * Internally the duration is stored as a int64_t in units of + * PR_TicksPerSecond() when building with NSPR interval timers, or a + * system-dependent unit when building with system clocks. The + * system-dependent unit must be constant, otherwise the semantics of + * this class would be broken. * * The ValueCalculator template parameter determines how arithmetic * operations are performed on the integer count of ticks (mValue). @@ -341,8 +355,11 @@ typedef BaseTimeDuration<TimeDurationValueCalculator> TimeDuration; * to a TimeStamp to get a new TimeStamp. You can't do something * meaningless like add two TimeStamps. * - * Internally this is implemented as a wrapper around high-resolution, - * monotonic, platform-dependent system clocks. + * Internally this is implemented as either a wrapper around + * - high-resolution, monotonic, system clocks if they exist on this + * platform + * - PRIntervalTime otherwise. We detect wraparounds of + * PRIntervalTime and work around them. * * This class is similar to C++11's time_point, however it is * explicitly nullable and provides an IsNull() method. time_point @@ -354,6 +371,20 @@ typedef BaseTimeDuration<TimeDurationValueCalculator> TimeDuration; * Note that, since TimeStamp objects are small, prefer to pass them by value * unless there is a specific reason not to do so. */ +#if defined(XP_WIN) +// If this static_assert fails then possibly the warning comment below is no +// longer valid and should be removed. +static_assert(sizeof(TimeStampValue) > 8); +#endif +/* + * WARNING: On Windows, each TimeStamp is represented internally by two + * different raw values (one from GTC and one from QPC) and which value gets + * used for a given operation depends on whether both operands have QPC values + * or not. 
This duality of values can lead to some surprising results when + * mixing TimeStamps with and without QPC values, such as comparisons being + * non-transitive (ie, a > b > c might not imply a > c). See bug 1829983 for + * more details/an example. + */ class TimeStamp { public: using DurationType = TimeDuration; @@ -402,21 +433,13 @@ class TimeStamp { * * Now() is trying to ensure the best possible precision on each platform, * at least one millisecond. - */ - static TimeStamp Now() { return Now(true); } - - /** - * Return a (coarse) timestamp reflecting the current elapsed system time. - * NowLoRes() behaves different depending on the OS: * - * Windows: NowLoRes() == Now(), uses always QueryPerformanceCounter. - * MacOS: NowLoRes() == Now(), uses always mach_absolute_time. - * Posix: If the kernel supports CLOCK_MONOTONIC_COARSE use that, - * CLOCK_MONOTONIC otherwise. - * - * Used to promise better performance, which might still be true only for - * Posix. + * NowLoRes() has been introduced to workaround performance problems of + * QueryPerformanceCounter on the Windows platform. NowLoRes() is giving + * lower precision, usually 15.6 ms, but with very good performance benefit. + * Use it for measurements of longer times, like >200ms timeouts. */ + static TimeStamp Now() { return Now(true); } static TimeStamp NowLoRes() { return Now(false); } /** @@ -458,8 +481,10 @@ class TimeStamp { #endif #ifdef XP_WIN - uint64_t RawQueryPerformanceCounterValue() const { - return static_cast<uint64_t>(mValue); + Maybe<uint64_t> RawQueryPerformanceCounterValue() const { + // mQPC is stored in `mt` i.e. QueryPerformanceCounter * 1000 + // so divide out the 1000 + return mValue.mHasQPC ? Some(mValue.mQPC / 1000ULL) : Nothing(); } #endif diff --git a/mozglue/misc/TimeStamp_windows.cpp b/mozglue/misc/TimeStamp_windows.cpp @@ -4,112 +4,548 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +// Implement TimeStamp::Now() with QueryPerformanceCounter() controlled with +// values of GetTickCount64(). + #include "mozilla/DynamicallyLinkedFunctionPtr.h" +#include "mozilla/MathAlgorithms.h" #include "mozilla/TimeStamp.h" +#include "mozilla/Uptime.h" + +#include <stdio.h> +#include <stdlib.h> #include <intrin.h> #include <windows.h> -// Historical note: We used to sample both QueryPerformanceCounter (QPC) and -// GetTickCount (GTC) timestamps in the past, as very early implementations of -// QPC were buggy. We had heuristics to determine if QPC is unreliable and -// would have switched to GTC in case, which could cause unexpected time -// travels between QPC and GPC values when that occured. +// To enable logging define to your favorite logging API +#define LOG(x) + +class AutoCriticalSection { + public: + explicit AutoCriticalSection(LPCRITICAL_SECTION aSection) + : mSection(aSection) { + ::EnterCriticalSection(mSection); + } + ~AutoCriticalSection() { ::LeaveCriticalSection(mSection); } + + private: + LPCRITICAL_SECTION mSection; +}; + +// Estimate of the smallest duration of time we can measure. +static volatile ULONGLONG sResolution; +static volatile ULONGLONG sResolutionSigDigs; +static const double kNsPerSecd = 1000000000.0; +static const LONGLONG kNsPerMillisec = 1000000; + +// ---------------------------------------------------------------------------- +// Global constants +// ---------------------------------------------------------------------------- + +// Tolerance to failures settings. // -// Since Windows 8 together with the then modern CPUs, QPC became both reliable -// and almost as fast as GTC timestamps and provides a much higher resolution. -// QPC in general exists long enough even on older systems than Windows 8, such -// that we can just always rely on it, as we do in rust. +// What is the interval we want to have failure free. 
+// in [ms] +static const uint32_t kFailureFreeInterval = 5000; +// How many failures we are willing to tolerate in the interval. +static const uint32_t kMaxFailuresPerInterval = 4; +// What is the threshold to treat fluctuations as actual failures. +// in [ms] +static const uint32_t kFailureThreshold = 50; + +// If we are not able to get the value of GTC time increment, use this value +// which is the most usual increment. +static const DWORD kDefaultTimeIncrement = 156001; // ---------------------------------------------------------------------------- // Global variables, not changing at runtime // ---------------------------------------------------------------------------- -// Result of QueryPerformanceFrequency, set only once on startup. -static double sTicksPerSecd; -static double sTicksPerMsd; +// Result of QueryPerformanceFrequency +// We use default of 1 for the case we can't use QueryPerformanceCounter +// to make mt/ms conversions work despite that. +static uint64_t sFrequencyPerSec = 1; + +namespace mozilla { + +MFBT_API uint64_t GetQueryPerformanceFrequencyPerSec() { + return sFrequencyPerSec; +} + +} // namespace mozilla + +// How much we are tolerant to GTC occasional loose of resoltion. +// This number says how many multiples of the minimal GTC resolution +// detected on the system are acceptable. This number is empirical. +static const LONGLONG kGTCTickLeapTolerance = 4; + +// Base tolerance (more: "inability of detection" range) threshold is calculated +// dynamically, and kept in sGTCResolutionThreshold. +// +// Schematically, QPC worked "100%" correctly if ((GTC_now - GTC_epoch) - +// (QPC_now - QPC_epoch)) was in [-sGTCResolutionThreshold, +// sGTCResolutionThreshold] interval every time we'd compared two time stamps. +// If not, then we check the overflow behind this basic threshold +// is in kFailureThreshold. If not, we condider it as a QPC failure. 
If too +// many failures in short time are detected, QPC is considered faulty and +// disabled. +// +// Kept in [mt] +static LONGLONG sGTCResolutionThreshold; + +// If QPC is found faulty for two stamps in this interval, we engage +// the fault detection algorithm. For duration larger then this limit +// we bypass using durations calculated from QPC when jitter is detected, +// but don't touch the sUseQPC flag. +// +// Value is in [ms]. +static const uint32_t kHardFailureLimit = 2000; +// Conversion to [mt] +static LONGLONG sHardFailureLimit; + +// Conversion of kFailureFreeInterval and kFailureThreshold to [mt] +static LONGLONG sFailureFreeInterval; +static LONGLONG sFailureThreshold; + +// ---------------------------------------------------------------------------- +// Systemm status flags +// ---------------------------------------------------------------------------- + +// Flag for stable TSC that indicates platform where QPC is stable. +static bool sHasStableTSC = false; + +// ---------------------------------------------------------------------------- +// Global state variables, changing at runtime +// ---------------------------------------------------------------------------- + +// Initially true, set to false when QPC is found unstable and never +// returns back to true since that time. +static bool volatile sUseQPC = true; // ---------------------------------------------------------------------------- -// Useful constants +// Global lock // ---------------------------------------------------------------------------- -static constexpr double kMsPerSecd = 1000.0; -// Note: Resolution used to be sampled based on a loop of QPC calls. -// While it is true that on most systems we cannot expect to subsequently -// sample QPC values as fast as the QPC frequency, we still will get that -// as resolution of the sampled values, that is we have 1 tick resolution. 
-static constexpr LONGLONG kResolution = 1; +// Thread spin count before entering the full wait state for sTimeStampLock. +// Inspired by Rob Arnold's work on PRMJ_Now(). +static const DWORD kLockSpinCount = 4096; + +// Common mutex (thanks the relative complexity of the logic, this is better +// then using CMPXCHG8B.) +// It is protecting the globals bellow. +static CRITICAL_SECTION sTimeStampLock; + +// ---------------------------------------------------------------------------- +// Global lock protected variables +// ---------------------------------------------------------------------------- + +// Timestamp in future until QPC must behave correctly. +// Set to now + kFailureFreeInterval on first QPC failure detection. +// Set to now + E * kFailureFreeInterval on following errors, +// where E is number of errors detected during last kFailureFreeInterval +// milliseconds, calculated simply as: +// E = (sFaultIntoleranceCheckpoint - now) / kFailureFreeInterval + 1. +// When E > kMaxFailuresPerInterval -> disable QPC. +// +// Kept in [mt] +static ULONGLONG sFaultIntoleranceCheckpoint = 0; namespace mozilla { -// Result is in ticks. +// Result is in [mt] static inline ULONGLONG PerformanceCounter() { LARGE_INTEGER pc; - bool success = ::QueryPerformanceCounter(&pc); - MOZ_DIAGNOSTIC_ASSERT(success); - return pc.QuadPart; + ::QueryPerformanceCounter(&pc); + + // QueryPerformanceCounter may slightly jitter (not be 100% monotonic.) + // This is a simple go-backward protection for such a faulty hardware. + AutoCriticalSection lock(&sTimeStampLock); + + static decltype(LARGE_INTEGER::QuadPart) last; + if (last > pc.QuadPart) { + return last * 1000ULL; + } + last = pc.QuadPart; + return pc.QuadPart * 1000ULL; } -static void InitConstants() { - // Query the frequency from QPC and rely on it for all values. 
- LARGE_INTEGER freq; - bool hasQPC = ::QueryPerformanceFrequency(&freq); - MOZ_RELEASE_ASSERT(hasQPC); - sTicksPerSecd = double(freq.QuadPart); - sTicksPerMsd = sTicksPerSecd / kMsPerSecd; +static void InitThresholds() { + DWORD timeAdjustment = 0, timeIncrement = 0; + BOOL timeAdjustmentDisabled; + GetSystemTimeAdjustment(&timeAdjustment, &timeIncrement, + &timeAdjustmentDisabled); + + LOG(("TimeStamp: timeIncrement=%d [100ns]", timeIncrement)); + + if (!timeIncrement) { + timeIncrement = kDefaultTimeIncrement; + } + + // Ceiling to a millisecond + // Example values: 156001, 210000 + DWORD timeIncrementCeil = timeIncrement; + // Don't want to round up if already rounded, values will be: 156000, 209999 + timeIncrementCeil -= 1; + // Convert to ms, values will be: 15, 20 + timeIncrementCeil /= 10000; + // Round up, values will be: 16, 21 + timeIncrementCeil += 1; + // Convert back to 100ns, values will be: 160000, 210000 + timeIncrementCeil *= 10000; + + // How many milli-ticks has the interval rounded up + LONGLONG ticksPerGetTickCountResolutionCeiling = + (int64_t(timeIncrementCeil) * sFrequencyPerSec) / 10000LL; + + // GTC may jump by 32 (2*16) ms in two steps, therefor use the ceiling value. 
+ sGTCResolutionThreshold = + LONGLONG(kGTCTickLeapTolerance * ticksPerGetTickCountResolutionCeiling); + + sHardFailureLimit = ms2mt(kHardFailureLimit); + sFailureFreeInterval = ms2mt(kFailureFreeInterval); + sFailureThreshold = ms2mt(kFailureThreshold); +} + +static void InitResolution() { + // 10 total trials is arbitrary: what we're trying to avoid by + // looping is getting unlucky and being interrupted by a context + // switch or signal, or being bitten by paging/cache effects + + ULONGLONG minres = ~0ULL; + if (sUseQPC) { + int loops = 10; + do { + ULONGLONG start = PerformanceCounter(); + ULONGLONG end = PerformanceCounter(); + + ULONGLONG candidate = (end - start); + if (candidate < minres) { + minres = candidate; + } + } while (--loops && minres); + + if (0 == minres) { + minres = 1; + } + } else { + // GetTickCount has only ~16ms known resolution + minres = ms2mt(16); + } + + // Converting minres that is in [mt] to nanosecods, multiplicating + // the argument to preserve resolution. + ULONGLONG result = mt2ms(minres * kNsPerMillisec); + if (0 == result) { + result = 1; + } + + sResolution = result; + + // find the number of significant digits in mResolution, for the + // sake of ToSecondsSigDigits() + ULONGLONG sigDigs; + for (sigDigs = 1; !(sigDigs == result || 10 * sigDigs > result); + sigDigs *= 10); + + sResolutionSigDigs = sigDigs; } // ---------------------------------------------------------------------------- +// TimeStampValue implementation +// ---------------------------------------------------------------------------- +MFBT_API TimeStampValue& TimeStampValue::operator+=(const int64_t aOther) { + mGTC += aOther; + mQPC += aOther; + return *this; +} + +MFBT_API TimeStampValue& TimeStampValue::operator-=(const int64_t aOther) { + mGTC -= aOther; + mQPC -= aOther; + return *this; +} + +// If the duration is less then two seconds, perform check of QPC stability +// by comparing both GTC and QPC calculated durations of this and aOther. 
+MFBT_API uint64_t TimeStampValue::CheckQPC(const TimeStampValue& aOther) const { + uint64_t deltaGTC = mGTC - aOther.mGTC; + + if (!mHasQPC || !aOther.mHasQPC) { // Both not holding QPC + return deltaGTC; + } + + uint64_t deltaQPC = mQPC - aOther.mQPC; + + if (sHasStableTSC) { // For stable TSC there is no need to check + return deltaQPC; + } + + // Check QPC is sane before using it. + int64_t diff = DeprecatedAbs(int64_t(deltaQPC) - int64_t(deltaGTC)); + if (diff <= sGTCResolutionThreshold) { + return deltaQPC; + } + + // Treat absolutely for calibration purposes + int64_t duration = DeprecatedAbs(int64_t(deltaGTC)); + int64_t overflow = diff - sGTCResolutionThreshold; + + LOG(("TimeStamp: QPC check after %llums with overflow %1.4fms", + mt2ms(duration), mt2ms_f(overflow))); + + if (overflow <= sFailureThreshold) { // We are in the limit, let go. + return deltaQPC; + } + + // QPC deviates, don't use it, since now this method may only return deltaGTC. + + if (!sUseQPC) { // QPC already disabled, no need to run the fault tolerance + // algorithm. + return deltaGTC; + } + + LOG(("TimeStamp: QPC jittered over failure threshold")); + + if (duration < sHardFailureLimit) { + // Interval between the two time stamps is very short, consider + // QPC as unstable and record a failure. + uint64_t now = ms2mt(GetTickCount64()); + + AutoCriticalSection lock(&sTimeStampLock); + + if (sFaultIntoleranceCheckpoint && sFaultIntoleranceCheckpoint > now) { + // There's already been an error in the last fault intollerant interval. + // Time since now to the checkpoint actually holds information on how many + // failures there were in the failure free interval we have defined. 
+ uint64_t failureCount = + (sFaultIntoleranceCheckpoint - now + sFailureFreeInterval - 1) / + sFailureFreeInterval; + if (failureCount > kMaxFailuresPerInterval) { + sUseQPC = false; + LOG(("TimeStamp: QPC disabled")); + } else { + // Move the fault intolerance checkpoint more to the future, prolong it + // to reflect the number of detected failures. + ++failureCount; + sFaultIntoleranceCheckpoint = now + failureCount * sFailureFreeInterval; + LOG(("TimeStamp: recording %dth QPC failure", failureCount)); + } + } else { + // Setup fault intolerance checkpoint in the future for first detected + // error. + sFaultIntoleranceCheckpoint = now + sFailureFreeInterval; + LOG(("TimeStamp: recording 1st QPC failure")); + } + } + + return deltaGTC; +} + +MFBT_API uint64_t +TimeStampValue::operator-(const TimeStampValue& aOther) const { + if (IsNull() && aOther.IsNull()) { + return uint64_t(0); + } + + return CheckQPC(aOther); +} + +class TimeStampValueTests { + // Check that nullity is set/not set correctly. + static_assert(TimeStampValue{0}.IsNull()); + static_assert(!TimeStampValue{1}.IsNull()); + + // Check that we ignore GTC when both TimeStampValues have QPC. (In each of + // these tests, looking at GTC would give a different result.) 
+ static_assert(TimeStampValue{1, 2, true} < TimeStampValue{1, 3, true}); + static_assert(!(TimeStampValue{1, 2, true} == TimeStampValue{1, 3, true})); + + static_assert(TimeStampValue{2, 2, true} < TimeStampValue{1, 3, true}); + static_assert(TimeStampValue{2, 2, true} <= TimeStampValue{1, 3, true}); + static_assert(!(TimeStampValue{2, 2, true} > TimeStampValue{1, 3, true})); + + static_assert(TimeStampValue{1, 3, true} > TimeStampValue{1, 2, true}); + static_assert(!(TimeStampValue{1, 3, true} == TimeStampValue{1, 2, true})); + + static_assert(TimeStampValue{1, 3, true} > TimeStampValue{2, 2, true}); + static_assert(TimeStampValue{1, 3, true} >= TimeStampValue{2, 2, true}); + static_assert(!(TimeStampValue{1, 3, true} < TimeStampValue{2, 2, true})); + + static_assert(TimeStampValue{1, 3, true} == TimeStampValue{2, 3, true}); + static_assert(!(TimeStampValue{1, 3, true} < TimeStampValue{2, 3, true})); + + static_assert(TimeStampValue{1, 2, true} != TimeStampValue{1, 3, true}); + static_assert(!(TimeStampValue{1, 2, true} == TimeStampValue{1, 3, true})); + + // Check that, if either TimeStampValue doesn't have QPC, we only look at the + // GTC values. These are the same cases as above, except that we accept the + // opposite results because we turn off QPC on one or both of the + // TimeStampValue's. 
+ static_assert(TimeStampValue{1, 2, false} == TimeStampValue{1, 3, true}); + static_assert(TimeStampValue{1, 2, true} == TimeStampValue{1, 3, false}); + static_assert(TimeStampValue{1, 2, false} == TimeStampValue{1, 3, false}); + + static_assert(TimeStampValue{2, 2, false} > TimeStampValue{1, 3, true}); + static_assert(TimeStampValue{2, 2, true} > TimeStampValue{1, 3, false}); + static_assert(TimeStampValue{2, 2, false} > TimeStampValue{1, 3, false}); + + static_assert(TimeStampValue{1, 3, false} == TimeStampValue{1, 2, true}); + static_assert(TimeStampValue{1, 3, true} == TimeStampValue{1, 2, false}); + static_assert(TimeStampValue{1, 3, false} == TimeStampValue{1, 2, false}); + + static_assert(TimeStampValue{1, 3, false} < TimeStampValue{2, 2, true}); + static_assert(TimeStampValue{1, 3, true} < TimeStampValue{2, 2, false}); + static_assert(TimeStampValue{1, 3, false} < TimeStampValue{2, 2, false}); + + static_assert(TimeStampValue{1, 3, false} < TimeStampValue{2, 3, true}); + static_assert(TimeStampValue{1, 3, true} < TimeStampValue{2, 3, false}); + static_assert(TimeStampValue{1, 3, false} < TimeStampValue{2, 3, false}); + + static_assert(TimeStampValue{1, 2, false} == TimeStampValue{1, 3, true}); + static_assert(TimeStampValue{1, 2, true} == TimeStampValue{1, 3, false}); + static_assert(TimeStampValue{1, 2, false} == TimeStampValue{1, 3, false}); +}; + +// ---------------------------------------------------------------------------- // TimeDuration and TimeStamp implementation // ---------------------------------------------------------------------------- MFBT_API double BaseTimeDurationPlatformUtils::ToSeconds(int64_t aTicks) { - return double(aTicks) / sTicksPerSecd; + // Converting before arithmetic avoids blocked store forward + return double(aTicks) / (double(sFrequencyPerSec) * 1000.0); } MFBT_API double BaseTimeDurationPlatformUtils::ToSecondsSigDigits( int64_t aTicks) { - // As we fix the resolution to 1, all digits are significant and there are - // 
no extra calculations needed. Ensure we do not change this inadvertedly. - static_assert(kResolution == 1); - return ToSeconds(aTicks); + // don't report a value < mResolution ... + LONGLONG resolution = sResolution; + LONGLONG resolutionSigDigs = sResolutionSigDigs; + LONGLONG valueSigDigs = resolution * (aTicks / resolution); + // and chop off insignificant digits + valueSigDigs = resolutionSigDigs * (valueSigDigs / resolutionSigDigs); + return double(valueSigDigs) / kNsPerSecd; } MFBT_API int64_t BaseTimeDurationPlatformUtils::TicksFromMilliseconds(double aMilliseconds) { - double result = sTicksPerMsd * aMilliseconds; + double result = ms2mt(aMilliseconds); // NOTE: this MUST be a >= test, because int64_t(double(INT64_MAX)) // overflows and gives INT64_MIN. if (result >= double(INT64_MAX)) { return INT64_MAX; - } - if (result <= double(INT64_MIN)) { + } else if (result <= double(INT64_MIN)) { return INT64_MIN; } - return (int64_t)result; + return result; } MFBT_API int64_t BaseTimeDurationPlatformUtils::ResolutionInTicks() { - return static_cast<int64_t>(kResolution); + return static_cast<int64_t>(sResolution); +} + +static bool HasStableTSC() { +#if defined(_M_ARM64) + // AArch64 defines that its system counter run at a constant rate + // regardless of the current clock frequency of the system. See "The + // Generic Timer", section D7, in the ARMARM for ARMv8. + return true; +#else + union { + int regs[4]; + struct { + int nIds; + char cpuString[12]; + }; + } cpuInfo; + + __cpuid(cpuInfo.regs, 0); + // Only allow Intel or AMD CPUs for now. + // The order of the registers is reg[1], reg[3], reg[2]. We just adjust the + // string so that we can compare in one go. 
+ if (_strnicmp(cpuInfo.cpuString, "GenuntelineI", sizeof(cpuInfo.cpuString)) && + _strnicmp(cpuInfo.cpuString, "AuthcAMDenti", sizeof(cpuInfo.cpuString))) { + return false; + } + + int regs[4]; + + // detect if the Advanced Power Management feature is supported + __cpuid(regs, 0x80000000); + if ((unsigned int)regs[0] < 0x80000007) { + // XXX should we return true here? If there is no APM there may be + // no way how TSC can run out of sync among cores. + return false; + } + + __cpuid(regs, 0x80000007); + // if bit 8 is set than TSC will run at a constant rate + // in all ACPI P-states, C-states and T-states + return regs[3] & (1 << 8); +#endif } -// Note that we init early enough during startup such that we are supposed to -// not yet have started other threads which could try to use us. static bool gInitialized = false; MFBT_API void TimeStamp::Startup() { if (gInitialized) { return; } - InitConstants(); + gInitialized = true; + + // Decide which implementation to use for the high-performance timer. + + InitializeCriticalSectionAndSpinCount(&sTimeStampLock, kLockSpinCount); + + bool forceGTC = false; + bool forceQPC = false; + + char* modevar = getenv("MOZ_TIMESTAMP_MODE"); + if (modevar) { + if (!strcmp(modevar, "QPC")) { + forceQPC = true; + } else if (!strcmp(modevar, "GTC")) { + forceGTC = true; + } + } + + LARGE_INTEGER freq; + sUseQPC = !forceGTC && ::QueryPerformanceFrequency(&freq); + if (!sUseQPC) { + // No Performance Counter. Fall back to use GetTickCount64. 
+ InitResolution(); + + LOG(("TimeStamp: using GetTickCount64")); + return; + } + + sHasStableTSC = forceQPC || HasStableTSC(); + LOG(("TimeStamp: HasStableTSC=%d", sHasStableTSC)); + + sFrequencyPerSec = freq.QuadPart; + LOG(("TimeStamp: QPC frequency=%llu", sFrequencyPerSec)); + + InitThresholds(); + InitResolution(); + + return; } -MFBT_API void TimeStamp::Shutdown() {} +MFBT_API void TimeStamp::Shutdown() { DeleteCriticalSection(&sTimeStampLock); } + +TimeStampValue NowInternal(bool aHighResolution) { + // sUseQPC is volatile + bool useQPC = (aHighResolution && sUseQPC); + + // Both values are in [mt] units. + ULONGLONG QPC = useQPC ? PerformanceCounter() : uint64_t(0); + ULONGLONG GTC = ms2mt(GetTickCount64()); + return TimeStampValue(GTC, QPC, useQPC); +} MFBT_API TimeStamp TimeStamp::Now(bool aHighResolution) { - MOZ_ASSERT(gInitialized); - return TimeStamp((TimeStampValue)PerformanceCounter()); + return TimeStamp(NowInternal(aHighResolution)); } // Computes and returns the process uptime in microseconds. diff --git a/mozglue/misc/TimeStamp_windows.h b/mozglue/misc/TimeStamp_windows.h @@ -0,0 +1,113 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_TimeStamp_windows_h +#define mozilla_TimeStamp_windows_h + +#include "mozilla/Types.h" + +namespace mozilla { + +/** + * The [mt] unit: + * + * Many values are kept in ticks of the Performance Counter x 1000, + * further just referred as [mt], meaning milli-ticks. + * + * This is needed to preserve maximum precision of the performance frequency + * representation. GetTickCount64 values in milliseconds are multiplied with + * frequency per second. 
Therefore we need to multiply QPC value by 1000 to + * have the same units to allow simple arithmentic with both QPC and GTC. + */ +#define ms2mt(x) ((x) * mozilla::GetQueryPerformanceFrequencyPerSec()) +#define mt2ms(x) ((x) / mozilla::GetQueryPerformanceFrequencyPerSec()) +#define mt2ms_f(x) (double(x) / mozilla::GetQueryPerformanceFrequencyPerSec()) + +MFBT_API uint64_t GetQueryPerformanceFrequencyPerSec(); + +class TimeStamp; +class TimeStampValue; +class TimeStampValueTests; +class TimeStampTests; + +TimeStampValue NowInternal(bool aHighResolution); + +class TimeStampValue { + friend TimeStampValue NowInternal(bool); + friend bool IsCanonicalTimeStamp(TimeStampValue); + friend struct IPC::ParamTraits<mozilla::TimeStampValue>; + friend class TimeStamp; + friend class TimeStampValueTests; + friend class TimeStampTests; + + // Both QPC and GTC are kept in [mt] units. + uint64_t mGTC; + uint64_t mQPC; + + bool mIsNull; + bool mHasQPC; + + constexpr MFBT_API TimeStampValue(uint64_t aGTC, uint64_t aQPC, bool aHasQPC) + : mGTC(aGTC), + mQPC(aQPC), + mIsNull(aGTC == 0 && aQPC == 0), + mHasQPC(aHasQPC) {} + + // This constructor should be explicit but it is replacing a constructor that + // was MOZ_IMPLICIT and there are many locations that are using the automatic + // conversion. 
+ constexpr MOZ_IMPLICIT MFBT_API TimeStampValue(uint64_t aGTCAndQPC) + : TimeStampValue(aGTCAndQPC, aGTCAndQPC, true) {} + + MFBT_API uint64_t CheckQPC(const TimeStampValue& aOther) const; + + public: + MFBT_API uint64_t operator-(const TimeStampValue& aOther) const; + + TimeStampValue operator+(const int64_t aOther) const { + return TimeStampValue(mGTC + aOther, mQPC + aOther, mHasQPC); + } + TimeStampValue operator-(const int64_t aOther) const { + return TimeStampValue(mGTC - aOther, mQPC - aOther, mHasQPC); + } + MFBT_API TimeStampValue& operator+=(const int64_t aOther); + MFBT_API TimeStampValue& operator-=(const int64_t aOther); + + constexpr bool operator<(const TimeStampValue& aOther) const { + return mHasQPC && aOther.mHasQPC ? mQPC < aOther.mQPC : mGTC < aOther.mGTC; + } + constexpr bool operator>(const TimeStampValue& aOther) const { + return mHasQPC && aOther.mHasQPC ? mQPC > aOther.mQPC : mGTC > aOther.mGTC; + } + constexpr bool operator<=(const TimeStampValue& aOther) const { + return mHasQPC && aOther.mHasQPC ? mQPC <= aOther.mQPC + : mGTC <= aOther.mGTC; + } + constexpr bool operator>=(const TimeStampValue& aOther) const { + return mHasQPC && aOther.mHasQPC ? mQPC >= aOther.mQPC + : mGTC >= aOther.mGTC; + } + constexpr bool operator==(const TimeStampValue& aOther) const { + return mHasQPC && aOther.mHasQPC ? mQPC == aOther.mQPC + : mGTC == aOther.mGTC; + } + constexpr bool operator!=(const TimeStampValue& aOther) const { + return mHasQPC && aOther.mHasQPC ? 
mQPC != aOther.mQPC + : mGTC != aOther.mGTC; + } + constexpr bool IsNull() const { return mIsNull; } + +#if defined(DEBUG) + uint64_t GTC() const { return mGTC; } + uint64_t QPC() const { return mQPC; } + + bool HasQPC() const { return mHasQPC; } +#endif +}; + +} // namespace mozilla + +#endif /* mozilla_TimeStamp_windows_h */ diff --git a/mozglue/misc/moz.build b/mozglue/misc/moz.build @@ -38,6 +38,7 @@ if CONFIG["OS_ARCH"] == "WINNT": "PreXULSkeletonUI.h", "StackWalk_windows.h", "StackWalkThread.h", + "TimeStamp_windows.h", "WindowsDpiAwareness.h", ] diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp @@ -3363,9 +3363,10 @@ static void MaybeWriteRawStartTimeValue(SpliceableJSONWriter& aWriter, #endif #ifdef XP_WIN - uint64_t startTimeQPC = aStartTime.RawQueryPerformanceCounterValue(); - aWriter.DoubleProperty("startTimeAsQueryPerformanceCounterValue", - static_cast<double>(startTimeQPC)); + Maybe<uint64_t> startTimeQPC = aStartTime.RawQueryPerformanceCounterValue(); + if (startTimeQPC) + aWriter.DoubleProperty("startTimeAsQueryPerformanceCounterValue", + static_cast<double>(*startTimeQPC)); #endif } diff --git a/tools/profiler/public/ETWTools.h b/tools/profiler/public/ETWTools.h @@ -247,8 +247,14 @@ static inline void CreateDataDescForPayloadNonPOD( static inline void CreateDataDescForPayloadNonPOD( PayloadBuffer& aBuffer, EVENT_DATA_DESCRIPTOR& aDescriptor, const mozilla::TimeStamp& aPayload) { - CreateDataDescForPayloadPOD(aBuffer, aDescriptor, - aPayload.RawQueryPerformanceCounterValue()); + if (aPayload.RawQueryPerformanceCounterValue().isNothing()) { + // This should never happen? 
+ EventDataDescCreate(&aDescriptor, nullptr, 0); + return; + } + + CreateDataDescForPayloadPOD( + aBuffer, aDescriptor, aPayload.RawQueryPerformanceCounterValue().value()); } static inline void CreateDataDescForPayloadNonPOD( @@ -302,13 +308,13 @@ static inline void StoreBaseEventDataDesc( const mozilla::MarkerOptions& aOptions) { if (aOptions.IsTimingUnspecified()) { aStorage.mStartTime = - mozilla::TimeStamp::Now().RawQueryPerformanceCounterValue(); + mozilla::TimeStamp::Now().RawQueryPerformanceCounterValue().value(); aStorage.mPhase = 0; } else { aStorage.mStartTime = - aOptions.Timing().StartTime().RawQueryPerformanceCounterValue(); + aOptions.Timing().StartTime().RawQueryPerformanceCounterValue().value(); aStorage.mEndTime = - aOptions.Timing().EndTime().RawQueryPerformanceCounterValue(); + aOptions.Timing().EndTime().RawQueryPerformanceCounterValue().value(); aStorage.mPhase = uint8_t(aOptions.Timing().MarkerPhase()); } if (!aOptions.InnerWindowId().IsUnspecified()) {