tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Atomics.h (20073B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 /*
      8 * Implements (almost always) lock-free atomic operations. The operations here
      9 * are a subset of that which can be found in C++11's <atomic> header, with a
     10 * different API to enforce consistent memory ordering constraints.
     11 *
     12 * Anyone caught using |volatile| for inter-thread memory safety needs to be
     13 * sent a copy of this header and the C++11 standard.
     14 */
     15 
     16 #ifndef mozilla_Atomics_h
     17 #define mozilla_Atomics_h
     18 
     19 #include "mozilla/Attributes.h"
     20 
     21 #ifdef __wasi__
     22 #  include "mozilla/WasiAtomic.h"
     23 #else
     24 #  include <atomic>
     25 #endif  // __wasi__
     26 
     27 #include <cstddef>
     28 #include <cstdint>
     29 #include <type_traits>
     30 
     31 #if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || \
     32    defined(_M_X64)
     33 #  include <emmintrin.h>
     34 #endif
     35 
     36 namespace mozilla {
     37 
     38 /**
     39 * An enum of memory ordering possibilities for atomics.
     40 *
     41 * Memory ordering is the observable state of distinct values in memory.
     42 * (It's a separate concept from atomicity, which concerns whether an
     43 * operation can ever be observed in an intermediate state.  Don't
     44 * conflate the two!)  Given a sequence of operations in source code on
     45 * memory, it is *not* always the case that, at all times and on all
     46 * cores, those operations will appear to have occurred in that exact
     47 * sequence.  First, the compiler might reorder that sequence, if it
     48 * thinks another ordering will be more efficient.  Second, the CPU may
     49 * not expose so consistent a view of memory.  CPUs will often perform
     50 * their own instruction reordering, above and beyond that performed by
     51 * the compiler.  And each core has its own memory caches, and accesses
     52 * (reads and writes both) to "memory" may only resolve to out-of-date
     53 * cache entries -- not to the "most recently" performed operation in
     54 * some global sense.  Any access to a value that may be used by
     55 * multiple threads, potentially across multiple cores, must therefore
     56 * have a memory ordering imposed on it, for all code on all
     57 * threads/cores to have a sufficiently coherent worldview.
     58 *
     59 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
     60 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
     61 * detail on all this, including examples of how each mode works.
     62 *
     63 * Note that for simplicity and practicality, not all of the modes in
     64 * C++11 are supported.  The missing C++11 modes are either subsumed by
     65 * the modes we provide below, or not relevant for the CPUs we support
     66 * in Gecko.  These three modes are confusing enough as it is!
     67 */
enum MemoryOrdering : uint8_t {
  /*
   * Relaxed ordering is the simplest memory ordering: none at all.
   * When the result of a write is observed, nothing may be inferred
   * about other memory.  Writes ostensibly performed "before" on the
   * writing thread may not yet be visible.  Writes performed "after" on
   * the writing thread may already be visible, if the compiler or CPU
   * reordered them.  (The latter can happen if reads and/or writes get
   * held up in per-processor caches.)  Relaxed ordering means
   * operations can always use cached values (as long as the actual
   * updates to atomic values actually occur, correctly, eventually), so
   * it's usually the fastest sort of atomic access.  For this reason,
   * *it's also the most dangerous kind of access*.
   *
   * Relaxed ordering is good for things like process-wide statistics
   * counters that don't need to be consistent with anything else, so
   * long as updates themselves are atomic.  (And so long as any
   * observations of that value can tolerate being out-of-date -- if you
   * need some sort of up-to-date value, you need some sort of other
   * synchronizing operation.)  It's *not* good for locks, mutexes,
   * reference counts, etc. that mediate access to other memory, or must
   * be observably consistent with other memory.
   *
   * x86 architectures don't take advantage of the optimization
   * opportunities that relaxed ordering permits.  Thus it's possible
   * that using relaxed ordering will "work" on x86 but fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent
   * relaxed ordering semantics).  Be extra-careful using relaxed
   * ordering if you can't easily test non-x86 architectures!
   */
  Relaxed,

  /*
   * When an atomic value is updated with ReleaseAcquire ordering, and
   * that new value is observed with ReleaseAcquire ordering, prior
   * writes (atomic or not) are also observable.  What ReleaseAcquire
   * *doesn't* give you is any observable ordering guarantees for
   * ReleaseAcquire-ordered operations on different objects.  For
   * example, if there are two cores that each perform ReleaseAcquire
   * operations on separate objects, each core may or may not observe
   * the operations made by the other core.  The only way the cores can
   * be synchronized with ReleaseAcquire is if they both
   * ReleaseAcquire-access the same object.  This implies that you can't
   * necessarily describe some global total ordering of ReleaseAcquire
   * operations.
   *
   * ReleaseAcquire ordering is good for (as the name implies) atomic
   * operations on values controlling ownership of things: reference
   * counts, mutexes, and the like.  However, if you are thinking about
   * using these to implement your own locks or mutexes, you should take
   * a good, hard look at actual lock or mutex primitives first.
   */
  ReleaseAcquire,

  /*
   * When an atomic value is updated with SequentiallyConsistent
   * ordering, all writes are observable when the update is observed,
   * just as with ReleaseAcquire ordering.  But, furthermore, a global
   * total ordering of SequentiallyConsistent operations *can* be
   * described.  For example, if two cores perform SequentiallyConsistent
   * operations on separate objects, one core will observably perform
   * its update (and all previous operations will have completed), then
   * the other core will observably perform its update (and all previous
   * operations will have completed).  (Although those previous
   * operations aren't themselves ordered -- they could be intermixed,
   * or ordered if they occur on atomic values with ordering
   * requirements.)  SequentiallyConsistent is the *simplest and safest*
   * ordering of atomic operations -- it's always as if one operation
   * happens, then another, then another, in some order -- and every
   * core observes updates to happen in that single order.  Because it
   * has the most synchronization requirements, operations ordered this
   * way also tend to be slowest.
   *
   * SequentiallyConsistent ordering can be desirable when multiple
   * threads observe objects, and they all have to agree on the
   * observable order of changes to them.  People expect
   * SequentiallyConsistent ordering, even if they shouldn't, when
   * writing code, atomic or otherwise.  SequentiallyConsistent is also
   * the ordering of choice when designing lockless data structures.  If
   * you don't know what order to use, use this one.
   */
  SequentiallyConsistent,
};
    151 
    152 namespace detail {
    153 
/*
 * Maps each MemoryOrdering to the std::memory_order constants used for
 * loads, stores, and read-modify-write operations (see the explicit
 * specializations below).
 *
 * We provide CompareExchangeFailureOrder to work around a bug in some
 * versions of GCC's <atomic> header.  See bug 898491.
 */
template <MemoryOrdering Order>
struct AtomicOrderConstraints;
    160 
    161 template <>
    162 struct AtomicOrderConstraints<Relaxed> {
    163  static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
    164  static const std::memory_order LoadOrder = std::memory_order_relaxed;
    165  static const std::memory_order StoreOrder = std::memory_order_relaxed;
    166  static const std::memory_order CompareExchangeFailureOrder =
    167      std::memory_order_relaxed;
    168 };
    169 
    170 template <>
    171 struct AtomicOrderConstraints<ReleaseAcquire> {
    172  static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
    173  static const std::memory_order LoadOrder = std::memory_order_acquire;
    174  static const std::memory_order StoreOrder = std::memory_order_release;
    175  static const std::memory_order CompareExchangeFailureOrder =
    176      std::memory_order_acquire;
    177 };
    178 
    179 template <>
    180 struct AtomicOrderConstraints<SequentiallyConsistent> {
    181  static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
    182  static const std::memory_order LoadOrder = std::memory_order_seq_cst;
    183  static const std::memory_order StoreOrder = std::memory_order_seq_cst;
    184  static const std::memory_order CompareExchangeFailureOrder =
    185      std::memory_order_seq_cst;
    186 };
    187 
    188 template <typename T, MemoryOrdering Order>
    189 struct IntrinsicBase {
    190  typedef std::atomic<T> ValueType;
    191  typedef AtomicOrderConstraints<Order> OrderedOp;
    192 };
    193 
    194 template <typename T, MemoryOrdering Order>
    195 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order> {
    196  typedef IntrinsicBase<T, Order> Base;
    197 
    198  static T load(const typename Base::ValueType& aPtr) {
    199    return aPtr.load(Base::OrderedOp::LoadOrder);
    200  }
    201 
    202  static void store(typename Base::ValueType& aPtr, T aVal) {
    203    aPtr.store(aVal, Base::OrderedOp::StoreOrder);
    204  }
    205 
    206  static T exchange(typename Base::ValueType& aPtr, T aVal) {
    207    return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder);
    208  }
    209 
    210  static bool compareExchange(typename Base::ValueType& aPtr, T aOldVal,
    211                              T aNewVal) {
    212    return aPtr.compare_exchange_strong(
    213        aOldVal, aNewVal, Base::OrderedOp::AtomicRMWOrder,
    214        Base::OrderedOp::CompareExchangeFailureOrder);
    215  }
    216 };
    217 
    218 template <typename T, MemoryOrdering Order>
    219 struct IntrinsicAddSub : public IntrinsicBase<T, Order> {
    220  typedef IntrinsicBase<T, Order> Base;
    221 
    222  static T add(typename Base::ValueType& aPtr, T aVal) {
    223    return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
    224  }
    225 
    226  static T sub(typename Base::ValueType& aPtr, T aVal) {
    227    return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
    228  }
    229 };
    230 
    231 template <typename T, MemoryOrdering Order>
    232 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order> {
    233  typedef IntrinsicBase<T*, Order> Base;
    234 
    235  static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
    236    return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder);
    237  }
    238 
    239  static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal) {
    240    return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder);
    241  }
    242 };
    243 
    244 template <typename T, MemoryOrdering Order>
    245 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order> {
    246  typedef IntrinsicBase<T, Order> Base;
    247 
    248  static T inc(typename Base::ValueType& aPtr) {
    249    return IntrinsicAddSub<T, Order>::add(aPtr, 1);
    250  }
    251 
    252  static T dec(typename Base::ValueType& aPtr) {
    253    return IntrinsicAddSub<T, Order>::sub(aPtr, 1);
    254  }
    255 };
    256 
    257 template <typename T, MemoryOrdering Order>
    258 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
    259                          public IntrinsicIncDec<T, Order> {
    260  typedef IntrinsicBase<T, Order> Base;
    261 
    262  static T or_(typename Base::ValueType& aPtr, T aVal) {
    263    return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder);
    264  }
    265 
    266  static T xor_(typename Base::ValueType& aPtr, T aVal) {
    267    return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder);
    268  }
    269 
    270  static T and_(typename Base::ValueType& aPtr, T aVal) {
    271    return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder);
    272  }
    273 };
    274 
// Pointer specialization: pointers get memory operations and
// inc/dec/add/sub, but no bitwise operations.
template <typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*, Order> {};
    278 
    279 template <typename T>
    280 struct ToStorageTypeArgument {
    281  static constexpr T convert(T aT) { return aT; }
    282 };
    283 
/**
 * Common implementation shared by all Atomic<T> specializations: owns the
 * std::atomic storage and provides assignment, exchange, and
 * compare-exchange with the orderings selected by |Order|.
 */
template <typename T, MemoryOrdering Order>
class AtomicBase {
  static_assert(sizeof(T) == 4 || sizeof(T) == 8,
                "mozilla/Atomics.h only supports 32-bit and 64-bit types");

 protected:
  typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
  typedef typename Intrinsics::ValueType ValueType;
  // The underlying std::atomic<T> storage.
  ValueType mValue;

 public:
  constexpr AtomicBase() : mValue() {}
  // Note: initialization is NOT guaranteed to be atomic; this is deliberate
  // so static atomics need no static constructors (see the class comment on
  // Atomic<> below in the file).
  explicit constexpr AtomicBase(T aInit)
      : mValue(ToStorageTypeArgument<T>::convert(aInit)) {}

  // Note: we can't provide operator T() here because Atomic<bool> inherits
  // from AtomicBase with T=uint32_t and not T=bool. If we implemented
  // operator T() here, it would cause errors when comparing Atomic<bool> with
  // a regular bool.

  // Atomic store; returns the stored value, mirroring built-in assignment.
  T operator=(T aVal) {
    Intrinsics::store(mValue, aVal);
    return aVal;
  }

  /**
   * Performs an atomic swap operation.  aVal is stored and the previous
   * value of this variable is returned.
   */
  T exchange(T aVal) { return Intrinsics::exchange(mValue, aVal); }

  /**
   * Performs an atomic compare-and-swap operation and returns true if it
   * succeeded. This is equivalent to atomically doing
   *
   *   if (mValue == aOldValue) {
   *     mValue = aNewValue;
   *     return true;
   *   } else {
   *     return false;
   *   }
   */
  bool compareExchange(T aOldValue, T aNewValue) {
    return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
  }

 private:
  // Atomics are not copyable.
  AtomicBase(const AtomicBase& aCopy) = delete;
};
    333 
/**
 * Extends AtomicBase with load-by-conversion and increment/decrement
 * operators for types that support them (integrals and pointers).
 */
template <typename T, MemoryOrdering Order>
class AtomicBaseIncDec : public AtomicBase<T, Order> {
  typedef typename detail::AtomicBase<T, Order> Base;

 public:
  constexpr AtomicBaseIncDec() : Base() {}
  explicit constexpr AtomicBaseIncDec(T aInit) : Base(aInit) {}

  using Base::operator=;

  // Atomic load.
  operator T() const { return Base::Intrinsics::load(Base::mValue); }
  // Post-increment/decrement: Intrinsics::inc/dec return the *previous*
  // value, which is exactly what post-fix operators must yield.
  T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
  T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
  // Pre-increment/decrement: adjust the fetched previous value to produce
  // the new value.
  T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
  T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }

 private:
  // Atomics are not copyable.
  AtomicBaseIncDec(const AtomicBaseIncDec& aCopy) = delete;
};
    353 
    354 }  // namespace detail
    355 
/**
 * A wrapper for a type that enforces that all memory accesses are atomic.
 *
 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 * its place.  Implementations for integral and pointer types are provided
 * below.
 *
 * Atomic accesses are sequentially consistent by default.  You should
 * use the default unless you are tall enough to ride the
 * memory-ordering roller coaster (if you're not sure, you aren't) and
 * you have a compelling reason to do otherwise.
 *
 * There is one exception to the case of atomic memory accesses: providing an
 * initial value of the atomic value is not guaranteed to be atomic.  This is a
 * deliberate design choice that enables static atomic variables to be declared
 * without introducing extra static constructors.
 */
// Declaration only; the partial specializations below provide the
// definitions for integral, pointer, enum, boolean, and double types.
template <typename T, MemoryOrdering Order = SequentiallyConsistent,
          typename Enable = void>
class Atomic;
    376 
    377 /**
    378 * Atomic<T> implementation for integral types.
    379 *
    380 * In addition to atomic store and load operations, compound assignment and
    381 * increment/decrement operators are implemented which perform the
    382 * corresponding read-modify-write operation atomically.  Finally, an atomic
    383 * swap method is provided.
    384 */
    385 template <typename T, MemoryOrdering Order>
    386 class Atomic<
    387    T, Order,
    388    std::enable_if_t<std::is_integral_v<T> && !std::is_same_v<T, bool>>>
    389    : public detail::AtomicBaseIncDec<T, Order> {
    390  typedef typename detail::AtomicBaseIncDec<T, Order> Base;
    391 
    392 public:
    393  constexpr Atomic() : Base() {}
    394  explicit constexpr Atomic(T aInit) : Base(aInit) {}
    395 
    396  using Base::operator=;
    397 
    398  T operator+=(T aDelta) {
    399    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
    400  }
    401 
    402  T operator-=(T aDelta) {
    403    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
    404  }
    405 
    406  T operator|=(T aVal) {
    407    return Base::Intrinsics::or_(Base::mValue, aVal) | aVal;
    408  }
    409 
    410  T operator^=(T aVal) {
    411    return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal;
    412  }
    413 
    414  T operator&=(T aVal) {
    415    return Base::Intrinsics::and_(Base::mValue, aVal) & aVal;
    416  }
    417 
    418 private:
    419  Atomic(Atomic& aOther) = delete;
    420 };
    421 
/**
 * Atomic<T> implementation for pointer types.
 *
 * An atomic compare-and-swap primitive for pointer variables is provided, as
 * are atomic increment and decrement operators.  Also provided are the
 * compound assignment operators for addition and subtraction. Atomic swap
 * (via exchange()) is included as well.
 */
    430 template <typename T, MemoryOrdering Order>
    431 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order> {
    432  typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
    433 
    434 public:
    435  constexpr Atomic() : Base() {}
    436  explicit constexpr Atomic(T* aInit) : Base(aInit) {}
    437 
    438  using Base::operator=;
    439 
    440  T* operator+=(ptrdiff_t aDelta) {
    441    return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta;
    442  }
    443 
    444  T* operator-=(ptrdiff_t aDelta) {
    445    return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta;
    446  }
    447 
    448 private:
    449  Atomic(Atomic& aOther) = delete;
    450 };
    451 
    452 /**
    453 * Atomic<T> implementation for enum types.
    454 *
    455 * The atomic store and load operations and the atomic swap method is provided.
    456 */
    457 template <typename T, MemoryOrdering Order>
    458 class Atomic<T, Order, std::enable_if_t<std::is_enum_v<T>>>
    459    : public detail::AtomicBase<T, Order> {
    460  typedef typename detail::AtomicBase<T, Order> Base;
    461 
    462 public:
    463  constexpr Atomic() : Base() {}
    464  explicit constexpr Atomic(T aInit) : Base(aInit) {}
    465 
    466  operator T() const { return T(Base::Intrinsics::load(Base::mValue)); }
    467 
    468  using Base::operator=;
    469 
    470 private:
    471  Atomic(Atomic& aOther) = delete;
    472 };
    473 
    474 /**
    475 * Atomic<T> implementation for boolean types.
    476 *
    477 * The atomic store and load operations and the atomic swap method is provided.
    478 *
    479 * Note:
    480 *
    481 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
    482 *   bool and/or some implementations of std::atomic. This is allowed in
    483 *   [atomic.types.generic]p9.
    484 *
    485 * - It's not obvious whether the 8-bit atomic functions on Windows are always
    486 *   inlined or not. If they are not inlined, the corresponding functions in the
    487 *   runtime library are not available on Windows XP. This is why we implement
    488 *   Atomic<bool> with an underlying type of uint32_t.
    489 */
    490 template <MemoryOrdering Order>
    491 class Atomic<bool, Order> : protected detail::AtomicBase<uint32_t, Order> {
    492  typedef typename detail::AtomicBase<uint32_t, Order> Base;
    493 
    494 public:
    495  constexpr Atomic() : Base() {}
    496  explicit constexpr Atomic(bool aInit) : Base(aInit) {}
    497 
    498  // We provide boolean wrappers for the underlying AtomicBase methods.
    499  MOZ_IMPLICIT operator bool() const {
    500    return Base::Intrinsics::load(Base::mValue);
    501  }
    502 
    503  bool operator=(bool aVal) { return Base::operator=(aVal); }
    504 
    505  bool exchange(bool aVal) { return Base::exchange(aVal); }
    506 
    507  bool compareExchange(bool aOldValue, bool aNewValue) {
    508    return Base::compareExchange(aOldValue, aNewValue);
    509  }
    510 
    511 private:
    512  Atomic(Atomic& aOther) = delete;
    513 };
    514 
    515 /**
    516 * Atomic<T> implementation for double type.
    517 */
    518 template <MemoryOrdering Order>
    519 class Atomic<double, Order> : protected detail::AtomicBase<double, Order> {
    520  typedef typename detail::AtomicBase<double, Order> Base;
    521 
    522 public:
    523  constexpr Atomic() : Base() {}
    524  explicit constexpr Atomic(double aInit) : Base(aInit) {}
    525 
    526  operator double() const {
    527    return double(Base::Intrinsics::load(Base::mValue));
    528  }
    529 
    530  double operator=(double aVal) { return Base::operator=(aVal); }
    531 
    532 private:
    533  Atomic(Atomic& aOther) = delete;
    534 };
    535 
    536 // Relax the CPU during a spinlock.  It's a good idea to place this in a
    537 // spinlock so that the CPU doesn't pipeline the loop otherwise flushing the
    538 // pipeline when the loop finally breaks can be expensive.
    539 inline void cpu_pause() {
    540 #if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || \
    541    defined(_M_X64)
    542  _mm_pause();
    543 #endif
    544 }
    545 
    546 }  // namespace mozilla
    547 
    548 namespace std {
    549 
// If you want to atomically swap two atomic values, use exchange().
// std::swap would perform a non-atomic load/store pair on each operand, so
// it is deleted for Atomic<> to prevent silent misuse.
template <typename T, mozilla::MemoryOrdering Order>
void swap(mozilla::Atomic<T, Order>&, mozilla::Atomic<T, Order>&) = delete;
    553 
    554 }  // namespace std
    555 
    556 #endif /* mozilla_Atomics_h */