tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

futex.h (8834B)


      1 // Copyright 2024 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef HIGHWAY_HWY_CONTRIB_THREAD_POOL_FUTEX_H_
     17 #define HIGHWAY_HWY_CONTRIB_THREAD_POOL_FUTEX_H_
     18 
     19 // Keyed event (futex): kernel queue of blocked threads, identified by the
     20 // address of an atomic u32 called `current` within the same process (do NOT
     21 // use with shared-memory mappings).
     22 //
     23 // Futex equivalents: https://outerproduct.net/futex-dictionary.html; we
     24 // support Linux/Emscripten/Apple/Windows and C++20 std::atomic::wait, plus a
     25 // NanoSleep fallback.
     26 
     27 #include <time.h>
     28 
     29 #include <atomic>
     30 #include <climits>  // INT_MAX
     31 
     32 #include "hwy/base.h"
     33 
     34 #if HWY_OS_APPLE
     35 #include <AvailabilityMacros.h>
     36 // __ulock* were added in OS X 10.12 (Sierra, 2016).
     37 #if MAC_OS_X_VERSION_MAX_ALLOWED < 101200 && !defined(HWY_DISABLE_FUTEX)
     38 #define HWY_DISABLE_FUTEX
     39 #endif
     40 #endif  // HWY_OS_APPLE
     41 
     42 #if HWY_OS_WIN
     43 // Need to include <windows.h> on Windows, even if HWY_DISABLE_FUTEX is defined,
     44 // since hwy::NanoSleep uses Windows APIs that are defined in windows.h.
     45 #ifndef NOMINMAX
     46 #define NOMINMAX
     47 #endif  // NOMINMAX
     48 #ifndef WIN32_LEAN_AND_MEAN
     49 #define WIN32_LEAN_AND_MEAN
     50 #endif  // WIN32_LEAN_AND_MEAN
     51 #include <windows.h>
     52 #endif
     53 
     54 #if HWY_ARCH_WASM
     55 #include <emscripten/threading.h>
     56 #include <math.h>  // INFINITY
     57 
     58 #elif HWY_OS_LINUX
     59 #include <errno.h>        // IWYU pragma: keep
     60 #include <linux/futex.h>  // FUTEX_*
     61 #include <pthread.h>
     62 #include <sys/syscall.h>  // SYS_*
     63 #include <unistd.h>
     64 // Android may not declare these:
     65 #ifndef SYS_futex
     66 #ifdef SYS_futex_time64  // 32-bit with 64-bit time_t
     67 #define SYS_futex SYS_futex_time64
     68 #else
     69 #define SYS_futex __NR_futex
     70 #endif  // SYS_futex_time64
     71 #endif  // SYS_futex
     72 #ifndef FUTEX_WAIT_PRIVATE
     73 #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | 128)
     74 #endif
     75 #ifndef FUTEX_WAKE_PRIVATE
     76 #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | 128)
     77 #endif
     78 
     79 #elif HWY_OS_APPLE && !defined(HWY_DISABLE_FUTEX)
     80 // These are private APIs, so add an opt-out.
     81 extern "C" {
     82 int __ulock_wait(uint32_t op, void* address, uint64_t val, uint32_t max_us);
     83 int __ulock_wake(uint32_t op, void* address, uint64_t zero);
     84 }  // extern "C"
     85 #define UL_COMPARE_AND_WAIT 1
     86 #define ULF_WAKE_ALL 0x00000100
     87 
     88 #elif HWY_OS_WIN && !defined(HWY_DISABLE_FUTEX)
     89 // WakeByAddressAll requires Windows 8, so add an opt-out.
     90 #if HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL
     91 #pragma comment(lib, "synchronization.lib")
     92 #endif
     93 
     94 #elif HWY_CXX_LANG < 202002L  // NOT C++20, which has native support
     95 #define HWY_FUTEX_SLEEP
     96 #endif
     97 
     98 namespace hwy {
     99 
    // Attempts to pause the calling thread for `ns` nanoseconds, though the
    // resolution is closer to 0.1 microseconds (the Windows timer counts in units
    // of 100 ns). Thread-safe.
    //
    // Returns true if a wait took place. Returns false if no wait happened, i.e.
    // the timer could not be created, armed or waited on (Windows), or the very
    // first nanosleep call failed with an error other than EINTR (elsewhere).
    static inline bool NanoSleep(uint64_t ns) {
    #if HWY_OS_WIN
      // One timer per thread, created lazily on first use and reused afterwards.
      static thread_local HANDLE hTimer = nullptr;
      if (HWY_UNLIKELY(hTimer == nullptr)) {
        // Must be manual reset: auto-reset would immediately signal after the next
        // SetWaitableTimer.
        hTimer = CreateWaitableTimer(nullptr, TRUE, nullptr);
        if (hTimer == nullptr) return false;
      }

      // Negative means relative, in units of 100 ns.
      LARGE_INTEGER time;
      time.QuadPart = -static_cast<LONGLONG>(ns / 100);
      const LONG period = 0;  // signal once
      if (!SetWaitableTimer(hTimer, &time, period, nullptr, nullptr, FALSE)) {
        return false;
      }

      // A failed wait returns right away, which means no pause took place; report
      // that instead of unconditionally claiming success.
      return WaitForSingleObject(hTimer, INFINITE) == WAIT_OBJECT_0;
    #else
      timespec duration;
      duration.tv_sec = static_cast<time_t>(ns / 1000000000);
      duration.tv_nsec = static_cast<decltype(duration.tv_nsec)>(ns % 1000000000);
      timespec remainder;
      // Repeat if interrupted by a signal. Note that the remainder may be rounded
      // up, which could cause an infinite loop if continually interrupted. Using
      // clock_nanosleep would work, but we'd have to get the current time. We
      // assume durations are short, and instead just cap the number of retries.
      for (int rep = 0; rep < 3; ++rep) {
        if (nanosleep(&duration, &remainder) == 0) return true;
        // Any error other than EINTR means this call did not sleep at all. In
        // that case only an earlier, interrupted attempt can have waited.
        if (errno != EINTR) return rep != 0;
        duration = remainder;
      }
      // Interrupted on every attempt; stop retrying and report the partial wait.
      return true;
    #endif
    }
    138 
    139 // Waits until `current != prev` and returns the new value. May return
    140 // immediately if `current` already changed, or after blocking and waking.
    141 static inline uint32_t BlockUntilDifferent(
    142    const uint32_t prev, const std::atomic<uint32_t>& current) {
    143  const auto acq = std::memory_order_acquire;
    144 
    145 #if HWY_ARCH_WASM
    146  // It is always safe to cast to void.
    147  volatile void* address =
    148      const_cast<volatile void*>(static_cast<const volatile void*>(&current));
    149  const double max_ms = INFINITY;
    150  for (;;) {
    151    const uint32_t next = current.load(acq);
    152    if (next != prev) return next;
    153    const int ret = emscripten_futex_wait(address, prev, max_ms);
    154    HWY_DASSERT(ret >= 0);
    155    (void)ret;
    156  }
    157 
    158 #elif HWY_OS_LINUX
    159  // Safe to cast because std::atomic is a standard layout type.
    160  const uint32_t* address = reinterpret_cast<const uint32_t*>(&current);
    161  // _PRIVATE requires this only be used in the same process, and avoids
    162  // virtual->physical lookups and atomic reference counting.
    163  const int op = FUTEX_WAIT_PRIVATE;
    164  for (;;) {
    165    const uint32_t next = current.load(acq);
    166    if (next != prev) return next;
    167    // timeout=null may prevent interrupts via signal. No lvalue because
    168    // the timespec type is only standardized since C++17 or C11.
    169    const auto ret = syscall(SYS_futex, address, op, prev, nullptr, nullptr, 0);
    170    if (ret == -1) {
    171      HWY_DASSERT(errno == EAGAIN);  // otherwise an actual error
    172    }
    173  }
    174 
    175 #elif HWY_OS_WIN && !defined(HWY_DISABLE_FUTEX)
    176  // It is always safe to cast to void.
    177  volatile void* address =
    178      const_cast<volatile void*>(static_cast<const volatile void*>(&current));
    179  // API is not const-correct, but only loads from the pointer.
    180  PVOID pprev = const_cast<void*>(static_cast<const void*>(&prev));
    181  const DWORD max_ms = INFINITE;
    182  for (;;) {
    183    const uint32_t next = current.load(acq);
    184    if (next != prev) return next;
    185    const BOOL ok = WaitOnAddress(address, pprev, sizeof(prev), max_ms);
    186    HWY_DASSERT(ok);
    187    (void)ok;
    188  }
    189 
    190 #elif HWY_OS_APPLE && !defined(HWY_DISABLE_FUTEX)
    191  // It is always safe to cast to void.
    192  void* address = const_cast<void*>(static_cast<const void*>(&current));
    193  for (;;) {
    194    const uint32_t next = current.load(acq);
    195    if (next != prev) return next;
    196    __ulock_wait(UL_COMPARE_AND_WAIT, address, prev, 0);
    197  }
    198 
    199 #elif defined(HWY_FUTEX_SLEEP)
    200  for (;;) {
    201    const uint32_t next = current.load(acq);
    202    if (next != prev) return next;
    203    NanoSleep(2000);
    204  }
    205 
    206 #elif HWY_CXX_LANG >= 202002L
    207  current.wait(prev, acq);  // No spurious wakeup.
    208  const uint32_t next = current.load(acq);
    209  HWY_DASSERT(next != prev);
    210  return next;
    211 
    212 #else
    213 #error "Logic error, should have reached HWY_FUTEX_SLEEP"
    214 #endif  // HWY_OS_*
    215 }  // BlockUntilDifferent
    216 
    217 // Wakes all threads, if any, that are waiting because they called
    218 // `BlockUntilDifferent` with the same `current`.
    219 static inline void WakeAll(std::atomic<uint32_t>& current) {
    220 #if HWY_ARCH_WASM
    221  // It is always safe to cast to void.
    222  volatile void* address = static_cast<volatile void*>(&current);
    223  const int max_to_wake = INT_MAX;  // actually signed
    224  const int ret = emscripten_futex_wake(address, max_to_wake);
    225  HWY_DASSERT(ret >= 0);
    226  (void)ret;
    227 
    228 #elif HWY_OS_LINUX
    229  // Safe to cast because std::atomic is a standard layout type.
    230  uint32_t* address = reinterpret_cast<uint32_t*>(&current);
    231  const int max_to_wake = INT_MAX;  // actually signed
    232  const auto ret = syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, max_to_wake,
    233                           nullptr, nullptr, 0);
    234  HWY_DASSERT(ret >= 0);  // number woken
    235  (void)ret;
    236 
    237 #elif HWY_OS_WIN && !defined(HWY_DISABLE_FUTEX)
    238  // It is always safe to cast to void.
    239  void* address = static_cast<void*>(&current);
    240  WakeByAddressAll(address);
    241 
    242 #elif HWY_OS_APPLE && !defined(HWY_DISABLE_FUTEX)
    243  // It is always safe to cast to void.
    244  void* address = static_cast<void*>(&current);
    245  __ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL, address, 0);
    246 
    247 #elif defined(HWY_FUTEX_SLEEP)
    248  // NanoSleep loop does not require wakeup.
    249  (void)current;
    250 #elif HWY_CXX_LANG >= 202002L
    251  current.notify_all();
    252 
    253 #else
    254 #error "Logic error, should have reached HWY_FUTEX_SLEEP"
    255 #endif
    256 }  // WakeAll
    257 
    258 }  // namespace hwy
    259 
    260 #endif  // HIGHWAY_HWY_CONTRIB_THREAD_POOL_FUTEX_H_