tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

targets.h (15830B)


      1 // Copyright 2020 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef HIGHWAY_HWY_TARGETS_H_
     17 #define HIGHWAY_HWY_TARGETS_H_
     18 
     19 // Allows opting out of C++ standard library usage, which is not available in
     20 // some Compiler Explorer environments.
     21 #ifndef HWY_NO_LIBCXX
     22 #include <vector>
     23 #endif
     24 
     25 // For SIMD module implementations and their callers. Defines which targets to
     26 // generate and call.
     27 
     28 #include "hwy/base.h"
     29 #include "hwy/detect_targets.h"
     30 #include "hwy/highway_export.h"
     31 
     32 #if !defined(HWY_NO_LIBCXX)
     33 #include <atomic>
     34 #endif
     35 
     36 namespace hwy {
     37 
     38 // Returns bitfield of enabled targets that are supported on this CPU; there is
     39 // always at least one such target, hence the return value is never 0. The
     40 // targets returned may change after calling DisableTargets. This function is
     41 // always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
     42 // calls to it if there is only a single target enabled.
     43 HWY_DLLEXPORT int64_t SupportedTargets();
     44 
     45 // Evaluates to a function call, or literal if there is a single target.
        // `x & (x - 1)` clears the lowest set bit of x, so the test below is true
        // exactly when HWY_TARGETS has at most one bit set. In that case the macro
        // is the compile-time constant HWY_TARGETS itself; otherwise it calls
        // hwy::SupportedTargets() at runtime.
     46 #if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
     47 #define HWY_SUPPORTED_TARGETS HWY_TARGETS
     48 #else
     49 #define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
     50 #endif
     51 
     52 // Subsequent SupportedTargets will not return targets whose bit(s) are set in
     53 // `disabled_targets`. Exception: if SupportedTargets would return 0, it will
     54 // instead return HWY_STATIC_TARGET (there must always be one target to call).
     55 //
     56 // This function is useful for disabling targets known to be buggy, or if the
     57 // best available target is undesirable (perhaps due to throttling or memory
     58 // bandwidth limitations). Use SetSupportedTargetsForTest instead of this
     59 // function for iteratively enabling specific targets for testing.
     60 HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);
     61 
     62 // Subsequent SupportedTargets will return the given set of targets, except
     63 // those disabled via DisableTargets. Call with a mask of 0 to disable the mock
     64 // and return to the normal SupportedTargets behavior. Used to run tests for
     65 // all targets.
     66 HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);
     67 
     68 #ifndef HWY_NO_LIBCXX
     69 
     70 // Return the list of targets in HWY_TARGETS supported by the CPU as a list of
     71 // individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
     72 // is affected by the current SetSupportedTargetsForTest() mock if any.
     73 HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() {
     74  std::vector<int64_t> ret;
     75  for (int64_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
     76       targets = targets & (targets - 1)) {
     77    int64_t current_target = targets & ~(targets - 1);
     78    ret.push_back(current_target);
     79  }
     80  return ret;
     81 }
     82 
     83 #endif  // HWY_NO_LIBCXX
     84 
     85 // Returns a string that satisfies gtest IsValidParamName(). No longer report
     86 // targets as "Unknown" if they are for a different architecture, because some
     87 // users unconditionally disable targets and we want to see which.
     88 static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
     89  switch (target) {
     90    case HWY_EMU128:
     91      return "EMU128";
     92    case HWY_SCALAR:
     93      return "SCALAR";
     94 
     95    // X86
     96    case HWY_SSE2:
     97      return "SSE2";
     98    case HWY_SSSE3:
     99      return "SSSE3";
    100    case HWY_SSE4:
    101      return "SSE4";
    102    case HWY_AVX2:
    103      return "AVX2";
    104    case HWY_AVX3:
    105      return "AVX3";
    106    case HWY_AVX3_DL:
    107      return "AVX3_DL";
    108    case HWY_AVX3_ZEN4:
    109      return "AVX3_ZEN4";
    110    case HWY_AVX3_SPR:
    111      return "AVX3_SPR";
    112    case HWY_AVX10_2:
    113      return "AVX10_2";
    114 
    115      // ARM
    116    case HWY_SVE2_128:
    117      return "SVE2_128";
    118    case HWY_SVE_256:
    119      return "SVE_256";
    120    case HWY_SVE2:
    121      return "SVE2";
    122    case HWY_SVE:
    123      return "SVE";
    124    case HWY_NEON_BF16:
    125      return "NEON_BF16";
    126    case HWY_NEON:
    127      return "NEON";
    128    case HWY_NEON_WITHOUT_AES:
    129      return "NEON_WITHOUT_AES";
    130 
    131      // PPC
    132    case HWY_PPC8:
    133      return "PPC8";
    134    case HWY_PPC9:
    135      return "PPC9";
    136    case HWY_PPC10:
    137      return "PPC10";
    138 
    139      // S390X
    140    case HWY_Z14:
    141      return "Z14";
    142    case HWY_Z15:
    143      return "Z15";
    144 
    145      // WASM
    146    case HWY_WASM:
    147      return "WASM";
    148    case HWY_WASM_EMU256:
    149      return "WASM_EMU256";
    150 
    151      // RISCV
    152    case HWY_RVV:
    153      return "RVV";
    154 
    155      // LOONGARCH
    156    case HWY_LSX:
    157      return "LSX";
    158    case HWY_LASX:
    159      return "LASX";
    160  }
    161 
    162  return "Unknown";
    163 }
    164 
    165 // Invokes VISITOR(TARGET, NAMESPACE) for all enabled targets. Alphabetic order.
        // Expands to one HWY_VISIT_<TARGET>(VISITOR) invocation per target listed
        // below. Those per-target macros are not defined in this header.
        // NOTE(review): presumably each HWY_VISIT_<TARGET> expands to nothing when
        // its target is not enabled, which is how "all enabled targets" is
        // achieved - confirm against their definitions.
        // The invocations are not separated by commas or semicolons, so whatever
        // VISITOR expands to must be valid when placed back to back.
    166 #define HWY_VISIT_TARGETS(VISITOR)    \
    167  HWY_VISIT_AVX10_2(VISITOR)          \
    168  HWY_VISIT_AVX2(VISITOR)             \
    169  HWY_VISIT_AVX3(VISITOR)             \
    170  HWY_VISIT_AVX3_DL(VISITOR)          \
    171  HWY_VISIT_AVX3_SPR(VISITOR)         \
    172  HWY_VISIT_AVX3_ZEN4(VISITOR)        \
    173  HWY_VISIT_FALLBACK(VISITOR)         \
    174  HWY_VISIT_LASX(VISITOR)             \
    175  HWY_VISIT_LSX(VISITOR)              \
    176  HWY_VISIT_NEON(VISITOR)             \
    177  HWY_VISIT_NEON_BF16(VISITOR)        \
    178  HWY_VISIT_NEON_WITHOUT_AES(VISITOR) \
    179  HWY_VISIT_PPC10(VISITOR)            \
    180  HWY_VISIT_PPC8(VISITOR)             \
    181  HWY_VISIT_PPC9(VISITOR)             \
    182  HWY_VISIT_RVV(VISITOR)              \
    183  HWY_VISIT_SSE2(VISITOR)             \
    184  HWY_VISIT_SSE4(VISITOR)             \
    185  HWY_VISIT_SSSE3(VISITOR)            \
    186  HWY_VISIT_SVE(VISITOR)              \
    187  HWY_VISIT_SVE2(VISITOR)             \
    188  HWY_VISIT_SVE2_128(VISITOR)         \
    189  HWY_VISIT_SVE_256(VISITOR)          \
    190  HWY_VISIT_WASM(VISITOR)             \
    191  HWY_VISIT_WASM_EMU256(VISITOR)      \
    192  HWY_VISIT_Z14(VISITOR)              \
    193  HWY_VISIT_Z15(VISITOR)
    194 
    195 // The maximum number of dynamic targets on any architecture is defined by
    196 // HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
    197 
    198 // For the ChosenTarget mask and index we use a different bit arrangement than
    199 // in the HWY_TARGETS mask. Only the targets involved in the current
    200 // architecture are used in this mask, and therefore only the least significant
    201 // (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least
    202 // significant bit is set when the mask is not initialized, the next
    203 // HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
    204 // HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
    205 // that position and the next more significant bit is used for HWY_SCALAR (if
    206 // HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
    207 // define equivalent values for HWY_TARGETS in this representation.
    208 // This mask representation allows using ctz() on the mask to obtain a small
    209 // number that is used as an index into the table for dynamic dispatch. In this
    210 // way the first entry is used when the mask is uninitialized, the following
    211 // HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
    212 // scalar.
    213 
    214 // The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
        // It is bit (HWY_MAX_DYNAMIC_TARGETS + 1), i.e. directly above bit 0 (the
        // "uninitialized" flag) and the HWY_MAX_DYNAMIC_TARGETS dynamic-target
        // bits. HWY_MAX_DYNAMIC_TARGETS and HWY_HIGHEST_TARGET_BIT are defined
        // further below, per architecture; that works because the macros in this
        // group are only expanded where they are used.
    215 #define HWY_CHOSEN_TARGET_MASK_SCALAR (1LL << (HWY_MAX_DYNAMIC_TARGETS + 1))
    216 
    217 // Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
    218 // current architecture.
        // Step by step: shift X right so that the HWY_MAX_DYNAMIC_TARGETS bits
        // ending at HWY_HIGHEST_TARGET_BIT start at bit 0, mask away everything
        // above them, then shift left by one so that bit 0 stays free for the
        // "uninitialized" flag. The SCALAR bit is not produced here; callers OR in
        // HWY_CHOSEN_TARGET_MASK_SCALAR separately.
    219 #define HWY_CHOSEN_TARGET_SHIFT(X)                                    \
    220  ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
    221    ((1LL << HWY_MAX_DYNAMIC_TARGETS) - 1))                           \
    222   << 1)
    223 
    224 // The HWY_TARGETS mask in the ChosenTarget mask format.
        // In addition to the shifted HWY_TARGETS bits this includes the SCALAR bit
        // and bit 0, so ANDing a ChosenTarget mask with it (as
        // ChosenTarget::GetIndex does) preserves both of those bits.
    225 #define HWY_CHOSEN_TARGET_MASK_TARGETS \
    226  (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1LL)
    227 
    228 #if HWY_ARCH_X86
        // Each branch of this #if chain defines HWY_MAX_DYNAMIC_TARGETS and
        // HWY_HIGHEST_TARGET_BIT for one architecture; every branch except the
        // final #else also defines HWY_CHOOSE_TARGET_LIST.
        //
    229 // Maximum number of dynamic targets. Changing this value is an ABI-incompatible
    230 // change.
    231 #define HWY_MAX_DYNAMIC_TARGETS 15
    232 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
    233 // These must match the order in which the HWY_TARGETS are defined
    234 // starting by the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
    235 // HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
    236 // HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
    237 // corresponds to the best target. Don't include a "," at the end of the list.
    238 #define HWY_CHOOSE_TARGET_LIST(func_name)                     \
    239  nullptr,                             /* reserved */         \
    240      nullptr,                         /* reserved */         \
    241      nullptr,                         /* reserved */         \
    242      HWY_CHOOSE_AVX10_2(func_name),   /* AVX10_2 */          \
    243      HWY_CHOOSE_AVX3_SPR(func_name),  /* AVX3_SPR */         \
    244      nullptr,                         /* reserved */         \
    245      HWY_CHOOSE_AVX3_ZEN4(func_name), /* AVX3_ZEN4 */        \
    246      HWY_CHOOSE_AVX3_DL(func_name),   /* AVX3_DL */          \
    247      HWY_CHOOSE_AVX3(func_name),      /* AVX3 */             \
    248      HWY_CHOOSE_AVX2(func_name),      /* AVX2 */             \
    249      nullptr,                         /* AVX */              \
    250      HWY_CHOOSE_SSE4(func_name),      /* SSE4 */             \
    251      HWY_CHOOSE_SSSE3(func_name),     /* SSSE3 */            \
    252      nullptr,                         /* reserved - SSE3? */ \
    253      HWY_CHOOSE_SSE2(func_name)       /* SSE2 */
    254 
    255 #elif HWY_ARCH_ARM
    256 // See HWY_ARCH_X86 above for details.
    257 #define HWY_MAX_DYNAMIC_TARGETS 15
    258 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
    259 #define HWY_CHOOSE_TARGET_LIST(func_name)                              \
    260  nullptr,                                   /* reserved */            \
    261      nullptr,                               /* reserved */            \
    262      nullptr,                               /* reserved */            \
    263      HWY_CHOOSE_SVE2_128(func_name),        /* SVE2 128-bit */        \
    264      HWY_CHOOSE_SVE_256(func_name),         /* SVE 256-bit */         \
    265      nullptr,                               /* reserved */            \
    266      nullptr,                               /* reserved */            \
    267      nullptr,                               /* reserved */            \
    268      HWY_CHOOSE_SVE2(func_name),            /* SVE2 */                \
    269      HWY_CHOOSE_SVE(func_name),             /* SVE */                 \
    270      nullptr,                               /* reserved */            \
    271      HWY_CHOOSE_NEON_BF16(func_name),       /* NEON + f16/dot/bf16 */ \
    272      nullptr,                               /* reserved */            \
    273      HWY_CHOOSE_NEON(func_name),            /* NEON */                \
    274      HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */
    275 
    276 #elif HWY_ARCH_RISCV
    277 // See HWY_ARCH_X86 above for details.
    278 #define HWY_MAX_DYNAMIC_TARGETS 9
    279 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
    280 #define HWY_CHOOSE_TARGET_LIST(func_name)       \
    281  nullptr,                       /* reserved */ \
    282      nullptr,                   /* reserved */ \
    283      nullptr,                   /* reserved */ \
    284      nullptr,                   /* reserved */ \
    285      nullptr,                   /* reserved */ \
    286      nullptr,                   /* reserved */ \
    287      nullptr,                   /* reserved */ \
    288      HWY_CHOOSE_RVV(func_name), /* RVV */      \
    289      nullptr                    /* reserved */
    290 
    291 #elif HWY_ARCH_PPC || HWY_ARCH_S390X
    292 // See HWY_ARCH_X86 above for details.
        // PPC and S390X share one table and both use HWY_HIGHEST_TARGET_BIT_PPC.
    293 #define HWY_MAX_DYNAMIC_TARGETS 9
    294 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
    295 #define HWY_CHOOSE_TARGET_LIST(func_name)         \
    296  nullptr,                         /* reserved */ \
    297      nullptr,                     /* reserved */ \
    298      nullptr,                     /* reserved */ \
    299      nullptr,                     /* reserved */ \
    300      HWY_CHOOSE_PPC10(func_name), /* PPC10 */    \
    301      HWY_CHOOSE_PPC9(func_name),  /* PPC9 */     \
    302      HWY_CHOOSE_PPC8(func_name),  /* PPC8 */     \
    303      HWY_CHOOSE_Z15(func_name),   /* Z15 */      \
    304      HWY_CHOOSE_Z14(func_name)    /* Z14 */
    305 
    306 #elif HWY_ARCH_WASM
    307 // See HWY_ARCH_X86 above for details.
    308 #define HWY_MAX_DYNAMIC_TARGETS 9
    309 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
    310 #define HWY_CHOOSE_TARGET_LIST(func_name)                  \
    311  nullptr,                               /* reserved */    \
    312      nullptr,                           /* reserved */    \
    313      nullptr,                           /* reserved */    \
    314      nullptr,                           /* reserved */    \
    315      nullptr,                           /* reserved */    \
    316      nullptr,                           /* reserved */    \
    317      HWY_CHOOSE_WASM_EMU256(func_name), /* WASM_EMU256 */ \
    318      HWY_CHOOSE_WASM(func_name),        /* WASM */        \
    319      nullptr                            /* reserved */
    320 
    321 #elif HWY_ARCH_LOONGARCH
        // See HWY_ARCH_X86 above for details.
    322 #define HWY_MAX_DYNAMIC_TARGETS 3
    323 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_LOONGARCH
    324 #define HWY_CHOOSE_TARGET_LIST(func_name)        \
    325  nullptr,                        /* reserved */ \
    326      HWY_CHOOSE_LASX(func_name), /* LASX */     \
    327      HWY_CHOOSE_LSX(func_name)   /* LSX */
    328 
    329 #else
    330 // Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
    331 // still creating single-entry tables in HWY_EXPORT to ensure portability.
        // NOTE(review): HWY_CHOOSE_TARGET_LIST is intentionally not defined in this
        // branch, presumably because HWY_EXPORT does not reference it when there
        // are no dynamic targets - confirm against the HWY_EXPORT definition.
    332 #define HWY_MAX_DYNAMIC_TARGETS 1
    333 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
    334 #endif
    335 
    336 // Bitfield of supported and enabled targets. The format differs from that of
    337 // HWY_TARGETS; the lowest bit governs the first function pointer (which is
    338 // special in that it calls FunctionCache, then Update, then dispatches to the
    339 // actual implementation) in the tables created by HWY_EXPORT. Monostate (see
    340 // GetChosenTarget), thread-safe except on RVV.
    341 struct ChosenTarget {
    342 public:
    343  // Reset bits according to `targets` (typically the return value of
    344  // SupportedTargets()). Postcondition: IsInitialized() == true.
    345  void Update(int64_t targets) {
    346    // These are `targets` shifted downwards, see above. Also include SCALAR
    347    // (corresponds to the last entry in the function table) as fallback.
    348    StoreMask(HWY_CHOSEN_TARGET_SHIFT(targets) | HWY_CHOSEN_TARGET_MASK_SCALAR);
    349  }
    350 
    351  // Reset to the uninitialized state, so that FunctionCache will call Update
    352  // during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false.
    353  void DeInit() { StoreMask(1); }
    354 
    355  // Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH
    356  // function was called, which we check in tests.
    357  bool IsInitialized() const { return LoadMask() != 1; }
    358 
    359  // Return the index in the dynamic dispatch table to be used by the current
    360  // CPU. Note that this method must be in the header file so it uses the value
    361  // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
    362  // calls it, which may be different from others. This means we only enable
    363  // those targets that were actually compiled in this module.
    364  size_t HWY_INLINE GetIndex() const {
    365    return hwy::Num0BitsBelowLS1Bit_Nonzero64(
    366        static_cast<uint64_t>(LoadMask() & HWY_CHOSEN_TARGET_MASK_TARGETS));
    367  }
    368 
    369 private:
    370 #if defined(HWY_NO_LIBCXX)
    371  int64_t LoadMask() const { return mask_; }
    372  void StoreMask(int64_t mask) { mask_ = mask; }
    373 
    374  int64_t mask_{1};  // Initialized to 1 so GetIndex() returns 0.
    375 #else
    376  int64_t LoadMask() const { return mask_.load(); }
    377  void StoreMask(int64_t mask) { mask_.store(mask); }
    378 
    379  std::atomic<int64_t> mask_{1};  // Initialized to 1 so GetIndex() returns 0.
    380 #endif  // HWY_ARCH_RISCV
    381 };
    382 
    383 // For internal use (e.g. by FunctionCache and DisableTargets).
    384 HWY_DLLEXPORT ChosenTarget& GetChosenTarget();
    385 
    386 }  // namespace hwy
    387 
    388 #endif  // HIGHWAY_HWY_TARGETS_H_