tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

highway.h (28605B)


      1 // Copyright 2020 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 // Main header required before using vector types.
     17 
     18 // IWYU pragma: begin_exports
     19 #include "hwy/base.h"
     20 #include "hwy/detect_compiler_arch.h"
     21 #include "hwy/detect_targets.h"
     22 #include "hwy/highway_export.h"
     23 #include "hwy/targets.h"
     24 // IWYU pragma: end_exports
     25 
     26 #if HWY_CXX_LANG < 201703L
     27 #define HWY_DISPATCH_MAP 1
     28 #else
     29 #define HWY_DISPATCH_MAP 0
     30 #endif
     31 
     32 // This include guard is checked by foreach_target, so avoid the usual _H_
     33 // suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
     34 // after/outside this include guard.
     35 #ifndef HWY_HIGHWAY_INCLUDED
     36 #define HWY_HIGHWAY_INCLUDED
     37 
     38 namespace hwy {
     39 
     40 //------------------------------------------------------------------------------
     41 // Shorthand for tags (defined in shared-inl.h) used to select overloads.
     42 // Note that ScalableTag<T> is preferred over HWY_FULL, and CappedTag<T, N> over
     43 // HWY_CAPPED(T, N).
     44 
     45 // HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of
     46 // registers in the group, and is ignored on targets that do not support groups.
     47 #define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T>
     48 #define HWY_FULL2(T, LMUL) \
     49  hwy::HWY_NAMESPACE::ScalableTag<T, hwy::CeilLog2(HWY_MAX(0, LMUL))>
     50 #define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
     51 // Workaround for MSVC grouping __VA_ARGS__ into a single argument
     52 #define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
     53 // Trailing comma avoids -pedantic false alarm
     54 #define HWY_CHOOSE_FULL(...) \
     55  HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
     56 #define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
     57 
     58 // Vector of up to MAX_N lanes. It's better to use full vectors where possible.
     59 #define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>
     60 
     61 //------------------------------------------------------------------------------
     62 // Export user functions for static/dynamic dispatch
     63 
     64 // The static target is the best baseline. When using foreach_target.h, this is
     65 // the last target compiled. Otherwise, it is the only target.
     66 
     67 // Evaluates to 0 inside a translation unit if it is generating anything but the
     68 // static target. Used to prevent redefinitions of HWY_EXPORT. Unless
     69 // foreach_target.h is included, we only compile once anyway, so this is 1
     70 // unless it is or has been included.
     71 #ifndef HWY_ONCE
     72 #define HWY_ONCE 1
     73 #endif
     74 
     75 // `HWY_STATIC_NAMESPACE` expands to its namespace name, e.g. `N_AVX2`.
     76 #if HWY_STATIC_TARGET == HWY_SCALAR
     77 #define HWY_STATIC_NAMESPACE N_SCALAR
     78 #elif HWY_STATIC_TARGET == HWY_EMU128
     79 #define HWY_STATIC_NAMESPACE N_EMU128
     80 #elif HWY_STATIC_TARGET == HWY_WASM
     81 #define HWY_STATIC_NAMESPACE N_WASM
     82 #elif HWY_STATIC_TARGET == HWY_WASM_EMU256
     83 #define HWY_STATIC_NAMESPACE N_WASM_EMU256
     84 #elif HWY_STATIC_TARGET == HWY_Z14
     85 #define HWY_STATIC_NAMESPACE N_Z14
     86 #elif HWY_STATIC_TARGET == HWY_Z15
     87 #define HWY_STATIC_NAMESPACE N_Z15
     88 #elif HWY_STATIC_TARGET == HWY_PPC8
     89 #define HWY_STATIC_NAMESPACE N_PPC8
     90 #elif HWY_STATIC_TARGET == HWY_PPC9
     91 #define HWY_STATIC_NAMESPACE N_PPC9
     92 #elif HWY_STATIC_TARGET == HWY_PPC10
     93 #define HWY_STATIC_NAMESPACE N_PPC10
     94 #elif HWY_STATIC_TARGET == HWY_LSX
     95 #define HWY_STATIC_NAMESPACE N_LSX
     96 #elif HWY_STATIC_TARGET == HWY_LASX
     97 #define HWY_STATIC_NAMESPACE N_LASX
     98 #elif HWY_STATIC_TARGET == HWY_RVV
     99 #define HWY_STATIC_NAMESPACE N_RVV
    100 #elif HWY_STATIC_TARGET == HWY_NEON_WITHOUT_AES
    101 #define HWY_STATIC_NAMESPACE N_NEON_WITHOUT_AES
    102 #elif HWY_STATIC_TARGET == HWY_NEON
    103 #define HWY_STATIC_NAMESPACE N_NEON
    104 #elif HWY_STATIC_TARGET == HWY_NEON_BF16
    105 #define HWY_STATIC_NAMESPACE N_NEON_BF16
    106 #elif HWY_STATIC_TARGET == HWY_SVE
    107 #define HWY_STATIC_NAMESPACE N_SVE
    108 #elif HWY_STATIC_TARGET == HWY_SVE2
    109 #define HWY_STATIC_NAMESPACE N_SVE2
    110 #elif HWY_STATIC_TARGET == HWY_SVE_256
    111 #define HWY_STATIC_NAMESPACE N_SVE_256
    112 #elif HWY_STATIC_TARGET == HWY_SVE2_128
    113 #define HWY_STATIC_NAMESPACE N_SVE2_128
    114 #elif HWY_STATIC_TARGET == HWY_SSE2
    115 #define HWY_STATIC_NAMESPACE N_SSE2
    116 #elif HWY_STATIC_TARGET == HWY_SSSE3
    117 #define HWY_STATIC_NAMESPACE N_SSSE3
    118 #elif HWY_STATIC_TARGET == HWY_SSE4
    119 #define HWY_STATIC_NAMESPACE N_SSE4
    120 #elif HWY_STATIC_TARGET == HWY_AVX2
    121 #define HWY_STATIC_NAMESPACE N_AVX2
    122 #elif HWY_STATIC_TARGET == HWY_AVX3
    123 #define HWY_STATIC_NAMESPACE N_AVX3
    124 #elif HWY_STATIC_TARGET == HWY_AVX3_DL
    125 #define HWY_STATIC_NAMESPACE N_AVX3_DL
    126 #elif HWY_STATIC_TARGET == HWY_AVX3_ZEN4
    127 #define HWY_STATIC_NAMESPACE N_AVX3_ZEN4
    128 #elif HWY_STATIC_TARGET == HWY_AVX3_SPR
    129 #define HWY_STATIC_NAMESPACE N_AVX3_SPR
    130 #elif HWY_STATIC_TARGET == HWY_AVX10_2
    131 #define HWY_STATIC_NAMESPACE N_AVX10_2
    132 #endif
    133 
    134 // `HWY_STATIC_DISPATCH(FUNC_NAME)` is the namespace-qualified FUNC_NAME for
    135 // `HWY_STATIC_TARGET`, and can be used to deduce the return type of Choose*.
    136 #define HWY_STATIC_DISPATCH(FUNC_NAME) HWY_STATIC_NAMESPACE::FUNC_NAME
    137 
    138 // `HWY_CHOOSE_*(FUNC_NAME)` expands to the function pointer for that target or
    139 // nullptr if that target was not compiled.
    140 // `HWY_VISIT_*(VISITOR)` expands to `VISITOR(TARGET, NAMESPACE)` or nothing if
    141 // that target was not compiled.
    142 #if HWY_TARGETS & HWY_EMU128
    143 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME
    144 #define HWY_VISIT_FALLBACK(VISITOR) VISITOR(HWY_EMU128, N_EMU128)
    145 #elif HWY_TARGETS & HWY_SCALAR
    146 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME
    147 #define HWY_VISIT_FALLBACK(VISITOR) VISITOR(HWY_SCALAR, N_SCALAR)
    148 #else
    149 // When HWY_SCALAR/HWY_EMU128 are not present and other targets were disabled at
    150 // runtime, fall back to the baseline with HWY_STATIC_DISPATCH().
    151 #define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
    152 #define HWY_VISIT_FALLBACK(VISITOR) \
    153  VISITOR(HWY_STATIC_TARGET, HWY_STATIC_NAMESPACE)
    154 #endif
    155 
    156 #if HWY_TARGETS & HWY_WASM
    157 #define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
    158 #define HWY_VISIT_WASM(VISITOR) VISITOR(HWY_WASM, N_WASM)
    159 #else
    160 #define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
    161 #define HWY_VISIT_WASM(VISITOR)
    162 #endif
    163 
    164 #if HWY_TARGETS & HWY_WASM_EMU256
    165 #define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) &N_WASM_EMU256::FUNC_NAME
    166 #define HWY_VISIT_WASM_EMU256(VISITOR) VISITOR(HWY_WASM_EMU256, N_WASM_EMU256)
    167 #else
    168 #define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) nullptr
    169 #define HWY_VISIT_WASM_EMU256(VISITOR)
    170 #endif
    171 
    172 #if HWY_TARGETS & HWY_Z14
    173 #define HWY_CHOOSE_Z14(FUNC_NAME) &N_Z14::FUNC_NAME
    174 #define HWY_VISIT_Z14(VISITOR) VISITOR(HWY_Z14, N_Z14)
    175 #else
    176 #define HWY_CHOOSE_Z14(FUNC_NAME) nullptr
    177 #define HWY_VISIT_Z14(VISITOR)
    178 #endif
    179 
    180 #if HWY_TARGETS & HWY_Z15
    181 #define HWY_CHOOSE_Z15(FUNC_NAME) &N_Z15::FUNC_NAME
    182 #define HWY_VISIT_Z15(VISITOR) VISITOR(HWY_Z15, N_Z15)
    183 #else
    184 #define HWY_CHOOSE_Z15(FUNC_NAME) nullptr
    185 #define HWY_VISIT_Z15(VISITOR)
    186 #endif
    187 
    188 #if HWY_TARGETS & HWY_PPC8
    189 #define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
    190 #define HWY_VISIT_PPC8(VISITOR) VISITOR(HWY_PPC8, N_PPC8)
    191 #else
    192 #define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
    193 #define HWY_VISIT_PPC8(VISITOR)
    194 #endif
    195 
    196 #if HWY_TARGETS & HWY_PPC9
    197 #define HWY_CHOOSE_PPC9(FUNC_NAME) &N_PPC9::FUNC_NAME
    198 #define HWY_VISIT_PPC9(VISITOR) VISITOR(HWY_PPC9, N_PPC9)
    199 #else
    200 #define HWY_CHOOSE_PPC9(FUNC_NAME) nullptr
    201 #define HWY_VISIT_PPC9(VISITOR)
    202 #endif
    203 
    204 #if HWY_TARGETS & HWY_LSX
    205 #define HWY_CHOOSE_LSX(FUNC_NAME) &N_LSX::FUNC_NAME
    206 #define HWY_VISIT_LSX(VISITOR) VISITOR(HWY_LSX, N_LSX)
    207 #else
    208 #define HWY_CHOOSE_LSX(FUNC_NAME) nullptr
    209 #define HWY_VISIT_LSX(VISITOR)
    210 #endif
    211 
    212 #if HWY_TARGETS & HWY_LASX
    213 #define HWY_CHOOSE_LASX(FUNC_NAME) &N_LASX::FUNC_NAME
    214 #define HWY_VISIT_LASX(VISITOR) VISITOR(HWY_LASX, N_LASX)
    215 #else
    216 #define HWY_CHOOSE_LASX(FUNC_NAME) nullptr
    217 #define HWY_VISIT_LASX(VISITOR)
    218 #endif
    219 
    220 #if HWY_TARGETS & HWY_PPC10
    221 #define HWY_CHOOSE_PPC10(FUNC_NAME) &N_PPC10::FUNC_NAME
    222 #define HWY_VISIT_PPC10(VISITOR) VISITOR(HWY_PPC10, N_PPC10)
    223 #else
    224 #define HWY_CHOOSE_PPC10(FUNC_NAME) nullptr
    225 #define HWY_VISIT_PPC10(VISITOR)
    226 #endif
    227 
    228 #if HWY_TARGETS & HWY_RVV
    229 #define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
    230 #define HWY_VISIT_RVV(VISITOR) VISITOR(HWY_RVV, N_RVV)
    231 #else
    232 #define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
    233 #define HWY_VISIT_RVV(VISITOR)
    234 #endif
    235 
    236 #if HWY_TARGETS & HWY_NEON_WITHOUT_AES
    237 #define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) &N_NEON_WITHOUT_AES::FUNC_NAME
    238 #define HWY_VISIT_NEON_WITHOUT_AES(VISITOR) \
    239  VISITOR(HWY_NEON_WITHOUT_AES, N_NEON_WITHOUT_AES)
    240 #else
    241 #define HWY_CHOOSE_NEON_WITHOUT_AES(FUNC_NAME) nullptr
    242 #define HWY_VISIT_NEON_WITHOUT_AES(VISITOR)
    243 #endif
    244 
    245 #if HWY_TARGETS & HWY_NEON
    246 #define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
    247 #define HWY_VISIT_NEON(VISITOR) VISITOR(HWY_NEON, N_NEON)
    248 #else
    249 #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
    250 #define HWY_VISIT_NEON(VISITOR)
    251 #endif
    252 
    253 #if HWY_TARGETS & HWY_NEON_BF16
    254 #define HWY_CHOOSE_NEON_BF16(FUNC_NAME) &N_NEON_BF16::FUNC_NAME
    255 #define HWY_VISIT_NEON_BF16(VISITOR) VISITOR(HWY_NEON_BF16, N_NEON_BF16)
    256 #else
    257 #define HWY_CHOOSE_NEON_BF16(FUNC_NAME) nullptr
    258 #define HWY_VISIT_NEON_BF16(VISITOR)
    259 #endif
    260 
    261 #if HWY_TARGETS & HWY_SVE
    262 #define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
    263 #define HWY_VISIT_SVE(VISITOR) VISITOR(HWY_SVE, N_SVE)
    264 #else
    265 #define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
    266 #define HWY_VISIT_SVE(VISITOR)
    267 #endif
    268 
    269 #if HWY_TARGETS & HWY_SVE2
    270 #define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
    271 #define HWY_VISIT_SVE2(VISITOR) VISITOR(HWY_SVE2, N_SVE2)
    272 #else
    273 #define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
    274 #define HWY_VISIT_SVE2(VISITOR)
    275 #endif
    276 
    277 #if HWY_TARGETS & HWY_SVE_256
    278 #define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME
    279 #define HWY_VISIT_SVE_256(VISITOR) VISITOR(HWY_SVE_256, N_SVE_256)
    280 #else
    281 #define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr
    282 #define HWY_VISIT_SVE_256(VISITOR)
    283 #endif
    284 
    285 #if HWY_TARGETS & HWY_SVE2_128
    286 #define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME
    287 #define HWY_VISIT_SVE2_128(VISITOR) VISITOR(HWY_SVE2_128, N_SVE2_128)
    288 #else
    289 #define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr
    290 #define HWY_VISIT_SVE2_128(VISITOR)
    291 #endif
    292 
    293 #if HWY_TARGETS & HWY_SSE2
    294 #define HWY_CHOOSE_SSE2(FUNC_NAME) &N_SSE2::FUNC_NAME
    295 #define HWY_VISIT_SSE2(VISITOR) VISITOR(HWY_SSE2, N_SSE2)
    296 #else
    297 #define HWY_CHOOSE_SSE2(FUNC_NAME) nullptr
    298 #define HWY_VISIT_SSE2(VISITOR)
    299 #endif
    300 
    301 #if HWY_TARGETS & HWY_SSSE3
    302 #define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
    303 #define HWY_VISIT_SSSE3(VISITOR) VISITOR(HWY_SSSE3, N_SSSE3)
    304 #else
    305 #define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
    306 #define HWY_VISIT_SSSE3(VISITOR)
    307 #endif
    308 
    309 #if HWY_TARGETS & HWY_SSE4
    310 #define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
    311 #define HWY_VISIT_SSE4(VISITOR) VISITOR(HWY_SSE4, N_SSE4)
    312 #else
    313 #define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
    314 #define HWY_VISIT_SSE4(VISITOR)
    315 #endif
    316 
    317 #if HWY_TARGETS & HWY_AVX2
    318 #define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
    319 #define HWY_VISIT_AVX2(VISITOR) VISITOR(HWY_AVX2, N_AVX2)
    320 #else
    321 #define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
    322 #define HWY_VISIT_AVX2(VISITOR)
    323 #endif
    324 
    325 #if HWY_TARGETS & HWY_AVX3
    326 #define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
    327 #define HWY_VISIT_AVX3(VISITOR) VISITOR(HWY_AVX3, N_AVX3)
    328 #else
    329 #define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
    330 #define HWY_VISIT_AVX3(VISITOR)
    331 #endif
    332 
    333 #if HWY_TARGETS & HWY_AVX3_DL
    334 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
    335 #define HWY_VISIT_AVX3_DL(VISITOR) VISITOR(HWY_AVX3_DL, N_AVX3_DL)
    336 #else
    337 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
    338 #define HWY_VISIT_AVX3_DL(VISITOR)
    339 #endif
    340 
    341 #if HWY_TARGETS & HWY_AVX3_ZEN4
    342 #define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) &N_AVX3_ZEN4::FUNC_NAME
    343 #define HWY_VISIT_AVX3_ZEN4(VISITOR) VISITOR(HWY_AVX3_ZEN4, N_AVX3_ZEN4)
    344 #else
    345 #define HWY_CHOOSE_AVX3_ZEN4(FUNC_NAME) nullptr
    346 #define HWY_VISIT_AVX3_ZEN4(VISITOR)
    347 #endif
    348 
    349 #if HWY_TARGETS & HWY_AVX3_SPR
    350 #define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) &N_AVX3_SPR::FUNC_NAME
    351 #define HWY_VISIT_AVX3_SPR(VISITOR) VISITOR(HWY_AVX3_SPR, N_AVX3_SPR)
    352 #else
    353 #define HWY_CHOOSE_AVX3_SPR(FUNC_NAME) nullptr
    354 #define HWY_VISIT_AVX3_SPR(VISITOR)
    355 #endif
    356 
    357 #if HWY_TARGETS & HWY_AVX10_2
    358 #define HWY_CHOOSE_AVX10_2(FUNC_NAME) &N_AVX10_2::FUNC_NAME
    359 #define HWY_VISIT_AVX10_2(VISITOR) VISITOR(HWY_AVX10_2, N_AVX10_2)
    360 #else
    361 #define HWY_CHOOSE_AVX10_2(FUNC_NAME) nullptr
    362 #define HWY_VISIT_AVX10_2(VISITOR)
    363 #endif
    364 
    365 // Workaround for HWY_COMPILER_MSVC < 1915 and HWY_COMPILER_GCC_ACTUAL < 700:
    366 // the non-type template parameter to ChooseAndCall apparently cannot be an
    367 // array. Use a function pointer instead, with the disadvantage that the static
    368 // (not best) target is called on the first call to any HWY_DYNAMIC_DISPATCH.
    369 #if (HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915) || \
    370    (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 700)
    371 #define HWY_DISPATCH_WORKAROUND 1
    372 #else
    373 #define HWY_DISPATCH_WORKAROUND 0
    374 #endif
    375 
    376 #if HWY_DISPATCH_MAP
    377 struct AllExports {
    378  template <class FuncPtr, class ExportsKey, uint64_t kHash>
    379  static const FuncPtr*& GetRefToExportsPtr() {
    380    static const FuncPtr* s_exports = nullptr;
    381    return s_exports;
    382  }
    383 };
    384 #endif
    385 
    386 // Provides a static member function which is what is called during the first
    387 // HWY_DYNAMIC_DISPATCH, where GetIndex is still zero, and instantiations of
    388 // this function are the first entry in the tables created by HWY_EXPORT[_T].
    389 template <typename RetType, typename... Args>
    390 struct FunctionCache {
    391 public:
    392  typedef RetType(FuncType)(Args...);
    393  using FuncPtr = FuncType*;
    394 
    395  // A template function that when instantiated has the same signature as the
    396  // function being called. This function initializes the bit array of targets
    397  // supported by the current CPU and then calls the appropriate entry within
    398  // the HWY_EXPORT table. Subsequent calls via HWY_DYNAMIC_DISPATCH to any
    399  // exported functions, even those defined by different translation units,
    400  // will dispatch directly to the best available target.
    401 #if HWY_DISPATCH_MAP
    402  template <class ExportsKey, uint64_t kHash>
    403  static RetType ChooseAndCall(Args... args) {
    404    ChosenTarget& chosen_target = GetChosenTarget();
    405    chosen_target.Update(SupportedTargets());
    406 
    407    const FuncPtr* table = AllExports::template GetRefToExportsPtr<
    408        FuncPtr, RemoveCvRef<ExportsKey>, kHash>();
    409    HWY_ASSERT(table);
    410 
    411    return (table[chosen_target.GetIndex()])(args...);
    412  }
    413 
    414 #if !HWY_DISPATCH_WORKAROUND
    415  template <const FuncPtr* table>
    416  static RetType TableChooseAndCall(Args... args) {
    417    ChosenTarget& chosen_target = GetChosenTarget();
    418    chosen_target.Update(SupportedTargets());
    419    return (table[chosen_target.GetIndex()])(args...);
    420  }
    421 #endif  // !HWY_DISPATCH_WORKAROUND
    422 
    423 #else   // !HWY_DISPATCH_MAP: zero-overhead, but requires C++17
    424  template <const FuncPtr* table>
    425  static RetType ChooseAndCall(Args... args) {
    426    ChosenTarget& chosen_target = GetChosenTarget();
    427    chosen_target.Update(SupportedTargets());
    428    return (table[chosen_target.GetIndex()])(args...);
    429  }
    430 #endif  // HWY_DISPATCH_MAP
    431 };
    432 
    433 // Used to deduce the template parameters RetType and Args from a function.
    434 template <typename RetType, typename... Args>
    435 FunctionCache<RetType, Args...> DeduceFunctionCache(RetType (*)(Args...)) {
    436  return FunctionCache<RetType, Args...>();
    437 }
    438 
    439 #define HWY_DISPATCH_TABLE(FUNC_NAME) \
    440  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
    441 
    442 // HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
    443 // HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime.
    444 // After being exported, it can be called from other parts of the same source
    445 // file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
    446 // like in the following example:
    447 //
    448 //   #include "hwy/highway.h"
    449 //   HWY_BEFORE_NAMESPACE();
    450 //   namespace skeleton {
    451 //   namespace HWY_NAMESPACE {
    452 //
    453 //   void MyFunction(int a, char b, const char* c) { ... }
    454 //
    455 //   // NOLINTNEXTLINE(google-readability-namespace-comments)
    456 //   }  // namespace HWY_NAMESPACE
    457 //   }  // namespace skeleton
    458 //   HWY_AFTER_NAMESPACE();
    459 //
    460 //   namespace skeleton {
    461 //   HWY_EXPORT(MyFunction);  // Defines the dispatch table in this scope.
    462 //
    463 //   void MyFunction(int a, char b, const char* c) {
    464 //     return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
    465 //   }
    466 //   }  // namespace skeleton
    467 //
    468 // For templated code with a single type parameter, instead use HWY_EXPORT_T and
    469 // its HWY_DYNAMIC_DISPATCH_T counterpart:
    470 //
    471 //   template <typename T>
    472 //   void MyFunctionCaller(T ...) {
    473 //     // First argument to both HWY_EXPORT_T and HWY_DYNAMIC_DISPATCH_T is an
    474 //     // arbitrary table name; you must provide the same name for each call.
    475 //     // It is fine to have multiple HWY_EXPORT_T in a function, but a 64-bit
    476 //     // FNV hash collision among *any* table names will trigger HWY_ABORT.
    477 //     HWY_EXPORT_T(Table1, MyFunction<T>);
    478 //     HWY_DYNAMIC_DISPATCH_T(Table1)(a, b, c);
    479 //   }
    480 //
    481 // Note that HWY_EXPORT_T must be invoked inside a template (in the above
    482 // example: `MyFunctionCaller`), so that a separate table will be created for
    483 // each template instantiation. For convenience, we also provide a macro that
    484 // combines both steps and avoids the need to pick a table name:
    485 //
    486 //   template <typename T>
    487 //   void MyFunctionCaller(T ...) {
    488 //     // Table name is automatically chosen. Note that this variant must be
    489 //     // called in statement context; it is not a valid expression.
    490 //     HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(MyFunction<T>)(a, b, c);
    491 //   }
    492 
    493 // Simplified version for IDE or the dynamic dispatch case with only one target.
    494 #if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
    495 
    496 // We use a table to provide the same compile error conditions as with the
    497 // non-simplified case, but the table only has a single entry.
    498 #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME)                               \
    499  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
    500  HWY_DISPATCH_TABLE(TABLE_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
    501 
    502 // Use the table, not just STATIC_DISPATCH as in DYNAMIC_DISPATCH, because
    503 // TABLE_NAME might not match the function name.
    504 #define HWY_DYNAMIC_POINTER_T(TABLE_NAME) (HWY_DISPATCH_TABLE(TABLE_NAME)[0])
    505 #define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) \
    506  (*(HWY_DYNAMIC_POINTER_T(TABLE_NAME)))
    507 
    508 #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
    509 #define HWY_DYNAMIC_POINTER(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
    510 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
    511 
    512 #else  // not simplified: full table
    513 
    514 // Pre-C++17 workaround: non-type template arguments must have linkage, which
    515 // means we cannot pass &table as a template argument to ChooseAndCall.
    516 // ChooseAndCall must find a way to access the table in order to dispatch to the
    517 // chosen target:
    518 // 0) Skipping this by dispatching to the static target would be surprising to
    519 //    users and may have serious performance implications.
    520 // 1) An extra function parameter would be unacceptable because it changes the
    521 //    user-visible function signature.
    522 // 2) Declaring a table, then defining a pointer to it would work, but requires
    523 //    an additional DECLARE step outside the function so that the pointer has
    524 //    linkage, which breaks existing code.
    525 // 3) We instead associate the function with the table using an instance of an
    526 //    unnamed struct and the hash of the table name as the key. Because
    527 //    ChooseAndCall has the type information, it can then cast to the function
    528 //    pointer type. However, we cannot simply pass the name as a template
    529 //    argument to ChooseAndCall because this requires char*, which hits the same
    530 //    linkage problem. We instead hash the table name, which assumes the
    531 //    function names do not have collisions.
    532 #if HWY_DISPATCH_MAP
    533 
    534 static constexpr uint64_t FNV(const char* name) {
    535  return *name ? static_cast<uint64_t>(static_cast<uint8_t>(*name)) ^
    536                     (0x100000001b3ULL * FNV(name + 1))
    537               : 0xcbf29ce484222325ULL;
    538 }
    539 
    540 template <uint64_t kHash>
    541 struct AddExport {
    542  template <class ExportsKey, class FuncPtr>
    543  AddExport(ExportsKey /*exports_key*/, const char* table_name,
    544            const FuncPtr* table) {
    545    using FuncCache = decltype(DeduceFunctionCache(hwy::DeclVal<FuncPtr>()));
    546    static_assert(
    547        hwy::IsSame<RemoveCvRef<FuncPtr>, typename FuncCache::FuncPtr>(),
    548        "FuncPtr should be same type as FuncCache::FuncPtr");
    549 
    550    const FuncPtr*& exports_ptr = AllExports::template GetRefToExportsPtr<
    551        RemoveCvRef<FuncPtr>, RemoveCvRef<ExportsKey>, kHash>();
    552    if (exports_ptr && exports_ptr != table) {
    553      HWY_ABORT("Hash collision for %s, rename the function\n", table_name);
    554    } else {
    555      exports_ptr = table;
    556    }
    557  }
    558 };
    559 
    560 // Dynamic dispatch: defines table of function pointers. This must be invoked
    561 // from inside the function template that calls the template we are exporting.
    562 // TABLE_NAME must match the one passed to HWY_DYNAMIC_DISPATCH_T. This
    563 // argument allows multiple exports within one function.
    564 #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME)                                   \
    565  static const struct {                                                       \
    566  } HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey) = {};                   \
    567  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE(  \
    568      TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = {       \
    569      /* The first entry in the table initializes the global cache and        \
    570       * calls the appropriate function. */                                   \
    571      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME)))::  \
    572          template ChooseAndCall<decltype(HWY_CONCAT(                         \
    573                                     TABLE_NAME, HighwayDispatchExportsKey)), \
    574                                 hwy::FNV(#TABLE_NAME)>,                      \
    575      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                      \
    576      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                         \
    577  };                                                                          \
    578  HWY_MAYBE_UNUSED static hwy::AddExport<hwy::FNV(#TABLE_NAME)> HWY_CONCAT(   \
    579      HighwayAddTable, __LINE__)(                                             \
    580      HWY_CONCAT(TABLE_NAME, HighwayDispatchExportsKey), #TABLE_NAME,         \
    581      HWY_DISPATCH_TABLE(TABLE_NAME))
    582 
    583 // For non-template functions. Not necessarily invoked within a function, hence
    584 // we derive the string and variable names from FUNC_NAME, not HWY_FUNCTION.
    585 #if HWY_DISPATCH_WORKAROUND
    586 #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
    587 #else
    588 #define HWY_EXPORT(FUNC_NAME)                                                \
    589  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
    590      FUNC_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = {       \
    591      /* The first entry in the table initializes the global cache and       \
    592       * calls the appropriate function. */                                  \
    593      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
    594          template TableChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>,        \
    595      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                     \
    596      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                        \
    597  }
    598 #endif  // HWY_DISPATCH_WORKAROUND
    599 
    600 #else  // !HWY_DISPATCH_MAP
    601 
    602 // Zero-overhead, but requires C++17 for non-type template arguments without
    603 // linkage, because HWY_EXPORT_T tables are local static variables.
    604 #define HWY_EXPORT_T(TABLE_NAME, FUNC_NAME)                                  \
    605  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
    606      TABLE_NAME)[static_cast<size_t>(HWY_MAX_DYNAMIC_TARGETS + 2)] = {      \
    607      /* The first entry in the table initializes the global cache and       \
    608       * calls the appropriate function. */                                  \
    609      &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH(FUNC_NAME))):: \
    610          template ChooseAndCall<HWY_DISPATCH_TABLE(TABLE_NAME)>,            \
    611      HWY_CHOOSE_TARGET_LIST(FUNC_NAME),                                     \
    612      HWY_CHOOSE_FALLBACK(FUNC_NAME),                                        \
    613  }
    614 
    615 #define HWY_EXPORT(FUNC_NAME) HWY_EXPORT_T(FUNC_NAME, FUNC_NAME)
    616 
    617 #endif  // HWY_DISPATCH_MAP
    618 
    619 // HWY_DISPATCH_MAP only affects how tables are created, not their usage.
    620 
    621 // Evaluates to the function pointer for the chosen target.
    622 #define HWY_DYNAMIC_POINTER(FUNC_NAME) \
    623  (HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()])
    624 
    625 // Calls the function pointer for the chosen target.
    626 #if HWY_COMPILER_GCC || HWY_COMPILER_CLANG
    627 
    628 // On GCC or Clang, we call hwy::PreventElision(...) to work around a crash in
    629 // the LLVM inliner caused by inlining incompatible intrinsics.
    630 
    631 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME)         \
    632  __extension__({                               \
    633    auto HWY_CONCAT(hwy_tmp_, __LINE__) = *(HWY_DYNAMIC_POINTER(FUNC_NAME)); \
    634    hwy::PreventElision(HWY_CONCAT(hwy_tmp_, __LINE__));                     \
    635    HWY_CONCAT(hwy_tmp_, __LINE__);                                          \
    636  })
    637 
    638 #else  // !(HWY_COMPILER_GCC || HWY_COMPILER_CLANG)
    639 
    640 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) (*(HWY_DYNAMIC_POINTER(FUNC_NAME)))
    641 
    642 #endif  // HWY_COMPILER_GCC || HWY_COMPILER_CLANG
    643 
    644 // Same as DISPATCH, but provide a different arg name to clarify usage.
    645 #define HWY_DYNAMIC_DISPATCH_T(TABLE_NAME) HWY_DYNAMIC_DISPATCH(TABLE_NAME)
    646 #define HWY_DYNAMIC_POINTER_T(TABLE_NAME) HWY_DYNAMIC_POINTER(TABLE_NAME)
    647 
    648 #endif  // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
    649 
    650 // Returns the name of an anonymous dispatch table that is only shared with
    651 // macro invocations coming from the same source line.
    652 #define HWY_DISPATCH_TABLE_T() HWY_CONCAT(HighwayDispatchTableT, __LINE__)
    653 
    654 // For templated code, combines export and dispatch using an anonymous table.
    655 #define HWY_EXPORT_AND_DYNAMIC_DISPATCH_T(FUNC_NAME) \
    656  HWY_EXPORT_T(HWY_DISPATCH_TABLE_T(), FUNC_NAME);   \
    657  HWY_DYNAMIC_DISPATCH_T(HWY_DISPATCH_TABLE_T())
    658 
    659 // DEPRECATED names; please use HWY_HAVE_* instead.
    660 #define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
    661 #define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
    662 #define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64
    663 
    664 }  // namespace hwy
    665 
    666 #endif  // HWY_HIGHWAY_INCLUDED
    667 
    668 //------------------------------------------------------------------------------
    669 
    670 // NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want
    671 // to include them once per target, which is ensured by the toggle check.
    672 // Because ops/*.h are included under it, they do not need their own guard.
    673 #if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
    674 #ifdef HWY_HIGHWAY_PER_TARGET
    675 #undef HWY_HIGHWAY_PER_TARGET
    676 #else
    677 #define HWY_HIGHWAY_PER_TARGET
    678 #endif
    679 
    680 // No SIMD target enabled, skip header inclusion.
    681 #if HWY_ENABLED_BASELINE == 0
    682 
    683 // We would expect that HWY_TARGET and HWY_STATIC_TARGET are now both 0.
    684 #if HWY_TARGET != 0
    685 #error "Why is HWY_TARGET not 0 when HWY_ENABLED_BASELINE == 0?"
    686 #endif
    687 #if HWY_STATIC_TARGET != 0
    688 #error "Why is HWY_STATIC_TARGET not 0 when HWY_ENABLED_BASELINE == 0?"
    689 #endif
    690 
    691 #else
    692 
    693 // These define ops inside namespace hwy::HWY_NAMESPACE.
    694 #if HWY_TARGET == HWY_SSE2 || HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
    695 #include "hwy/ops/x86_128-inl.h"
    696 #elif HWY_TARGET == HWY_AVX2
    697 #include "hwy/ops/x86_256-inl.h"
    698 #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL ||     \
    699    HWY_TARGET == HWY_AVX3_ZEN4 || HWY_TARGET == HWY_AVX3_SPR || \
    700    HWY_TARGET == HWY_AVX10_2
    701 #include "hwy/ops/x86_avx3-inl.h"
    702 #elif HWY_TARGET == HWY_Z14 || HWY_TARGET == HWY_Z15 || \
    703    (HWY_TARGET & HWY_ALL_PPC)
    704 #include "hwy/ops/ppc_vsx-inl.h"
    705 #elif HWY_TARGET & HWY_ALL_NEON
    706 #include "hwy/ops/arm_neon-inl.h"
    707 #elif HWY_TARGET & HWY_ALL_SVE
    708 #include "hwy/ops/arm_sve-inl.h"
    709 #elif HWY_TARGET == HWY_WASM_EMU256
    710 #include "hwy/ops/wasm_256-inl.h"
    711 #elif HWY_TARGET == HWY_WASM
    712 #include "hwy/ops/wasm_128-inl.h"
    713 #elif HWY_TARGET == HWY_RVV
    714 #include "hwy/ops/rvv-inl.h"
    715 #elif HWY_TARGET == HWY_LSX
    716 #include "hwy/ops/loongarch_lsx-inl.h"
    717 #elif HWY_TARGET == HWY_LASX
    718 #include "hwy/ops/loongarch_lasx-inl.h"
    719 #elif HWY_TARGET == HWY_EMU128
    720 #include "hwy/ops/emu128-inl.h"
    721 #elif HWY_TARGET == HWY_SCALAR
    722 #include "hwy/ops/scalar-inl.h"
    723 #else
    724 #pragma message("HWY_TARGET does not match any known target")
    725 #endif  // HWY_TARGET
    726 
    727 #include "hwy/ops/generic_ops-inl.h"
    728 
    729 #endif  // HWY_ENABLED_BASELINE
    730 
    731 #endif  // HWY_HIGHWAY_PER_TARGET