tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

traits-inl.h (20512B)


      1 // Copyright 2021 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 // Per-target
     17 #if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE) == \
     18    defined(HWY_TARGET_TOGGLE)
     19 #ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
     20 #undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
     21 #else
     22 #define HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE
     23 #endif
     24 
     25 #include <stddef.h>
     26 #include <stdint.h>
     27 
     28 #include "hwy/contrib/sort/order.h"       // SortDescending
     29 #include "hwy/contrib/sort/shared-inl.h"  // SortConstants
     30 #include "hwy/highway.h"
     31 
     32 HWY_BEFORE_NAMESPACE();
     33 namespace hwy {
     34 namespace HWY_NAMESPACE {
     35 namespace detail {
     36 
     37 // Base class of both KeyLane variants
     38 template <typename LaneTypeArg, typename KeyTypeArg>
     39 struct KeyLaneBase {
     40  static constexpr bool Is128() { return false; }
     41  constexpr size_t LanesPerKey() const { return 1; }
     42 
     43  // What type bench_sort should allocate for generating inputs.
     44  using LaneType = LaneTypeArg;
     45  // What type to pass to VQSort.
     46  using KeyType = KeyTypeArg;
     47 
     48  const char* KeyString() const {
     49    return IsSame<KeyTypeArg, float16_t>()     ? "f16"
     50           : IsSame<KeyTypeArg, float>()       ? "f32"
     51           : IsSame<KeyTypeArg, double>()      ? "f64"
     52           : IsSame<KeyTypeArg, int16_t>()     ? "i16"
     53           : IsSame<KeyTypeArg, int32_t>()     ? "i32"
     54           : IsSame<KeyTypeArg, int64_t>()     ? "i64"
     55           : IsSame<KeyTypeArg, uint16_t>()    ? "u32"
     56           : IsSame<KeyTypeArg, uint32_t>()    ? "u32"
     57           : IsSame<KeyTypeArg, uint64_t>()    ? "u64"
     58           : IsSame<KeyTypeArg, hwy::K32V32>() ? "k+v=64"
     59                                               : "?";
     60  }
     61 };
     62 
     63 // Wrapper functions so we can specialize for floats - infinity trumps
     64 // HighestValue (the normal value with the largest magnitude). Must be outside
     65 // Order* classes to enable SFINAE.
     66 
     67 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
     68 Vec<D> LargestSortValue(D d) {
     69  return Inf(d);
     70 }
     71 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
     72 Vec<D> LargestSortValue(D d) {
     73  return Set(d, hwy::HighestValue<TFromD<D>>());
     74 }
     75 
     76 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
     77 Vec<D> SmallestSortValue(D d) {
     78  return Neg(Inf(d));
     79 }
     80 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
     81 Vec<D> SmallestSortValue(D d) {
     82  return Set(d, hwy::LowestValue<TFromD<D>>());
     83 }
     84 
     85 // Returns the next distinct larger value unless already +inf.
     86 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
     87 Vec<D> LargerSortValue(D d, Vec<D> v) {
     88  HWY_DASSERT(AllFalse(d, IsNaN(v)));  // we replaced all NaN with LastValue.
     89  using T = TFromD<decltype(d)>;
     90  const RebindToUnsigned<D> du;
     91  using VU = Vec<decltype(du)>;
     92 
     93  const VU vu = BitCast(du, Abs(v));
     94 
     95  // The direction depends on the original sign. Integer comparison is cheaper
     96  // than float comparison and treats -0 as 0 (so we return +epsilon).
     97  const Mask<decltype(du)> was_pos = Le(BitCast(du, v), SignBit(du));
     98  // If positive, add 1, else -1.
     99 #if HWY_ARCH_ARM_V7
    100  // Workaround for incorrect codegen. ~x - x is equivalent to x? 1 : -1.
    101  const VU was_pos_u = VecFromMask(du, was_pos);
    102  const VU add = Not(was_pos_u) - was_pos_u;
    103 #else
    104  using TU = TFromD<decltype(du)>;
    105  const VU add = IfThenElse(was_pos, Set(du, 1u), Set(du, LimitsMax<TU>()));
    106 #endif
    107  // Prev/next integer is the prev/next value, even if mantissa under/overflows.
    108  v = BitCast(d, Add(vu, add));
    109  // But we may have overflowed into inf or NaN; replace with inf if positive,
    110  // but the largest (later negated!) value if the input was -inf.
    111  const Mask<D> was_pos_f = RebindMask(d, was_pos);
    112  v = IfThenElse(IsFinite(v), v,
    113                 IfThenElse(was_pos_f, Inf(d), Set(d, HighestValue<T>())));
    114  // Restore the original sign - not via CopySignToAbs because we used a mask.
    115  return IfThenElse(was_pos_f, v, Neg(v));
    116 }
    117 
    118 // Returns the next distinct smaller value unless already -inf.
    119 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)>
    120 Vec<D> SmallerSortValue(D d, Vec<D> v) {
    121  HWY_DASSERT(AllFalse(d, IsNaN(v)));  // we replaced all NaN with LastValue.
    122  using T = TFromD<decltype(d)>;
    123  const RebindToUnsigned<D> du;
    124  using VU = Vec<decltype(du)>;
    125  using TU = TFromD<decltype(du)>;
    126 
    127  const VU vu = BitCast(du, Abs(v));
    128 
    129  // The direction depends on the original sign. Float comparison because we
    130  // want to treat 0 as -0 so we return -epsilon.
    131  const Mask<D> was_pos = Gt(v, Zero(d));
    132  // If positive, add -1, else 1.
    133  const VU add =
    134      IfThenElse(RebindMask(du, was_pos), Set(du, LimitsMax<TU>()), Set(du, 1));
    135  // Prev/next integer is the prev/next value, even if mantissa under/overflows.
    136  v = BitCast(d, Add(vu, add));
    137  // But we may have overflowed into inf or NaN; replace with +inf (which will
    138  // later be negated) if negative, but the largest value if the input was +inf.
    139  v = IfThenElse(IsFinite(v), v,
    140                 IfThenElse(was_pos, Set(d, HighestValue<T>()), Inf(d)));
    141  // Restore the original sign - not via CopySignToAbs because we used a mask.
    142  return IfThenElse(was_pos, v, Neg(v));
    143 }
    144 
    145 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
    146 Vec<D> LargerSortValue(D d, Vec<D> v) {
    147  return Add(v, Set(d, TFromD<D>{1}));
    148 }
    149 
    150 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)>
    151 Vec<D> SmallerSortValue(D d, Vec<D> v) {
    152  return Sub(v, Set(d, TFromD<D>{1}));
    153 }
    154 
    155 // Highway does not provide a lane type for 128-bit keys, so we use uint64_t
    156 // along with an abstraction layer for single-lane vs. lane-pair, which is
    157 // independent of the order.
    158 template <typename LaneType, typename KeyType>
    159 struct KeyLane : public KeyLaneBase<LaneType, KeyType> {
    160  // For HeapSort
    161  HWY_INLINE void Swap(LaneType* a, LaneType* b) const {
    162    const LaneType temp = *a;
    163    *a = *b;
    164    *b = temp;
    165  }
    166 
    167  template <class V, class M>
    168  HWY_INLINE V CompressKeys(V keys, M mask) const {
    169    return CompressNot(keys, mask);
    170  }
    171 
    172  // Broadcasts one key into a vector
    173  template <class D>
    174  HWY_INLINE Vec<D> SetKey(D d, const LaneType* key) const {
    175    return Set(d, *key);
    176  }
    177 
    178  template <class D>
    179  HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
    180    return Eq(a, b);
    181  }
    182 
    183  template <class D>
    184  HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
    185    return Ne(a, b);
    186  }
    187 
    188  // For keys=lanes, any difference counts.
    189  template <class D>
    190  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
    191    // Must avoid floating-point comparisons (for -0)
    192    const RebindToUnsigned<D> du;
    193    return AllTrue(du, Eq(BitCast(du, diff), Zero(du)));
    194  }
    195 
    196  HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
    197    return *a == *b;
    198  }
    199 
    200  template <class D>
    201  HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const {
    202    return Reverse(d, v);
    203  }
    204 
    205  template <class D>
    206  HWY_INLINE Vec<D> ReverseKeys2(D d, Vec<D> v) const {
    207    return Reverse2(d, v);
    208  }
    209 
    210  template <class D>
    211  HWY_INLINE Vec<D> ReverseKeys4(D d, Vec<D> v) const {
    212    return Reverse4(d, v);
    213  }
    214 
    215  template <class D>
    216  HWY_INLINE Vec<D> ReverseKeys8(D d, Vec<D> v) const {
    217    return Reverse8(d, v);
    218  }
    219 
    220  template <class D>
    221  HWY_INLINE Vec<D> ReverseKeys16(D d, Vec<D> v) const {
    222    static_assert(SortConstants::kMaxCols <= 16, "Assumes u32x16 = 512 bit");
    223    return ReverseKeys(d, v);
    224  }
    225 
    226  template <class V>
    227  HWY_INLINE V OddEvenKeys(const V odd, const V even) const {
    228    return OddEven(odd, even);
    229  }
    230 
    231  template <class D, HWY_IF_T_SIZE_D(D, 2)>
    232  HWY_INLINE Vec<D> SwapAdjacentPairs(D d, const Vec<D> v) const {
    233    const Repartition<uint32_t, D> du32;
    234    return BitCast(d, Shuffle2301(BitCast(du32, v)));
    235  }
    236  template <class D, HWY_IF_T_SIZE_D(D, 4)>
    237  HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const {
    238    return Shuffle1032(v);
    239  }
    240  template <class D, HWY_IF_T_SIZE_D(D, 8)>
    241  HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const {
    242    return SwapAdjacentBlocks(v);
    243  }
    244 
    245  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
    246  HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const {
    247 #if HWY_HAVE_FLOAT64  // in case D is float32
    248    const RepartitionToWide<D> dw;
    249 #else
    250    const RepartitionToWide<RebindToUnsigned<D>> dw;
    251 #endif
    252    return BitCast(d, SwapAdjacentPairs(dw, BitCast(dw, v)));
    253  }
    254  template <class D, HWY_IF_T_SIZE_D(D, 8)>
    255  HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const {
    256    // Assumes max vector size = 512
    257    return ConcatLowerUpper(d, v, v);
    258  }
    259 
    260  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
    261  HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd,
    262                                 const Vec<D> even) const {
    263 #if HWY_HAVE_FLOAT64  // in case D is float32
    264    const RepartitionToWide<D> dw;
    265 #else
    266    const RepartitionToWide<RebindToUnsigned<D>> dw;
    267 #endif
    268    return BitCast(d, OddEven(BitCast(dw, odd), BitCast(dw, even)));
    269  }
    270  template <class D, HWY_IF_T_SIZE_D(D, 8)>
    271  HWY_INLINE Vec<D> OddEvenPairs(D /* tag */, Vec<D> odd, Vec<D> even) const {
    272    return OddEvenBlocks(odd, even);
    273  }
    274 
    275  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
    276  HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const {
    277 #if HWY_HAVE_FLOAT64  // in case D is float32
    278    const RepartitionToWide<D> dw;
    279 #else
    280    const RepartitionToWide<RebindToUnsigned<D>> dw;
    281 #endif
    282    return BitCast(d, OddEvenPairs(dw, BitCast(dw, odd), BitCast(dw, even)));
    283  }
    284  template <class D, HWY_IF_T_SIZE_D(D, 8)>
    285  HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const {
    286    return ConcatUpperLower(d, odd, even);
    287  }
    288 };
    289 
    290 // Anything order-related depends on the key traits *and* the order (see
    291 // FirstOfLanes). We cannot implement just one Compare function because Lt128
    292 // only compiles if the lane type is u64. Thus we need either overloaded
    293 // functions with a tag type, class specializations, or separate classes.
    294 // We avoid overloaded functions because we want all functions to be callable
    295 // from a SortTraits without per-function wrappers. Specializing would work, but
    296 // we are anyway going to specialize at a higher level.
    297 template <typename T>
    298 struct OrderAscending : public KeyLane<T, T> {
    299  // False indicates the entire key (i.e. lane) should be compared. KV stands
    300  // for key-value.
    301  static constexpr bool IsKV() { return false; }
    302 
    303  using Order = SortAscending;
    304  using OrderForSortingNetwork = OrderAscending<T>;
    305 
    306  HWY_INLINE bool Compare1(const T* a, const T* b) const { return *a < *b; }
    307 
    308  template <class D>
    309  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
    310    return Lt(a, b);
    311  }
    312 
    313  // Two halves of Sort2, used in ScanMinMax.
    314  template <class D>
    315  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    316    return Min(a, b);
    317  }
    318 
    319  template <class D>
    320  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    321    return Max(a, b);
    322  }
    323 
    324  template <class D>
    325  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
    326                                 T* HWY_RESTRICT /* buf */) const {
    327    return MinOfLanes(d, v);
    328  }
    329 
    330  template <class D>
    331  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
    332                                T* HWY_RESTRICT /* buf */) const {
    333    return MaxOfLanes(d, v);
    334  }
    335 
    336  template <class D>
    337  HWY_INLINE Vec<D> FirstValue(D d) const {
    338    return SmallestSortValue(d);
    339  }
    340 
    341  template <class D>
    342  HWY_INLINE Vec<D> LastValue(D d) const {
    343    return LargestSortValue(d);
    344  }
    345 
    346  template <class D>
    347  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    348    return SmallerSortValue(d, v);
    349  }
    350 };
    351 
    352 template <typename T>
    353 struct OrderDescending : public KeyLane<T, T> {
    354  // False indicates the entire key (i.e. lane) should be compared. KV stands
    355  // for key-value.
    356  static constexpr bool IsKV() { return false; }
    357 
    358  using Order = SortDescending;
    359  using OrderForSortingNetwork = OrderDescending<T>;
    360 
    361  HWY_INLINE bool Compare1(const T* a, const T* b) const { return *b < *a; }
    362 
    363  template <class D>
    364  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
    365    return Lt(b, a);
    366  }
    367 
    368  template <class D>
    369  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    370    return Max(a, b);
    371  }
    372 
    373  template <class D>
    374  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    375    return Min(a, b);
    376  }
    377 
    378  template <class D>
    379  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
    380                                 T* HWY_RESTRICT /* buf */) const {
    381    return MaxOfLanes(d, v);
    382  }
    383 
    384  template <class D>
    385  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
    386                                T* HWY_RESTRICT /* buf */) const {
    387    return MinOfLanes(d, v);
    388  }
    389 
    390  template <class D>
    391  HWY_INLINE Vec<D> FirstValue(D d) const {
    392    return LargestSortValue(d);
    393  }
    394 
    395  template <class D>
    396  HWY_INLINE Vec<D> LastValue(D d) const {
    397    return SmallestSortValue(d);
    398  }
    399 
    400  template <class D>
    401  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    402    return LargerSortValue(d, v);
    403  }
    404 };
    405 
    406 struct KeyValue64 : public KeyLane<uint64_t, hwy::K32V32> {
    407  // True indicates only part of the key (i.e. lane) should be compared. KV
    408  // stands for key-value.
    409  static constexpr bool IsKV() { return true; }
    410 
    411  template <class D>
    412  HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
    413    return Eq(ShiftRight<32>(a), ShiftRight<32>(b));
    414  }
    415 
    416  template <class D>
    417  HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const {
    418    return Ne(ShiftRight<32>(a), ShiftRight<32>(b));
    419  }
    420 
    421  HWY_INLINE bool Equal1(const uint64_t* a, const uint64_t* b) const {
    422    return (*a >> 32) == (*b >> 32);
    423  }
    424 
    425  // Only count differences in the actual key, not the value.
    426  template <class D>
    427  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
    428    // Must avoid floating-point comparisons (for -0)
    429    const RebindToUnsigned<D> du;
    430    const Vec<decltype(du)> zero = Zero(du);
    431    const Vec<decltype(du)> keys = ShiftRight<32>(diff);  // clear values
    432    return AllTrue(du, Eq(BitCast(du, keys), zero));
    433  }
    434 };
    435 
    436 struct OrderAscendingKV64 : public KeyValue64 {
    437  using Order = SortAscending;
    438  using OrderForSortingNetwork = OrderAscending<LaneType>;
    439 
    440  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
    441    return (*a >> 32) < (*b >> 32);
    442  }
    443 
    444  template <class D>
    445  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
    446    return Lt(ShiftRight<32>(a), ShiftRight<32>(b));
    447  }
    448 
    449  // Not required to be stable (preserving the order of equivalent keys), so
    450  // we can include the value in the comparison.
    451  template <class D>
    452  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    453    return Min(a, b);
    454  }
    455 
    456  template <class D>
    457  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    458    return Max(a, b);
    459  }
    460 
    461  template <class D>
    462  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
    463                                 uint64_t* HWY_RESTRICT /* buf */) const {
    464    return MinOfLanes(d, v);
    465  }
    466 
    467  template <class D>
    468  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
    469                                uint64_t* HWY_RESTRICT /* buf */) const {
    470    return MaxOfLanes(d, v);
    471  }
    472 
    473  // Same as for regular lanes.
    474  template <class D>
    475  HWY_INLINE Vec<D> FirstValue(D d) const {
    476    return Set(d, hwy::LowestValue<TFromD<D>>());
    477  }
    478 
    479  template <class D>
    480  HWY_INLINE Vec<D> LastValue(D d) const {
    481    return Set(d, hwy::HighestValue<TFromD<D>>());
    482  }
    483 
    484  template <class D>
    485  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    486    return Sub(v, Set(d, uint64_t{1} << 32));
    487  }
    488 };
    489 
    490 struct OrderDescendingKV64 : public KeyValue64 {
    491  using Order = SortDescending;
    492  using OrderForSortingNetwork = OrderDescending<LaneType>;
    493 
    494  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
    495    return (*b >> 32) < (*a >> 32);
    496  }
    497 
    498  template <class D>
    499  HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const {
    500    return Lt(ShiftRight<32>(b), ShiftRight<32>(a));
    501  }
    502 
    503  // Not required to be stable (preserving the order of equivalent keys), so
    504  // we can include the value in the comparison.
    505  template <class D>
    506  HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    507    return Max(a, b);
    508  }
    509 
    510  template <class D>
    511  HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const {
    512    return Min(a, b);
    513  }
    514 
    515  template <class D>
    516  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
    517                                 uint64_t* HWY_RESTRICT /* buf */) const {
    518    return MaxOfLanes(d, v);
    519  }
    520 
    521  template <class D>
    522  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
    523                                uint64_t* HWY_RESTRICT /* buf */) const {
    524    return MinOfLanes(d, v);
    525  }
    526 
    527  template <class D>
    528  HWY_INLINE Vec<D> FirstValue(D d) const {
    529    return Set(d, hwy::HighestValue<TFromD<D>>());
    530  }
    531 
    532  template <class D>
    533  HWY_INLINE Vec<D> LastValue(D d) const {
    534    return Set(d, hwy::LowestValue<TFromD<D>>());
    535  }
    536 
    537  template <class D>
    538  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    539    return Add(v, Set(d, uint64_t{1} << 32));
    540  }
    541 };
    542 
    543 // Shared code that depends on Order.
    544 template <class Base>
    545 struct TraitsLane : public Base {
    546  using TraitsForSortingNetwork =
    547      TraitsLane<typename Base::OrderForSortingNetwork>;
    548 
    549  // For each lane i: replaces a[i] with the first and b[i] with the second
    550  // according to Base.
    551  // Corresponds to a conditional swap, which is one "node" of a sorting
    552  // network. Min/Max are cheaper than compare + blend at least for integers.
    553  template <class D>
    554  HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const {
    555    const Base* base = static_cast<const Base*>(this);
    556 
    557    const Vec<D> a_copy = a;
    558    // Prior to AVX3, there is no native 64-bit Min/Max, so they compile to 4
    559    // instructions. We can reduce it to a compare + 2 IfThenElse.
    560 #if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3
    561    if (sizeof(TFromD<D>) == 8) {
    562      const Mask<D> cmp = base->Compare(d, a, b);
    563      a = IfThenElse(cmp, a, b);
    564      b = IfThenElse(cmp, b, a_copy);
    565      return;
    566    }
    567 #endif
    568    a = base->First(d, a, b);
    569    b = base->Last(d, a_copy, b);
    570  }
    571 
    572  // Conditionally swaps even-numbered lanes with their odd-numbered neighbor.
    573  template <class D, HWY_IF_T_SIZE_D(D, 8)>
    574  HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
    575    const Base* base = static_cast<const Base*>(this);
    576    Vec<D> swapped = base->ReverseKeys2(d, v);
    577    // Further to the above optimization, Sort2+OddEvenKeys compile to four
    578    // instructions; we can save one by combining two blends.
    579 #if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3
    580    const Vec<D> cmp = VecFromMask(d, base->Compare(d, v, swapped));
    581    return IfVecThenElse(DupOdd(cmp), swapped, v);
    582 #else
    583    Sort2(d, v, swapped);
    584    return base->OddEvenKeys(swapped, v);
    585 #endif
    586  }
    587 
    588  // (See above - we use Sort2 for non-64-bit types.)
    589  template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)>
    590  HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
    591    const Base* base = static_cast<const Base*>(this);
    592    Vec<D> swapped = base->ReverseKeys2(d, v);
    593    Sort2(d, v, swapped);
    594    return base->OddEvenKeys(swapped, v);
    595  }
    596 
    597  // Swaps with the vector formed by reversing contiguous groups of 4 keys.
    598  template <class D>
    599  HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const {
    600    const Base* base = static_cast<const Base*>(this);
    601    Vec<D> swapped = base->ReverseKeys4(d, v);
    602    Sort2(d, v, swapped);
    603    return base->OddEvenPairs(d, swapped, v);
    604  }
    605 
    606  // Conditionally swaps lane 0 with 4, 1 with 5 etc.
    607  template <class D>
    608  HWY_INLINE Vec<D> SortPairsDistance4(D d, Vec<D> v) const {
    609    const Base* base = static_cast<const Base*>(this);
    610    Vec<D> swapped = base->SwapAdjacentQuads(d, v);
    611    // Only used in Merge16, so this will not be used on AVX2 (which only has 4
    612    // u64 lanes), so skip the above optimization for 64-bit AVX2.
    613    Sort2(d, v, swapped);
    614    return base->OddEvenQuads(d, swapped, v);
    615  }
    616 };
    617 
    618 }  // namespace detail
    619 // NOLINTNEXTLINE(google-readability-namespace-comments)
    620 }  // namespace HWY_NAMESPACE
    621 }  // namespace hwy
    622 HWY_AFTER_NAMESPACE();
    623 
    624 #endif  // HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE