tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

traits128-inl.h (18345B)


      1 // Copyright 2021 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 // Per-target
     17 #if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
     18    defined(HWY_TARGET_TOGGLE)
     19 #ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
     20 #undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
     21 #else
     22 #define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
     23 #endif
     24 
     25 #include <stddef.h>
     26 #include <stdint.h>
     27 
     28 #include "hwy/contrib/sort/order.h"  // SortDescending
     29 #include "hwy/contrib/sort/shared-inl.h"
     30 #include "hwy/highway.h"
     31 
     32 HWY_BEFORE_NAMESPACE();
     33 namespace hwy {
     34 namespace HWY_NAMESPACE {
     35 namespace detail {
     36 
     37 // Also used by HeapSort, so do not require VQSORT_ENABLED.
     38 #if HWY_TARGET != HWY_SCALAR || HWY_IDE
     39 
     40 // Highway does not provide a lane type for 128-bit keys, so we use uint64_t
     41 // along with an abstraction layer for single-lane vs. lane-pair, which is
     42 // independent of the order.
     43 struct KeyAny128 {
     44  static constexpr bool Is128() { return true; }
     45  constexpr size_t LanesPerKey() const { return 2; }
     46 
     47  // What type bench_sort should allocate for generating inputs.
     48  using LaneType = uint64_t;
     49  // KeyType and KeyString are defined by derived classes.
     50 
     51  HWY_INLINE void Swap(LaneType* a, LaneType* b) const {
     52    const FixedTag<LaneType, 2> d;
     53    const auto temp = LoadU(d, a);
     54    StoreU(LoadU(d, b), d, a);
     55    StoreU(temp, d, b);
     56  }
     57 
     58  template <class V, class M>
     59  HWY_INLINE V CompressKeys(V keys, M mask) const {
     60    return CompressBlocksNot(keys, mask);
     61  }
     62 
     63  template <class D, HWY_IF_U64_D(D)>
     64  HWY_INLINE Vec<D> SetKey(D d, const TFromD<D>* key) const {
     65    return LoadDup128(d, key);
     66  }
     67 
     68  template <class D, HWY_IF_U64_D(D)>
     69  HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const {
     70    return ReverseBlocks(d, v);
     71  }
     72 
     73  template <class D, HWY_IF_U64_D(D)>
     74  HWY_INLINE Vec<D> ReverseKeys2(D /* tag */, const Vec<D> v) const {
     75    HWY_DASSERT(Lanes(D()) >= 4);  // at least 2 keys
     76    return SwapAdjacentBlocks(v);
     77  }
     78 
     79  // Only called for 4 keys because we do not support >512-bit vectors.
     80  template <class D, HWY_IF_U64_D(D)>
     81  HWY_INLINE Vec<D> ReverseKeys4(D d, const Vec<D> v) const {
     82    HWY_DASSERT(Lanes(D()) == 8);  // exactly 4 keys: the 512-bit limit
     83    return ReverseKeys(d, v);
     84  }
     85 
     86  // Only called for 4 keys because we do not support >512-bit vectors.
     87  template <class D, HWY_IF_U64_D(D)>
     88  HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd,
     89                                 const Vec<D> even) const {
     90    HWY_DASSERT(Lanes(D()) == 8);  // exactly 4 keys: the 512-bit limit
     91    return ConcatUpperLower(d, odd, even);
     92  }
     93 
     94  template <class V>
     95  HWY_INLINE V OddEvenKeys(const V odd, const V even) const {
     96    return OddEvenBlocks(odd, even);
     97  }
     98 
     99  template <class D, HWY_IF_U64_D(D)>
    100  HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>) const {
    101    HWY_ASSERT(0);  // not supported: would require 1024-bit vectors
    102  }
    103 
    104  template <class D, HWY_IF_U64_D(D)>
    105  HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>) const {
    106    HWY_ASSERT(0);  // not supported: would require 2048-bit vectors
    107  }
    108 
    109  // This is only called for 8/16 col networks (not supported).
    110  template <class D, HWY_IF_U64_D(D)>
    111  HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>) const {
    112    HWY_ASSERT(0);
    113  }
    114 
    115  // This is only called for 16 col networks (not supported).
    116  template <class D, HWY_IF_U64_D(D)>
    117  HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>) const {
    118    HWY_ASSERT(0);
    119  }
    120 
    121  // This is only called for 8 col networks (not supported).
    122  template <class D, HWY_IF_U64_D(D)>
    123  HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>) const {
    124    HWY_ASSERT(0);
    125  }
    126 };
    127 
    128 // Base class shared between OrderAscending128, OrderDescending128.
    129 struct Key128 : public KeyAny128 {
    130  // False indicates the entire key should be compared. KV means key-value.
    131  static constexpr bool IsKV() { return false; }
    132 
    133  // What type to pass to VQSort.
    134  using KeyType = hwy::uint128_t;
    135 
    136  const char* KeyString() const { return "U128"; }
    137 
    138  template <class D, HWY_IF_U64_D(D)>
    139  HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
    140    return Eq128(d, a, b);
    141  }
    142 
    143  template <class D, HWY_IF_U64_D(D)>
    144  HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
    145    return Ne128(d, a, b);
    146  }
    147 
    148  // For keys=entire 128 bits, any difference counts.
    149  template <class D, HWY_IF_U64_D(D)>
    150  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
    151    // Must avoid floating-point comparisons (for -0)
    152    const RebindToUnsigned<D> du;
    153    return AllTrue(du, Eq(BitCast(du, diff), Zero(du)));
    154  }
    155 
    156  HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
    157    return a[0] == b[0] && a[1] == b[1];
    158  }
    159 
    160  // Returns vector with only the top half of each block valid. This allows
    161  // fusing the "replicate upper to lower half" step with a subsequent permute.
    162  template <class Order, class D>
    163  HWY_INLINE HWY_MAYBE_UNUSED Vec<D> CompareTop(D d, Vec<D> a, Vec<D> b) const {
    164    const Mask<D> eqHL = Eq(a, b);
    165    const Vec<D> ltHL = VecFromMask(d, Order().CompareLanes(a, b));
    166 #if HWY_TARGET <= HWY_AVX2  // slightly faster
    167    const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
    168    return OrAnd(ltHL, VecFromMask(d, eqHL), ltLX);
    169 #else
    170    return IfThenElse(eqHL, DupEven(ltHL), ltHL);
    171 #endif
    172  }
    173 };
    174 
    175 // Anything order-related depends on the key traits *and* the order (see
    176 // FirstOfLanes). We cannot implement just one Compare function because Lt128
    177 // only compiles if the lane type is u64. Thus we need either overloaded
    178 // functions with a tag type, class specializations, or separate classes.
    179 // We avoid overloaded functions because we want all functions to be callable
    180 // from a SortTraits without per-function wrappers. Specializing would work, but
    181 // we are anyway going to specialize at a higher level.
    182 struct OrderAscending128 : public Key128 {
    183  using Order = SortAscending;
    184  using OrderForSortingNetwork = OrderAscending128;
    185 
    186  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
    187    return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
    188  }
    189 
    190  template <class D, HWY_IF_U64_D(D)>
    191  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
    192    return Lt128(d, a, b);
    193  }
    194 
    195  template <class D, HWY_IF_U64_D(D)>
    196  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
    197    return Min128(d, a, b);
    198  }
    199 
    200  template <class D, HWY_IF_U64_D(D)>
    201  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
    202    return Max128(d, a, b);
    203  }
    204 
    205  // FirstOfLanes/LastOfLanes are implemented in Traits128.
    206 
    207  // Same as for regular lanes because 128-bit keys are u64.
    208  template <class D, HWY_IF_U64_D(D)>
    209  HWY_INLINE Vec<D> FirstValue(D d) const {
    210    return Set(d, hwy::LowestValue<TFromD<D> >());
    211  }
    212 
    213  template <class D, HWY_IF_U64_D(D)>
    214  HWY_INLINE Vec<D> LastValue(D d) const {
    215    return Set(d, hwy::HighestValue<TFromD<D> >());
    216  }
    217 
    218  template <class D, HWY_IF_U64_D(D)>
    219  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    220    const Vec<D> k0 = Zero(d);
    221    const Vec<D> k1 = OddEven(k0, Set(d, uint64_t{1}));
    222    const Mask<D> borrow = Eq(v, k0);  // don't-care, lo == 0
    223    // lo == 0? 1 : 0, 0
    224    const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(borrow, k1));
    225    return Sub(Sub(v, k1), adjust);
    226  }
    227 
    228  // 'Private', used by base class Key128::CompareTop.
    229  template <class V>
    230  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
    231    return Lt(a, b);
    232  }
    233 };
    234 
    235 struct OrderDescending128 : public Key128 {
    236  using Order = SortDescending;
    237  using OrderForSortingNetwork = OrderDescending128;
    238 
    239  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
    240    return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
    241  }
    242 
    243  template <class D, HWY_IF_U64_D(D)>
    244  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
    245    return Lt128(d, b, a);
    246  }
    247 
    248  template <class D, HWY_IF_U64_D(D)>
    249  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
    250    return Max128(d, a, b);
    251  }
    252 
    253  template <class D, HWY_IF_U64_D(D)>
    254  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
    255    return Min128(d, a, b);
    256  }
    257 
    258  // FirstOfLanes/LastOfLanes are implemented in Traits128.
    259 
    260  // Same as for regular lanes because 128-bit keys are u64.
    261  template <class D, HWY_IF_U64_D(D)>
    262  HWY_INLINE Vec<D> FirstValue(D d) const {
    263    return Set(d, hwy::HighestValue<TFromD<D> >());
    264  }
    265 
    266  template <class D, HWY_IF_U64_D(D)>
    267  HWY_INLINE Vec<D> LastValue(D d) const {
    268    return Set(d, hwy::LowestValue<TFromD<D> >());
    269  }
    270 
    271  template <class D, HWY_IF_U64_D(D)>
    272  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    273    const Vec<D> k1 = OddEven(Zero(d), Set(d, uint64_t{1}));
    274    const Vec<D> added = Add(v, k1);
    275    const Mask<D> overflowed = Lt(added, v);  // false, overflowed
    276    // overflowed? 1 : 0, 0
    277    const Vec<D> adjust = ShiftLeftLanes<1>(IfThenElseZero(overflowed, k1));
    278    return Add(added, adjust);
    279  }
    280 
    281  // 'Private', used by base class Key128::CompareTop.
    282  template <class V>
    283  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
    284    return Lt(b, a);
    285  }
    286 };
    287 
    288 // Base class shared between OrderAscendingKV128, OrderDescendingKV128.
    289 struct KeyValue128 : public KeyAny128 {
    290  // True indicates only part of the key (the more significant lane) should be
    291  // compared. KV stands for key-value.
    292  static constexpr bool IsKV() { return true; }
    293 
    294  // What type to pass to VQSort.
    295  using KeyType = K64V64;
    296 
    297  const char* KeyString() const { return "k+v=128"; }
    298 
    299  template <class D, HWY_IF_U64_D(D)>
    300  HWY_INLINE Mask<D> EqualKeys(D d, Vec<D> a, Vec<D> b) const {
    301    return Eq128Upper(d, a, b);
    302  }
    303 
    304  template <class D, HWY_IF_U64_D(D)>
    305  HWY_INLINE Mask<D> NotEqualKeys(D d, Vec<D> a, Vec<D> b) const {
    306    return Ne128Upper(d, a, b);
    307  }
    308 
    309  HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const {
    310    return a[1] == b[1];
    311  }
    312 
    313  // Only count differences in the actual key, not the value.
    314  template <class D, HWY_IF_U64_D(D)>
    315  HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const {
    316    // Must avoid floating-point comparisons (for -0)
    317    const RebindToUnsigned<D> du;
    318    const Vec<decltype(du)> zero = Zero(du);
    319    const Vec<decltype(du)> keys = OddEven(diff, zero);  // clear values
    320    return AllTrue(du, Eq(BitCast(du, keys), zero));
    321  }
    322 
    323  // Returns vector with only the top half of each block valid. This allows
    324  // fusing the "replicate upper to lower half" step with a subsequent permute.
    325  template <class Order, class D>
    326  HWY_INLINE HWY_MAYBE_UNUSED Vec<D> CompareTop(D d, Vec<D> a, Vec<D> b) const {
    327    // Only the upper lane of each block is a key, and only that lane is
    328    // required to be valid, so comparing all lanes is sufficient.
    329    return VecFromMask(d, Order().CompareLanes(a, b));
    330  }
    331 };
    332 
    333 struct OrderAscendingKV128 : public KeyValue128 {
    334  using Order = SortAscending;
    335  using OrderForSortingNetwork = OrderAscending128;
    336 
    337  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
    338    return a[1] < b[1];
    339  }
    340 
    341  template <class D, HWY_IF_U64_D(D)>
    342  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
    343    return Lt128Upper(d, a, b);
    344  }
    345 
    346  template <class D, HWY_IF_U64_D(D)>
    347  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
    348    return Min128Upper(d, a, b);
    349  }
    350 
    351  template <class D, HWY_IF_U64_D(D)>
    352  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
    353    return Max128Upper(d, a, b);
    354  }
    355 
    356  // FirstOfLanes/LastOfLanes are implemented in Traits128.
    357 
    358  // Same as for regular lanes because 128-bit keys are u64.
    359  template <class D, HWY_IF_U64_D(D)>
    360  HWY_INLINE Vec<D> FirstValue(D d) const {
    361    return Set(d, hwy::LowestValue<TFromD<D> >());
    362  }
    363 
    364  template <class D, HWY_IF_U64_D(D)>
    365  HWY_INLINE Vec<D> LastValue(D d) const {
    366    return Set(d, hwy::HighestValue<TFromD<D> >());
    367  }
    368 
    369  template <class D, HWY_IF_U64_D(D)>
    370  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    371    const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));
    372    return Sub(v, k1);
    373  }
    374 
    375  // 'Private', used by base class KeyValue128::CompareTop.
    376  template <class V>
    377  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
    378    return Lt(a, b);
    379  }
    380 };
    381 
    382 struct OrderDescendingKV128 : public KeyValue128 {
    383  using Order = SortDescending;
    384  using OrderForSortingNetwork = OrderDescending128;
    385 
    386  HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const {
    387    return b[1] < a[1];
    388  }
    389 
    390  template <class D, HWY_IF_U64_D(D)>
    391  HWY_INLINE Mask<D> Compare(D d, Vec<D> a, Vec<D> b) const {
    392    return Lt128Upper(d, b, a);
    393  }
    394 
    395  template <class D, HWY_IF_U64_D(D)>
    396  HWY_INLINE Vec<D> First(D d, const Vec<D> a, const Vec<D> b) const {
    397    return Max128Upper(d, a, b);
    398  }
    399 
    400  template <class D, HWY_IF_U64_D(D)>
    401  HWY_INLINE Vec<D> Last(D d, const Vec<D> a, const Vec<D> b) const {
    402    return Min128Upper(d, a, b);
    403  }
    404 
    405  // FirstOfLanes/LastOfLanes are implemented in Traits128.
    406 
    407  // Same as for regular lanes because 128-bit keys are u64.
    408  template <class D, HWY_IF_U64_D(D)>
    409  HWY_INLINE Vec<D> FirstValue(D d) const {
    410    return Set(d, hwy::HighestValue<TFromD<D> >());
    411  }
    412 
    413  template <class D, HWY_IF_U64_D(D)>
    414  HWY_INLINE Vec<D> LastValue(D d) const {
    415    return Set(d, hwy::LowestValue<TFromD<D> >());
    416  }
    417 
    418  template <class D, HWY_IF_U64_D(D)>
    419  HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const {
    420    const Vec<D> k1 = OddEven(Set(d, uint64_t{1}), Zero(d));
    421    return Add(v, k1);
    422  }
    423 
    424  // 'Private', used by base class KeyValue128::CompareTop.
    425  template <class V>
    426  HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b) const {
    427    return Lt(b, a);
    428  }
    429 };
    430 
    431 // We want to swap 2 u128, i.e. 4 u64 lanes, based on the 0 or FF..FF mask in
    432 // the most-significant of those lanes (the result of CompareTop), so
    433 // replicate it 4x. Only called for >= 256-bit vectors.
    434 
    435 #if HWY_TARGET <= HWY_AVX3
    436 template <class V, HWY_IF_V_SIZE_V(V, 64)>
    437 HWY_INLINE V ReplicateTop4x(V v) {
    438  return V{_mm512_permutex_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
    439 }
    440 #endif  // HWY_TARGET <= HWY_AVX3
    441 
    442 #if HWY_TARGET <= HWY_AVX2
    443 
    444 template <class V, HWY_IF_V_SIZE_V(V, 32)>
    445 HWY_INLINE V ReplicateTop4x(V v) {
    446  return V{_mm256_permute4x64_epi64(v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
    447 }
    448 
    449 #else  // HWY_TARGET > HWY_AVX2
    450 
    451 template <class V>
    452 HWY_INLINE V ReplicateTop4x(V v) {
    453 #if HWY_TARGET == HWY_SVE_256
    454  return svdup_lane_u64(v, 3);
    455 #else
    456  const ScalableTag<uint64_t> d;
    457  HWY_DASSERT(Lanes(d) == 4 || Lanes(d) == 8);  // for table below
    458  HWY_ALIGN static constexpr uint64_t kIndices[8] = {3, 3, 3, 3, 7, 7, 7, 7};
    459  return TableLookupLanes(v, SetTableIndices(d, kIndices));
    460 #endif
    461 }
    462 
    463 #endif  // HWY_TARGET <= HWY_AVX2
    464 
    465 // Shared code that depends on Order.
    466 template <class Base>
    467 struct Traits128 : public Base {
    468  using TraitsForSortingNetwork =
    469      Traits128<typename Base::OrderForSortingNetwork>;
    470 
    471  template <class D, HWY_IF_U64_D(D)>
    472  HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v,
    473                                 TFromD<D>* HWY_RESTRICT buf) const {
    474    const Base* base = static_cast<const Base*>(this);
    475    const size_t N = Lanes(d);
    476    Store(v, d, buf);
    477    v = base->SetKey(d, buf + 0);  // result must be broadcasted
    478    for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
    479      v = base->First(d, v, base->SetKey(d, buf + i));
    480    }
    481    return v;
    482  }
    483 
    484  template <class D, HWY_IF_U64_D(D)>
    485  HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v,
    486                                TFromD<D>* HWY_RESTRICT buf) const {
    487    const Base* base = static_cast<const Base*>(this);
    488    const size_t N = Lanes(d);
    489    Store(v, d, buf);
    490    v = base->SetKey(d, buf + 0);  // result must be broadcasted
    491    for (size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
    492      v = base->Last(d, v, base->SetKey(d, buf + i));
    493    }
    494    return v;
    495  }
    496 
    497  template <class D, HWY_IF_U64_D(D)>
    498  HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const {
    499    const Base* base = static_cast<const Base*>(this);
    500 
    501    const Vec<D> a_copy = a;
    502    const auto lt = base->Compare(d, a, b);
    503    a = IfThenElse(lt, a, b);
    504    b = IfThenElse(lt, b, a_copy);
    505  }
    506 
    507  // Conditionally swaps even-numbered keys with their odd-numbered neighbor.
    508  template <class D, HWY_IF_U64_D(D)>
    509  HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const {
    510    HWY_DASSERT(Lanes(d) >= 4);  // required by ReplicateTop4x
    511    const Base* base = static_cast<const Base*>(this);
    512    Vec<D> swapped = base->ReverseKeys2(d, v);
    513    const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
    514    return IfVecThenElse(ReplicateTop4x(cmpHx), swapped, v);
    515  }
    516 
    517  // Swaps with the vector formed by reversing contiguous groups of four 128-bit
    518  // keys, which implies 512-bit vectors (we do not support more than that).
    519  template <class D, HWY_IF_U64_D(D)>
    520  HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const {
    521    HWY_DASSERT(Lanes(d) == 8);  // For TableLookupLanes below
    522    const Base* base = static_cast<const Base*>(this);
    523    Vec<D> swapped = base->ReverseKeys4(d, v);
    524 
    525    const Vec<D> cmpHx = base->template CompareTop<Base>(d, v, swapped);
    526    // Similar to ReplicateTop4x, we want to gang together 2 comparison results
    527    // (4 lanes). They are not contiguous, so use permute to replicate 4x.
    528    HWY_ALIGN uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
    529    const Vec<D> select = TableLookupLanes(cmpHx, SetTableIndices(d, kIndices));
    530    return IfVecThenElse(select, swapped, v);
    531  }
    532 
    533  // Conditionally swaps lane 0 with 4, 1 with 5 etc.
    534  template <class D, HWY_IF_U64_D(D)>
    535  HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>) const {
    536    // Only used by Merge16, which would require 2048 bit vectors (unsupported).
    537    HWY_ASSERT(0);
    538  }
    539 };
    540 
    541 #endif  // HWY_TARGET != HWY_SCALAR
    542 
    543 }  // namespace detail
    544 // NOLINTNEXTLINE(google-readability-namespace-comments)
    545 }  // namespace HWY_NAMESPACE
    546 }  // namespace hwy
    547 HWY_AFTER_NAMESPACE();
    548 
    549 #endif  // HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE