traits-inl.h (20512B)
1 // Copyright 2021 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // Per-target 17 #if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE) == \ 18 defined(HWY_TARGET_TOGGLE) 19 #ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE 20 #undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE 21 #else 22 #define HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE 23 #endif 24 25 #include <stddef.h> 26 #include <stdint.h> 27 28 #include "hwy/contrib/sort/order.h" // SortDescending 29 #include "hwy/contrib/sort/shared-inl.h" // SortConstants 30 #include "hwy/highway.h" 31 32 HWY_BEFORE_NAMESPACE(); 33 namespace hwy { 34 namespace HWY_NAMESPACE { 35 namespace detail { 36 37 // Base class of both KeyLane variants 38 template <typename LaneTypeArg, typename KeyTypeArg> 39 struct KeyLaneBase { 40 static constexpr bool Is128() { return false; } 41 constexpr size_t LanesPerKey() const { return 1; } 42 43 // What type bench_sort should allocate for generating inputs. 44 using LaneType = LaneTypeArg; 45 // What type to pass to VQSort. 46 using KeyType = KeyTypeArg; 47 48 const char* KeyString() const { 49 return IsSame<KeyTypeArg, float16_t>() ? "f16" 50 : IsSame<KeyTypeArg, float>() ? "f32" 51 : IsSame<KeyTypeArg, double>() ? "f64" 52 : IsSame<KeyTypeArg, int16_t>() ? "i16" 53 : IsSame<KeyTypeArg, int32_t>() ? "i32" 54 : IsSame<KeyTypeArg, int64_t>() ? "i64" 55 : IsSame<KeyTypeArg, uint16_t>() ? "u32" 56 : IsSame<KeyTypeArg, uint32_t>() ? "u32" 57 : IsSame<KeyTypeArg, uint64_t>() ? "u64" 58 : IsSame<KeyTypeArg, hwy::K32V32>() ? "k+v=64" 59 : "?"; 60 } 61 }; 62 63 // Wrapper functions so we can specialize for floats - infinity trumps 64 // HighestValue (the normal value with the largest magnitude). Must be outside 65 // Order* classes to enable SFINAE. 66 67 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)> 68 Vec<D> LargestSortValue(D d) { 69 return Inf(d); 70 } 71 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)> 72 Vec<D> LargestSortValue(D d) { 73 return Set(d, hwy::HighestValue<TFromD<D>>()); 74 } 75 76 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)> 77 Vec<D> SmallestSortValue(D d) { 78 return Neg(Inf(d)); 79 } 80 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)> 81 Vec<D> SmallestSortValue(D d) { 82 return Set(d, hwy::LowestValue<TFromD<D>>()); 83 } 84 85 // Returns the next distinct larger value unless already +inf. 86 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)> 87 Vec<D> LargerSortValue(D d, Vec<D> v) { 88 HWY_DASSERT(AllFalse(d, IsNaN(v))); // we replaced all NaN with LastValue. 89 using T = TFromD<decltype(d)>; 90 const RebindToUnsigned<D> du; 91 using VU = Vec<decltype(du)>; 92 93 const VU vu = BitCast(du, Abs(v)); 94 95 // The direction depends on the original sign. Integer comparison is cheaper 96 // than float comparison and treats -0 as 0 (so we return +epsilon). 97 const Mask<decltype(du)> was_pos = Le(BitCast(du, v), SignBit(du)); 98 // If positive, add 1, else -1. 99 #if HWY_ARCH_ARM_V7 100 // Workaround for incorrect codegen. ~x - x is equivalent to x? 1 : -1. 101 const VU was_pos_u = VecFromMask(du, was_pos); 102 const VU add = Not(was_pos_u) - was_pos_u; 103 #else 104 using TU = TFromD<decltype(du)>; 105 const VU add = IfThenElse(was_pos, Set(du, 1u), Set(du, LimitsMax<TU>())); 106 #endif 107 // Prev/next integer is the prev/next value, even if mantissa under/overflows. 108 v = BitCast(d, Add(vu, add)); 109 // But we may have overflowed into inf or NaN; replace with inf if positive, 110 // but the largest (later negated!) value if the input was -inf. 111 const Mask<D> was_pos_f = RebindMask(d, was_pos); 112 v = IfThenElse(IsFinite(v), v, 113 IfThenElse(was_pos_f, Inf(d), Set(d, HighestValue<T>()))); 114 // Restore the original sign - not via CopySignToAbs because we used a mask. 115 return IfThenElse(was_pos_f, v, Neg(v)); 116 } 117 118 // Returns the next distinct smaller value unless already -inf. 119 template <class D, HWY_IF_FLOAT_OR_SPECIAL_D(D)> 120 Vec<D> SmallerSortValue(D d, Vec<D> v) { 121 HWY_DASSERT(AllFalse(d, IsNaN(v))); // we replaced all NaN with LastValue. 122 using T = TFromD<decltype(d)>; 123 const RebindToUnsigned<D> du; 124 using VU = Vec<decltype(du)>; 125 using TU = TFromD<decltype(du)>; 126 127 const VU vu = BitCast(du, Abs(v)); 128 129 // The direction depends on the original sign. Float comparison because we 130 // want to treat 0 as -0 so we return -epsilon. 131 const Mask<D> was_pos = Gt(v, Zero(d)); 132 // If positive, add -1, else 1. 133 const VU add = 134 IfThenElse(RebindMask(du, was_pos), Set(du, LimitsMax<TU>()), Set(du, 1)); 135 // Prev/next integer is the prev/next value, even if mantissa under/overflows. 136 v = BitCast(d, Add(vu, add)); 137 // But we may have overflowed into inf or NaN; replace with +inf (which will 138 // later be negated) if negative, but the largest value if the input was +inf. 139 v = IfThenElse(IsFinite(v), v, 140 IfThenElse(was_pos, Set(d, HighestValue<T>()), Inf(d))); 141 // Restore the original sign - not via CopySignToAbs because we used a mask. 142 return IfThenElse(was_pos, v, Neg(v)); 143 } 144 145 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)> 146 Vec<D> LargerSortValue(D d, Vec<D> v) { 147 return Add(v, Set(d, TFromD<D>{1})); 148 } 149 150 template <class D, HWY_IF_NOT_FLOAT_NOR_SPECIAL_D(D)> 151 Vec<D> SmallerSortValue(D d, Vec<D> v) { 152 return Sub(v, Set(d, TFromD<D>{1})); 153 } 154 155 // Highway does not provide a lane type for 128-bit keys, so we use uint64_t 156 // along with an abstraction layer for single-lane vs. lane-pair, which is 157 // independent of the order. 158 template <typename LaneType, typename KeyType> 159 struct KeyLane : public KeyLaneBase<LaneType, KeyType> { 160 // For HeapSort 161 HWY_INLINE void Swap(LaneType* a, LaneType* b) const { 162 const LaneType temp = *a; 163 *a = *b; 164 *b = temp; 165 } 166 167 template <class V, class M> 168 HWY_INLINE V CompressKeys(V keys, M mask) const { 169 return CompressNot(keys, mask); 170 } 171 172 // Broadcasts one key into a vector 173 template <class D> 174 HWY_INLINE Vec<D> SetKey(D d, const LaneType* key) const { 175 return Set(d, *key); 176 } 177 178 template <class D> 179 HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const { 180 return Eq(a, b); 181 } 182 183 template <class D> 184 HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const { 185 return Ne(a, b); 186 } 187 188 // For keys=lanes, any difference counts. 189 template <class D> 190 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const { 191 // Must avoid floating-point comparisons (for -0) 192 const RebindToUnsigned<D> du; 193 return AllTrue(du, Eq(BitCast(du, diff), Zero(du))); 194 } 195 196 HWY_INLINE bool Equal1(const LaneType* a, const LaneType* b) const { 197 return *a == *b; 198 } 199 200 template <class D> 201 HWY_INLINE Vec<D> ReverseKeys(D d, Vec<D> v) const { 202 return Reverse(d, v); 203 } 204 205 template <class D> 206 HWY_INLINE Vec<D> ReverseKeys2(D d, Vec<D> v) const { 207 return Reverse2(d, v); 208 } 209 210 template <class D> 211 HWY_INLINE Vec<D> ReverseKeys4(D d, Vec<D> v) const { 212 return Reverse4(d, v); 213 } 214 215 template <class D> 216 HWY_INLINE Vec<D> ReverseKeys8(D d, Vec<D> v) const { 217 return Reverse8(d, v); 218 } 219 220 template <class D> 221 HWY_INLINE Vec<D> ReverseKeys16(D d, Vec<D> v) const { 222 static_assert(SortConstants::kMaxCols <= 16, "Assumes u32x16 = 512 bit"); 223 return ReverseKeys(d, v); 224 } 225 226 template <class V> 227 HWY_INLINE V OddEvenKeys(const V odd, const V even) const { 228 return OddEven(odd, even); 229 } 230 231 template <class D, HWY_IF_T_SIZE_D(D, 2)> 232 HWY_INLINE Vec<D> SwapAdjacentPairs(D d, const Vec<D> v) const { 233 const Repartition<uint32_t, D> du32; 234 return BitCast(d, Shuffle2301(BitCast(du32, v))); 235 } 236 template <class D, HWY_IF_T_SIZE_D(D, 4)> 237 HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const { 238 return Shuffle1032(v); 239 } 240 template <class D, HWY_IF_T_SIZE_D(D, 8)> 241 HWY_INLINE Vec<D> SwapAdjacentPairs(D /* tag */, const Vec<D> v) const { 242 return SwapAdjacentBlocks(v); 243 } 244 245 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)> 246 HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const { 247 #if HWY_HAVE_FLOAT64 // in case D is float32 248 const RepartitionToWide<D> dw; 249 #else 250 const RepartitionToWide<RebindToUnsigned<D>> dw; 251 #endif 252 return BitCast(d, SwapAdjacentPairs(dw, BitCast(dw, v))); 253 } 254 template <class D, HWY_IF_T_SIZE_D(D, 8)> 255 HWY_INLINE Vec<D> SwapAdjacentQuads(D d, const Vec<D> v) const { 256 // Assumes max vector size = 512 257 return ConcatLowerUpper(d, v, v); 258 } 259 260 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)> 261 HWY_INLINE Vec<D> OddEvenPairs(D d, const Vec<D> odd, 262 const Vec<D> even) const { 263 #if HWY_HAVE_FLOAT64 // in case D is float32 264 const RepartitionToWide<D> dw; 265 #else 266 const RepartitionToWide<RebindToUnsigned<D>> dw; 267 #endif 268 return BitCast(d, OddEven(BitCast(dw, odd), BitCast(dw, even))); 269 } 270 template <class D, HWY_IF_T_SIZE_D(D, 8)> 271 HWY_INLINE Vec<D> OddEvenPairs(D /* tag */, Vec<D> odd, Vec<D> even) const { 272 return OddEvenBlocks(odd, even); 273 } 274 275 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)> 276 HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const { 277 #if HWY_HAVE_FLOAT64 // in case D is float32 278 const RepartitionToWide<D> dw; 279 #else 280 const RepartitionToWide<RebindToUnsigned<D>> dw; 281 #endif 282 return BitCast(d, OddEvenPairs(dw, BitCast(dw, odd), BitCast(dw, even))); 283 } 284 template <class D, HWY_IF_T_SIZE_D(D, 8)> 285 HWY_INLINE Vec<D> OddEvenQuads(D d, Vec<D> odd, Vec<D> even) const { 286 return ConcatUpperLower(d, odd, even); 287 } 288 }; 289 290 // Anything order-related depends on the key traits *and* the order (see 291 // FirstOfLanes). We cannot implement just one Compare function because Lt128 292 // only compiles if the lane type is u64. Thus we need either overloaded 293 // functions with a tag type, class specializations, or separate classes. 294 // We avoid overloaded functions because we want all functions to be callable 295 // from a SortTraits without per-function wrappers. Specializing would work, but 296 // we are anyway going to specialize at a higher level. 297 template <typename T> 298 struct OrderAscending : public KeyLane<T, T> { 299 // False indicates the entire key (i.e. lane) should be compared. KV stands 300 // for key-value. 301 static constexpr bool IsKV() { return false; } 302 303 using Order = SortAscending; 304 using OrderForSortingNetwork = OrderAscending<T>; 305 306 HWY_INLINE bool Compare1(const T* a, const T* b) const { return *a < *b; } 307 308 template <class D> 309 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const { 310 return Lt(a, b); 311 } 312 313 // Two halves of Sort2, used in ScanMinMax. 314 template <class D> 315 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const { 316 return Min(a, b); 317 } 318 319 template <class D> 320 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const { 321 return Max(a, b); 322 } 323 324 template <class D> 325 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v, 326 T* HWY_RESTRICT /* buf */) const { 327 return MinOfLanes(d, v); 328 } 329 330 template <class D> 331 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v, 332 T* HWY_RESTRICT /* buf */) const { 333 return MaxOfLanes(d, v); 334 } 335 336 template <class D> 337 HWY_INLINE Vec<D> FirstValue(D d) const { 338 return SmallestSortValue(d); 339 } 340 341 template <class D> 342 HWY_INLINE Vec<D> LastValue(D d) const { 343 return LargestSortValue(d); 344 } 345 346 template <class D> 347 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const { 348 return SmallerSortValue(d, v); 349 } 350 }; 351 352 template <typename T> 353 struct OrderDescending : public KeyLane<T, T> { 354 // False indicates the entire key (i.e. lane) should be compared. KV stands 355 // for key-value. 356 static constexpr bool IsKV() { return false; } 357 358 using Order = SortDescending; 359 using OrderForSortingNetwork = OrderDescending<T>; 360 361 HWY_INLINE bool Compare1(const T* a, const T* b) const { return *b < *a; } 362 363 template <class D> 364 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const { 365 return Lt(b, a); 366 } 367 368 template <class D> 369 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const { 370 return Max(a, b); 371 } 372 373 template <class D> 374 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const { 375 return Min(a, b); 376 } 377 378 template <class D> 379 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v, 380 T* HWY_RESTRICT /* buf */) const { 381 return MaxOfLanes(d, v); 382 } 383 384 template <class D> 385 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v, 386 T* HWY_RESTRICT /* buf */) const { 387 return MinOfLanes(d, v); 388 } 389 390 template <class D> 391 HWY_INLINE Vec<D> FirstValue(D d) const { 392 return LargestSortValue(d); 393 } 394 395 template <class D> 396 HWY_INLINE Vec<D> LastValue(D d) const { 397 return SmallestSortValue(d); 398 } 399 400 template <class D> 401 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const { 402 return LargerSortValue(d, v); 403 } 404 }; 405 406 struct KeyValue64 : public KeyLane<uint64_t, hwy::K32V32> { 407 // True indicates only part of the key (i.e. lane) should be compared. KV 408 // stands for key-value. 409 static constexpr bool IsKV() { return true; } 410 411 template <class D> 412 HWY_INLINE Mask<D> EqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const { 413 return Eq(ShiftRight<32>(a), ShiftRight<32>(b)); 414 } 415 416 template <class D> 417 HWY_INLINE Mask<D> NotEqualKeys(D /*tag*/, Vec<D> a, Vec<D> b) const { 418 return Ne(ShiftRight<32>(a), ShiftRight<32>(b)); 419 } 420 421 HWY_INLINE bool Equal1(const uint64_t* a, const uint64_t* b) const { 422 return (*a >> 32) == (*b >> 32); 423 } 424 425 // Only count differences in the actual key, not the value. 426 template <class D> 427 HWY_INLINE bool NoKeyDifference(D /*tag*/, Vec<D> diff) const { 428 // Must avoid floating-point comparisons (for -0) 429 const RebindToUnsigned<D> du; 430 const Vec<decltype(du)> zero = Zero(du); 431 const Vec<decltype(du)> keys = ShiftRight<32>(diff); // clear values 432 return AllTrue(du, Eq(BitCast(du, keys), zero)); 433 } 434 }; 435 436 struct OrderAscendingKV64 : public KeyValue64 { 437 using Order = SortAscending; 438 using OrderForSortingNetwork = OrderAscending<LaneType>; 439 440 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const { 441 return (*a >> 32) < (*b >> 32); 442 } 443 444 template <class D> 445 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const { 446 return Lt(ShiftRight<32>(a), ShiftRight<32>(b)); 447 } 448 449 // Not required to be stable (preserving the order of equivalent keys), so 450 // we can include the value in the comparison. 451 template <class D> 452 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const { 453 return Min(a, b); 454 } 455 456 template <class D> 457 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const { 458 return Max(a, b); 459 } 460 461 template <class D> 462 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v, 463 uint64_t* HWY_RESTRICT /* buf */) const { 464 return MinOfLanes(d, v); 465 } 466 467 template <class D> 468 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v, 469 uint64_t* HWY_RESTRICT /* buf */) const { 470 return MaxOfLanes(d, v); 471 } 472 473 // Same as for regular lanes. 474 template <class D> 475 HWY_INLINE Vec<D> FirstValue(D d) const { 476 return Set(d, hwy::LowestValue<TFromD<D>>()); 477 } 478 479 template <class D> 480 HWY_INLINE Vec<D> LastValue(D d) const { 481 return Set(d, hwy::HighestValue<TFromD<D>>()); 482 } 483 484 template <class D> 485 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const { 486 return Sub(v, Set(d, uint64_t{1} << 32)); 487 } 488 }; 489 490 struct OrderDescendingKV64 : public KeyValue64 { 491 using Order = SortDescending; 492 using OrderForSortingNetwork = OrderDescending<LaneType>; 493 494 HWY_INLINE bool Compare1(const LaneType* a, const LaneType* b) const { 495 return (*b >> 32) < (*a >> 32); 496 } 497 498 template <class D> 499 HWY_INLINE Mask<D> Compare(D /* tag */, Vec<D> a, Vec<D> b) const { 500 return Lt(ShiftRight<32>(b), ShiftRight<32>(a)); 501 } 502 503 // Not required to be stable (preserving the order of equivalent keys), so 504 // we can include the value in the comparison. 505 template <class D> 506 HWY_INLINE Vec<D> First(D /* tag */, const Vec<D> a, const Vec<D> b) const { 507 return Max(a, b); 508 } 509 510 template <class D> 511 HWY_INLINE Vec<D> Last(D /* tag */, const Vec<D> a, const Vec<D> b) const { 512 return Min(a, b); 513 } 514 515 template <class D> 516 HWY_INLINE Vec<D> FirstOfLanes(D d, Vec<D> v, 517 uint64_t* HWY_RESTRICT /* buf */) const { 518 return MaxOfLanes(d, v); 519 } 520 521 template <class D> 522 HWY_INLINE Vec<D> LastOfLanes(D d, Vec<D> v, 523 uint64_t* HWY_RESTRICT /* buf */) const { 524 return MinOfLanes(d, v); 525 } 526 527 template <class D> 528 HWY_INLINE Vec<D> FirstValue(D d) const { 529 return Set(d, hwy::HighestValue<TFromD<D>>()); 530 } 531 532 template <class D> 533 HWY_INLINE Vec<D> LastValue(D d) const { 534 return Set(d, hwy::LowestValue<TFromD<D>>()); 535 } 536 537 template <class D> 538 HWY_INLINE Vec<D> PrevValue(D d, Vec<D> v) const { 539 return Add(v, Set(d, uint64_t{1} << 32)); 540 } 541 }; 542 543 // Shared code that depends on Order. 544 template <class Base> 545 struct TraitsLane : public Base { 546 using TraitsForSortingNetwork = 547 TraitsLane<typename Base::OrderForSortingNetwork>; 548 549 // For each lane i: replaces a[i] with the first and b[i] with the second 550 // according to Base. 551 // Corresponds to a conditional swap, which is one "node" of a sorting 552 // network. Min/Max are cheaper than compare + blend at least for integers. 553 template <class D> 554 HWY_INLINE void Sort2(D d, Vec<D>& a, Vec<D>& b) const { 555 const Base* base = static_cast<const Base*>(this); 556 557 const Vec<D> a_copy = a; 558 // Prior to AVX3, there is no native 64-bit Min/Max, so they compile to 4 559 // instructions. We can reduce it to a compare + 2 IfThenElse. 560 #if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3 561 if (sizeof(TFromD<D>) == 8) { 562 const Mask<D> cmp = base->Compare(d, a, b); 563 a = IfThenElse(cmp, a, b); 564 b = IfThenElse(cmp, b, a_copy); 565 return; 566 } 567 #endif 568 a = base->First(d, a, b); 569 b = base->Last(d, a_copy, b); 570 } 571 572 // Conditionally swaps even-numbered lanes with their odd-numbered neighbor. 573 template <class D, HWY_IF_T_SIZE_D(D, 8)> 574 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const { 575 const Base* base = static_cast<const Base*>(this); 576 Vec<D> swapped = base->ReverseKeys2(d, v); 577 // Further to the above optimization, Sort2+OddEvenKeys compile to four 578 // instructions; we can save one by combining two blends. 579 #if HWY_AVX3 < HWY_TARGET && HWY_TARGET <= HWY_SSSE3 580 const Vec<D> cmp = VecFromMask(d, base->Compare(d, v, swapped)); 581 return IfVecThenElse(DupOdd(cmp), swapped, v); 582 #else 583 Sort2(d, v, swapped); 584 return base->OddEvenKeys(swapped, v); 585 #endif 586 } 587 588 // (See above - we use Sort2 for non-64-bit types.) 589 template <class D, HWY_IF_NOT_T_SIZE_D(D, 8)> 590 HWY_INLINE Vec<D> SortPairsDistance1(D d, Vec<D> v) const { 591 const Base* base = static_cast<const Base*>(this); 592 Vec<D> swapped = base->ReverseKeys2(d, v); 593 Sort2(d, v, swapped); 594 return base->OddEvenKeys(swapped, v); 595 } 596 597 // Swaps with the vector formed by reversing contiguous groups of 4 keys. 598 template <class D> 599 HWY_INLINE Vec<D> SortPairsReverse4(D d, Vec<D> v) const { 600 const Base* base = static_cast<const Base*>(this); 601 Vec<D> swapped = base->ReverseKeys4(d, v); 602 Sort2(d, v, swapped); 603 return base->OddEvenPairs(d, swapped, v); 604 } 605 606 // Conditionally swaps lane 0 with 4, 1 with 5 etc. 607 template <class D> 608 HWY_INLINE Vec<D> SortPairsDistance4(D d, Vec<D> v) const { 609 const Base* base = static_cast<const Base*>(this); 610 Vec<D> swapped = base->SwapAdjacentQuads(d, v); 611 // Only used in Merge16, so this will not be used on AVX2 (which only has 4 612 // u64 lanes), so skip the above optimization for 64-bit AVX2. 613 Sort2(d, v, swapped); 614 return base->OddEvenQuads(d, swapped, v); 615 } 616 }; 617 618 } // namespace detail 619 // NOLINTNEXTLINE(google-readability-namespace-comments) 620 } // namespace HWY_NAMESPACE 621 } // namespace hwy 622 HWY_AFTER_NAMESPACE(); 623 624 #endif // HIGHWAY_HWY_CONTRIB_SORT_TRAITS_TOGGLE