highway_test.cc (21045B)
1 // Copyright 2019 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <stdint.h> 17 #include <stdio.h> 18 19 #include <bitset> 20 21 #undef HWY_TARGET_INCLUDE 22 #define HWY_TARGET_INCLUDE "highway_test.cc" 23 #include "hwy/foreach_target.h" // IWYU pragma: keep 24 #include "hwy/highway.h" 25 #include "hwy/nanobenchmark.h" // Unpredictable1 26 #include "hwy/tests/test_util-inl.h" 27 28 HWY_BEFORE_NAMESPACE(); 29 namespace hwy { 30 namespace HWY_NAMESPACE { 31 namespace { 32 33 template <size_t kLimit, typename T> 34 HWY_NOINLINE void TestCappedLimit(T /* tag */) { 35 CappedTag<T, kLimit> d; 36 // Ensure two ops compile 37 const T k0 = ConvertScalarTo<T>(0); 38 const T k1 = ConvertScalarTo<T>(1); 39 HWY_ASSERT_VEC_EQ(d, Zero(d), Set(d, k0)); 40 41 // Ensure we do not write more than kLimit lanes 42 const size_t N = Lanes(d); 43 if (kLimit < N) { 44 auto lanes = AllocateAligned<T>(N); 45 HWY_ASSERT(lanes); 46 ZeroBytes(lanes.get(), N * sizeof(T)); 47 Store(Set(d, k1), d, lanes.get()); 48 for (size_t i = kLimit; i < N; ++i) { 49 HWY_ASSERT_EQ(lanes[i], k0); 50 } 51 } 52 } 53 54 // Adapter for ForAllTypes - we are constructing our own Simd<> and thus do not 55 // use ForPartialVectors etc. 
struct TestCapped {
  // Runs TestCappedLimit for lane type T with several limits: 1, two
  // non-powers-of-two (3, 5) and a very large one (1 << 15).
  template <typename T>
  void operator()(T t) const {
    TestCappedLimit<1>(t);
    TestCappedLimit<3>(t);
    TestCappedLimit<5>(t);
    TestCappedLimit<1ull << 15>(t);
  }
};

// Test entry point: applies TestCapped via ForAllTypes.
HWY_NOINLINE void TestAllCapped() { ForAllTypes(TestCapped()); }

// For testing that ForPartialVectors reaches every possible size:
// TestMaxLanes sets bit N after being called with a descriptor for which
// Lanes(d) == N.
using NumLanesSet = std::bitset<HWY_MAX_BYTES + 1>;

// Monostate pattern because ForPartialVectors takes a template argument, not a
// functor by reference.
//
// Returns a pointer to the function-local static set of lane counts recorded
// for lane types of `sizeof_t` bytes. The array is indexed directly by
// `sizeof_t`, hence the extra element.
static NumLanesSet* NumLanesForSize(size_t sizeof_t) {
  HWY_ASSERT(sizeof_t <= sizeof(uint64_t));
  static NumLanesSet num_lanes[sizeof(uint64_t) + 1];
  return num_lanes + sizeof_t;
}
// Returns a pointer to the function-local static slot holding the largest
// lane count recorded so far for lane types of `sizeof_t` bytes (initially 0).
static size_t* MaxLanesForSize(size_t sizeof_t) {
  HWY_ASSERT(sizeof_t <= sizeof(uint64_t));
  static size_t num_lanes[sizeof(uint64_t) + 1] = {0};
  return num_lanes + sizeof_t;
}

// Checks Lanes(d) <= MaxLanes(d) <= HWY_MAX_BYTES / sizeof(T) and records
// Lanes(d) in the per-sizeof(T) statistics read by TestAllMaxLanes.
struct TestMaxLanes {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) const {
    const size_t N = Lanes(d);
    const size_t kMax = MaxLanes(d);  // for RVV, includes LMUL
    HWY_ASSERT(N <= kMax);
    HWY_ASSERT(kMax <= (HWY_MAX_BYTES / sizeof(T)));

    // Record this lane count and update the running maximum.
    NumLanesForSize(sizeof(T))->set(N);
    *MaxLanesForSize(sizeof(T)) = HWY_MAX(*MaxLanesForSize(sizeof(T)), N);
  }
};

// Exercises Simd<> descriptors with a small lane count (1, 2 or 4) combined
// with a positive kPow2 (1..3), as produced by D::NewN.
class TestFracNLanes {
 private:
  // Descriptor with the lane type of D, kPow2 replaced by kNewPow2, and the N
  // argument recomputed by D::NewN from D's maximum lane count.
  template <int kNewPow2, class D>
  using DWithPow2 =
      Simd<TFromD<D>, D::template NewN<kNewPow2, HWY_MAX_LANES_D(D)>(),
           kNewPow2>;

  // Statically checks that d2 has the same lane type and maximum lane count
  // (N1) as d1, then runs TestMaxLanes on d2.
  template <typename T1, size_t N1, int kPow2, typename T2, size_t N2>
  static HWY_INLINE void DoTestFracNLanes(Simd<T1, N1, 0> /*d1*/,
                                          Simd<T2, N2, kPow2> d2) {
    using D2 = Simd<T2, N2, kPow2>;
    static_assert(IsSame<T1, T2>(), "T1 and T2 should be the same type");
    // NOTE(review): N2 is expected to be a large encoded value rather than a
    // literal lane count here; presumably that is how NewN represents these
    // descriptors - confirm against the Simd<> definition.
    static_assert(N2 > HWY_MAX_BYTES, "N2 > HWY_MAX_BYTES should be true");
    static_assert(HWY_MAX_LANES_D(D2) == N1,
                  "HWY_MAX_LANES_D(D2) should be equal to N1");
    static_assert(N1 <= HWY_LANES(T2), "N1 <= HWY_LANES(T2) should be true");

    TestMaxLanes()(T2(), d2);
  }

#if HWY_TARGET != HWY_SCALAR
  template <class T, HWY_IF_LANES_LE(4, HWY_LANES(T))>
  static HWY_INLINE void DoTest4LanesWithPow3(T /*unused*/) {
    // If HWY_LANES(T) >= 4 is true, do DoTestFracNLanes for the
    // MaxLanes(d) == 4, kPow2 == 3 case
    const Simd<T, 4, 0> d;
    DoTestFracNLanes(d, DWithPow2<3, decltype(d)>());
  }
  template <class T, HWY_IF_LANES_GT(4, HWY_LANES(T))>
  static HWY_INLINE void DoTest4LanesWithPow3(T /*unused*/) {
    // If HWY_LANES(T) < 4, do nothing
  }
#endif

 public:
  // Runs the checks for lane type T: one lane with kPow2 = 1..3 on every
  // target; two lanes with kPow2 = 2..3 and (if available) four lanes with
  // kPow2 = 3 on non-scalar targets.
  template <class T>
  HWY_NOINLINE void operator()(T /*unused*/) const {
    const Simd<T, 1, 0> d1;
    DoTestFracNLanes(d1, DWithPow2<1, decltype(d1)>());
    DoTestFracNLanes(d1, DWithPow2<2, decltype(d1)>());
    DoTestFracNLanes(d1, DWithPow2<3, decltype(d1)>());

#if HWY_TARGET != HWY_SCALAR
    const Simd<T, 2, 0> d2;
    DoTestFracNLanes(d2, DWithPow2<2, decltype(d2)>());
    DoTestFracNLanes(d2, DWithPow2<3, decltype(d2)>());

    DoTest4LanesWithPow3(T());
#endif
  }
};

// Test entry point: runs TestMaxLanes over all partial vectors, verifies the
// recorded lane counts, then runs TestFracNLanes.
HWY_NOINLINE void TestAllMaxLanes() {
  ForAllTypes(ForPartialVectors<TestMaxLanes>());

  // Ensure ForPartialVectors visited all powers of two [1, N].
  for (size_t sizeof_t : {sizeof(uint8_t), sizeof(uint16_t), sizeof(uint32_t),
                          sizeof(uint64_t)}) {
    const size_t N = *MaxLanesForSize(sizeof_t);
    // i += i doubles i, enumerating 1, 2, 4, ... up to N.
    for (size_t i = 1; i <= N; i += i) {
      if (!NumLanesForSize(sizeof_t)->test(i)) {
        fprintf(stderr, "T=%d: did not visit for N=%d, max=%d\n",
                static_cast<int>(sizeof_t), static_cast<int>(i),
                static_cast<int>(N));
        HWY_ASSERT(false);
      }
    }
  }

  ForAllTypes(TestFracNLanes());
}

// Checks Zero, Set and IotaForSpecial against a scalar-filled reference
// array, and that the result of Undefined can be stored.
struct TestSet {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // Zero
    const Vec<D> v0 = Zero(d);
    const size_t N = Lanes(d);
    auto expected = AllocateAligned<T>(N);
    HWY_ASSERT(expected);
    ZeroBytes(expected.get(), N * sizeof(T));
    HWY_ASSERT_VEC_EQ(d, expected.get(), v0);

    // Set
    const Vec<D> v2 = Set(d, ConvertScalarTo<T>(2));
    for (size_t i = 0; i < N; ++i) {
      expected[i] = ConvertScalarTo<T>(2);
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), v2);

    // Iota: lane i is expected to hold 5 + i.
    const Vec<D> vi = IotaForSpecial(d, 5);
    for (size_t i = 0; i < N; ++i) {
      expected[i] = ConvertScalarTo<T>(5 + i);
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), vi);

    // Undefined. This may result in a 'using uninitialized memory' warning
    // here, even though we already suppress warnings in Undefined.
    // The stored value is not inspected; only the Store itself is exercised.
    HWY_DIAGNOSTICS(push)
    HWY_DIAGNOSTICS_OFF(disable : 4700, ignored "-Wuninitialized")
#if HWY_COMPILER_GCC_ACTUAL
    HWY_DIAGNOSTICS_OFF(disable : 4701, ignored "-Wmaybe-uninitialized")
#endif
    const Vec<D> vu = Undefined(d);
    Store(vu, d, expected.get());
    HWY_DIAGNOSTICS(pop)
  }
};

// Test entry point: TestSet over all partial vectors, via
// ForAllTypesAndSpecial.
HWY_NOINLINE void TestAllSet() {
  ForAllTypesAndSpecial(ForPartialVectors<TestSet>());
}

// Ensures wraparound (mod 2^bits)
struct TestOverflow {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> v1 = Set(d, static_cast<T>(1));
    const Vec<D> vmax = Set(d, LimitsMax<T>());
    const Vec<D> vmin = Set(d, LimitsMin<T>());
    // Unsigned underflow / negative -> positive
    HWY_ASSERT_VEC_EQ(d, vmax, Sub(vmin, v1));
    // Unsigned overflow / positive -> negative
    HWY_ASSERT_VEC_EQ(d, vmin, Add(vmax, v1));
  }
};

// Test entry point: TestOverflow over integer lane types only.
HWY_NOINLINE void TestAllOverflow() {
  ForIntegerTypes(ForPartialVectors<TestOverflow>());
}

// Checks Clamp for a value above the upper bound and a value below the lower
// bound.
struct TestClamp {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> v0 = Zero(d);
    const Vec<D> v1 = Set(d, ConvertScalarTo<T>(1));
    const Vec<D> v2 = Set(d, ConvertScalarTo<T>(2));

    // 2 clamped to [0, 1] is 1; 0 clamped to [1, 2] is 1.
    HWY_ASSERT_VEC_EQ(d, v1, Clamp(v2, v0, v1));
    HWY_ASSERT_VEC_EQ(d, v1, Clamp(v0, v1, v2));
  }
};

// Test entry point: TestClamp over all lane types.
HWY_NOINLINE void TestAllClamp() {
  ForAllTypes(ForPartialVectors<TestClamp>());
}

// Checks SignBit for integer lanes using only Add/Sub, which are available
// for every integer type.
struct TestSignBitInteger {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> v0 = Zero(d);
    // All bits set in every lane: v0 == v0 is true everywhere.
    const Vec<D> all = VecFromMask(d, Eq(v0, v0));
    const Vec<D> vs = SignBit(d);
    const Vec<D> other = Sub(vs, Set(d, ConvertScalarTo<T>(1)));

    // Shifting left by one => overflow, equal zero
    HWY_ASSERT_VEC_EQ(d, v0, Add(vs, vs));
    // Verify the lower bits are zero (only +/- and logical ops are available
    // for all types)
    HWY_ASSERT_VEC_EQ(d, all, Add(vs, other));
  }
};

// Checks SignBit for floating-point lanes: OR-ing it into +2.25 yields -2.25
// and clearing it from -2.25 yields +2.25.
struct TestSignBitFloat {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> v0 = Zero(d);
    const Vec<D> vs = SignBit(d);
    const Vec<D> vp = Set(d, ConvertScalarTo<T>(2.25));
    const Vec<D> vn = Set(d, ConvertScalarTo<T>(-2.25));
    HWY_ASSERT_VEC_EQ(d, Or(vp, vs), vn);
    HWY_ASSERT_VEC_EQ(d, AndNot(vs, vn), vp);
    // NOTE(review): presumably passes because the sign-bit-only pattern is
    // -0.0, which compares equal to +0.0 - confirm HWY_ASSERT_VEC_EQ
    // compares floats by value rather than by bits.
    HWY_ASSERT_VEC_EQ(d, v0, vs);
  }
};

// Test entry point: integer and floating-point SignBit checks.
HWY_NOINLINE void TestAllSignBit() {
  ForIntegerTypes(ForPartialVectors<TestSignBitInteger>());
  ForFloatTypes(ForPartialVectors<TestSignBitFloat>());
}

// Aborts (reporting `file`:`line`) unless every lane of `v` is NaN. Before
// aborting, prints the vector, the IsNaN mask, and the first lane both as a
// double and as raw bytes.
// TODO(b/287462770): inline to work around incorrect SVE codegen
template <class D, class V>
HWY_INLINE void AssertNaN(D d, VecArg<V> v, const char* file, int line) {
  using T = TFromD<D>;
  const size_t N = Lanes(d);
  if (!AllTrue(d, IsNaN(v))) {
    Print(d, "not all NaN", v, 0, N);
    Print(d, "mask", VecFromMask(d, IsNaN(v)), 0, N);
    // RVV lacks PRIu64 and MSYS still has problems with %zu, so print bytes to
    // avoid truncating doubles.
    // At least 8 zero-initialized bytes, so that all eight %02x arguments
    // below are defined even when sizeof(T) < 8.
    uint8_t bytes[HWY_MAX(sizeof(T), 8)] = {0};
    const T lane = GetLane(v);
    CopyBytes<sizeof(T)>(&lane, bytes);
    Abort(file, line,
          "Expected %s NaN, got %E (bytes %02x %02x %02x %02x %02x %02x %02x "
          "%02x)",
          TypeName(T(), N).c_str(), ConvertScalarTo<double>(lane), bytes[0],
          bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7]);
  }
}

// Calls AssertNaN with the caller's source location.
#define HWY_ASSERT_NAN(d, v) AssertNaN(d, v, __FILE__, __LINE__)

// Checks how NaN inputs propagate through arithmetic, FMA, sign, rounding,
// logical, comparison, reduction, Min/Max and approximation ops.
struct TestNaN {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // v1 is obtained via Unpredictable1 rather than a literal, presumably so
    // the compiler cannot constant-fold the expressions under test. The
    // select below yields NaN in every lane where v1 == 1, which the first
    // HWY_ASSERT_NAN confirms is all lanes.
    const Vec<D> v1 = Set(d, ConvertScalarTo<T>(Unpredictable1()));
    const Vec<D> nan =
        IfThenElse(Eq(v1, Set(d, ConvertScalarTo<T>(1))), NaN(d), v1);
    HWY_ASSERT_NAN(d, nan);

    // Arithmetic
    HWY_ASSERT_NAN(d, Add(nan, v1));
    HWY_ASSERT_NAN(d, Add(v1, nan));
    HWY_ASSERT_NAN(d, Sub(nan, v1));
    HWY_ASSERT_NAN(d, Sub(v1, nan));
    HWY_ASSERT_NAN(d, Mul(nan, v1));
    HWY_ASSERT_NAN(d, Mul(v1, nan));
    HWY_ASSERT_NAN(d, Div(nan, v1));
    HWY_ASSERT_NAN(d, Div(v1, nan));

    // FMA
    HWY_ASSERT_NAN(d, MulAdd(nan, v1, v1));
    HWY_ASSERT_NAN(d, MulAdd(v1, nan, v1));
    HWY_ASSERT_NAN(d, MulAdd(v1, v1, nan));
    HWY_ASSERT_NAN(d, MulSub(nan, v1, v1));
    HWY_ASSERT_NAN(d, MulSub(v1, nan, v1));
    HWY_ASSERT_NAN(d, MulSub(v1, v1, nan));
    HWY_ASSERT_NAN(d, NegMulAdd(nan, v1, v1));
    HWY_ASSERT_NAN(d, NegMulAdd(v1, nan, v1));
    HWY_ASSERT_NAN(d, NegMulAdd(v1, v1, nan));
    HWY_ASSERT_NAN(d, NegMulSub(nan, v1, v1));
    HWY_ASSERT_NAN(d, NegMulSub(v1, nan, v1));
    HWY_ASSERT_NAN(d, NegMulSub(v1, v1, nan));

    // Sqrt (the approximate reciprocal ops are checked at the end)
    HWY_ASSERT_NAN(d, Sqrt(nan));

    // Sign manipulation
    HWY_ASSERT_NAN(d, Abs(nan));
    HWY_ASSERT_NAN(d, Neg(nan));
    HWY_ASSERT_NAN(d, CopySign(nan, v1));
    HWY_ASSERT_NAN(d, CopySignToAbs(nan, v1));

    // Rounding
    HWY_ASSERT_NAN(d, Ceil(nan));
    HWY_ASSERT_NAN(d, Floor(nan));
    HWY_ASSERT_NAN(d, Round(nan));
    HWY_ASSERT_NAN(d, Trunc(nan));

    // Logical (And/AndNot/Xor will clear NaN!)
    HWY_ASSERT_NAN(d, Or(nan, v1));

    // Comparison: every ordered comparison involving NaN is false.
    HWY_ASSERT(AllFalse(d, Eq(nan, v1)));
    HWY_ASSERT(AllFalse(d, Gt(nan, v1)));
    HWY_ASSERT(AllFalse(d, Lt(nan, v1)));
    HWY_ASSERT(AllFalse(d, Ge(nan, v1)));
    HWY_ASSERT(AllFalse(d, Le(nan, v1)));

    HWY_ASSERT(AllTrue(d, IsEitherNaN(nan, nan)));
    HWY_ASSERT(AllTrue(d, IsEitherNaN(nan, v1)));
    HWY_ASSERT(AllTrue(d, IsEitherNaN(v1, nan)));
    HWY_ASSERT(AllFalse(d, IsEitherNaN(v1, v1)));

    // Reduction
    HWY_ASSERT_NAN(d, SumOfLanes(d, nan));
    HWY_ASSERT_NAN(d, Set(d, ReduceSum(d, nan)));
// TODO(janwas): re-enable after QEMU/Spike are fixed
#if HWY_TARGET != HWY_RVV
    HWY_ASSERT_NAN(d, MinOfLanes(d, nan));
    HWY_ASSERT_NAN(d, Set(d, ReduceMin(d, nan)));
    HWY_ASSERT_NAN(d, MaxOfLanes(d, nan));
    HWY_ASSERT_NAN(d, Set(d, ReduceMax(d, nan)));
#endif

    // Min/Max: the expected result when exactly one input is NaN depends on
    // the target, hence three sets of expectations.
#if (HWY_ARCH_X86 || HWY_ARCH_WASM) && (HWY_TARGET < HWY_EMU128)
    // Native WASM or x86 SIMD return the second operand if any input is NaN.
    HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
    HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
    HWY_ASSERT_NAN(d, Min(v1, nan));
    HWY_ASSERT_NAN(d, Max(v1, nan));
#elif HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7
    // Armv7 NEON returns NaN if any input is NaN.
    HWY_ASSERT_NAN(d, Min(v1, nan));
    HWY_ASSERT_NAN(d, Max(v1, nan));
    HWY_ASSERT_NAN(d, Min(nan, v1));
    HWY_ASSERT_NAN(d, Max(nan, v1));
#else
    // IEEE 754-2019 minimumNumber is defined as the other argument if exactly
    // one is NaN, and qNaN if both are.
    HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
    HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
    HWY_ASSERT_VEC_EQ(d, v1, Min(v1, nan));
    HWY_ASSERT_VEC_EQ(d, v1, Max(v1, nan));
#endif
    // On every target, both inputs NaN must yield NaN.
    HWY_ASSERT_NAN(d, Min(nan, nan));
    HWY_ASSERT_NAN(d, Max(nan, nan));

    // MinNumber/MaxNumber return the non-NaN operand regardless of order.
    HWY_ASSERT_VEC_EQ(d, v1, MinNumber(nan, v1));
    HWY_ASSERT_VEC_EQ(d, v1, MaxNumber(nan, v1));
    HWY_ASSERT_VEC_EQ(d, v1, MinNumber(v1, nan));
    HWY_ASSERT_VEC_EQ(d, v1, MaxNumber(v1, nan));

    // AbsDiff
    HWY_ASSERT_NAN(d, AbsDiff(nan, v1));
    HWY_ASSERT_NAN(d, AbsDiff(v1, nan));

    // Approximate*
    HWY_ASSERT_NAN(d, ApproximateReciprocal(nan));
    HWY_ASSERT_NAN(d, ApproximateReciprocalSqrt(nan));
  }
};

// Test entry point: TestNaN over floating-point lane types.
HWY_NOINLINE void TestAllNaN() { ForFloatTypes(ForPartialVectors<TestNaN>()); }

// Checks that IsNaN is true only for NaN (of either sign) and false for
// infinities, one, zero and the lowest/highest representable values.
struct TestIsNaN {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // inf/nan are selected at runtime from v1 (see Unpredictable1) instead of
    // being used directly as constants.
    const Vec<D> v1 = Set(d, ConvertScalarTo<T>(Unpredictable1()));
    const Vec<D> inf =
        IfThenElse(Eq(v1, Set(d, ConvertScalarTo<T>(1))), Inf(d), v1);
    const Vec<D> nan =
        IfThenElse(Eq(v1, Set(d, ConvertScalarTo<T>(1))), NaN(d), v1);
    // Source of a negative sign for CopySign.
    const Vec<D> neg = Set(d, ConvertScalarTo<T>(-1));
    HWY_ASSERT_NAN(d, nan);
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(inf));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(CopySign(inf, neg)));
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(nan));
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(CopySign(nan, neg)));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(v1));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Zero(d)));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::LowestValue<T>())));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::HighestValue<T>())));
  }
};

// Test entry point: TestIsNaN over floating-point lane types.
HWY_NOINLINE void TestAllIsNaN() {
  ForFloatTypes(ForPartialVectors<TestIsNaN>());
}

// Checks that IsInf is true only for infinities (of either sign) and false
// for NaN, one, zero and the lowest/highest representable values.
struct TestIsInf {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> k1 = Set(d, ConvertScalarTo<T>(1));
    const Vec<D> v1 = Set(d, ConvertScalarTo<T>(Unpredictable1()));
    const Vec<D> inf = IfThenElse(Eq(v1, k1), Inf(d), v1);
    const Vec<D> nan = IfThenElse(Eq(v1, k1), NaN(d), v1);
    // Source of a negative sign for CopySign.
    const Vec<D> neg = Neg(k1);
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(inf));
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(CopySign(inf, neg)));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(nan));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(CopySign(nan, neg)));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(v1));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Zero(d)));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::LowestValue<T>())));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::HighestValue<T>())));
  }
};

// Test entry point: TestIsInf over floating-point lane types.
HWY_NOINLINE void TestAllIsInf() {
  ForFloatTypes(ForPartialVectors<TestIsInf>());
}

// Checks that IsFinite is false for infinities and NaN (of either sign) and
// true for one, zero and the lowest/highest representable values.
struct TestIsFinite {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> k1 = Set(d, ConvertScalarTo<T>(1));
    const Vec<D> v1 = Set(d, ConvertScalarTo<T>(Unpredictable1()));
    const Vec<D> inf = IfThenElse(Eq(v1, k1), Inf(d), v1);
    const Vec<D> nan = IfThenElse(Eq(v1, k1), NaN(d), v1);
    // Source of a negative sign for CopySign.
    const Vec<D> neg = Neg(k1);
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(inf));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(inf, neg)));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(nan));
    HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(nan, neg)));
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(v1));
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Zero(d)));
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Set(d, hwy::LowestValue<T>())));
    HWY_ASSERT_MASK_EQ(d, MaskTrue(d),
                       IsFinite(Set(d, hwy::HighestValue<T>())));
  }
};

// Test entry point: TestIsFinite over floating-point lane types.
HWY_NOINLINE void TestAllIsFinite() {
  ForFloatTypes(ForPartialVectors<TestIsFinite>());
}

// Checks that vectors can be copy-constructed and assigned, and that the
// copies compare equal to the source.
struct TestCopyAndAssign {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // copy V
    const Vec<D> v3 = Iota(d, 3);
    auto v3b(v3);
    HWY_ASSERT_VEC_EQ(d, v3, v3b);

    // assign V (the Undefined initial value is overwritten before use)
    auto v3c = Undefined(d);
    v3c = v3;
    HWY_ASSERT_VEC_EQ(d, v3, v3c);
  }
};

// Test entry point: TestCopyAndAssign over all lane types.
HWY_NOINLINE void TestAllCopyAndAssign() {
  ForAllTypes(ForPartialVectors<TestCopyAndAssign>());
}

// Checks that GetLane returns the value that Zero/Set broadcast.
struct TestGetLane {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const T k1 = ConvertScalarTo<T>(1);
    HWY_ASSERT_EQ(ConvertScalarTo<T>(0), GetLane(Zero(d)));
    HWY_ASSERT_EQ(k1, GetLane(Set(d, k1)));
  }
};

// Test entry point: TestGetLane over all lane types.
HWY_NOINLINE void TestAllGetLane() {
  ForAllTypes(ForPartialVectors<TestGetLane>());
}

// Checks that a descriptor deduced from a vector type via DFromV produces
// vectors that interoperate with those created from the original descriptor.
struct TestDFromV {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> v0 = Zero(d);
    // This deduced type is not necessarily the same as D.
    using D0 = DFromV<decltype(v0)>;
    // The two types of vectors can be used interchangeably.
    // 0 & 1 == 0, so the result must equal v0.
    const Vec<D> v0b = And(v0, Set(D0(), ConvertScalarTo<T>(1)));
    HWY_ASSERT_VEC_EQ(d, v0, v0b);
  }
};

// Test entry point: TestDFromV over all lane types.
HWY_NOINLINE void TestAllDFromV() {
  ForAllTypes(ForPartialVectors<TestDFromV>());
}

// Checks that Blocks(d) is in [1, d.MaxBlocks()] and equals the number of
// whole 16-byte blocks covered by Lanes(d), or 1 for vectors smaller than one
// block.
struct TestBlocks {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const size_t N = Lanes(d);
    const size_t num_of_blocks = Blocks(d);
    static constexpr size_t kNumOfLanesPer16ByteBlk = 16 / sizeof(T);
    HWY_ASSERT(num_of_blocks >= 1);
    HWY_ASSERT(num_of_blocks <= d.MaxBlocks());
    HWY_ASSERT(
        num_of_blocks ==
        ((N < kNumOfLanesPer16ByteBlk) ? 1 : (N / kNumOfLanesPer16ByteBlk)));
  }
};

// Test entry point: TestBlocks over all lane types.
HWY_NOINLINE void TestAllBlocks() {
  ForAllTypes(ForPartialVectors<TestBlocks>());
}

// Checks that BlockDFromD<D> describes min(16, size of D) bytes, both for the
// compile-time MaxBytes() bound and for the runtime Lanes(), and that its
// vector type matches the result type of ExtractBlock<0>.
struct TestBlockDFromD {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const BlockDFromD<decltype(d)> d_block;
    // Compile-time bounds.
    static_assert(d_block.MaxBytes() <= 16,
                  "d_block.MaxBytes() <= 16 must be true");
    static_assert(d_block.MaxBytes() <= d.MaxBytes(),
                  "d_block.MaxBytes() <= d.MaxBytes() must be true");
    static_assert(d.MaxBytes() > 16 || d_block.MaxBytes() == d.MaxBytes(),
                  "d_block.MaxBytes() == d.MaxBytes() must be true if "
                  "d.MaxBytes() is less than or equal to 16");
    static_assert(d.MaxBytes() < 16 || d_block.MaxBytes() == 16,
                  "d_block.MaxBytes() == 16 must be true if d.MaxBytes() is "
                  "greater than or equal to 16");
    static_assert(
        IsSame<Vec<decltype(d_block)>, decltype(ExtractBlock<0>(Zero(d)))>(),
        "Vec<decltype(d_block)> should be the same vector type as "
        "decltype(ExtractBlock<0>(Zero(d)))");
    // The same relations, checked at runtime on the actual lane counts.
    const size_t d_bytes = Lanes(d) * sizeof(T);
    const size_t d_block_bytes = Lanes(d_block) * sizeof(T);
    HWY_ASSERT(d_block_bytes >= 1);
    HWY_ASSERT(d_block_bytes <= d_bytes);
    HWY_ASSERT(d_block_bytes <= 16);
    HWY_ASSERT(d_bytes > 16 || d_block_bytes == d_bytes);
    HWY_ASSERT(d_bytes < 16 || d_block_bytes == 16);
  }
};

// Test entry point: TestBlockDFromD over all lane types.
HWY_NOINLINE void TestAllBlockDFromD() {
  ForAllTypes(ForPartialVectors<TestBlockDFromD>());
}

}  // namespace
// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace hwy
HWY_AFTER_NAMESPACE();

// Everything below is guarded by HWY_ONCE; presumably it is compiled a single
// time even though this file is re-included per target via
// HWY_TARGET_INCLUDE / foreach_target.h - confirm against the Highway docs.
#if HWY_ONCE
namespace hwy {
namespace {
HWY_BEFORE_TEST(HighwayTest);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCapped);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllMaxLanes);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSet);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllOverflow);
// Continuation of the HighwayTest registration list: one
// HWY_EXPORT_AND_TEST_P entry per TestAll* function of this file.
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllClamp);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSignBit);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllNaN);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsNaN);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsInf);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsFinite);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCopyAndAssign);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllGetLane);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllDFromV);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllBlocks);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllBlockDFromD);
HWY_AFTER_TEST();
}  // namespace
}  // namespace hwy
// NOTE(review): presumably expands to the test binary's main() - confirm in
// hwy/tests/hwy_gtest.h.
HWY_TEST_MAIN();
#endif  // HWY_ONCE