transform_test.cc (14779B)
1 // Copyright 2022 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include <string.h> // memcpy 17 18 #include <vector> 19 20 #include "hwy/aligned_allocator.h" 21 #include "hwy/base.h" 22 23 // clang-format off 24 #undef HWY_TARGET_INCLUDE 25 #define HWY_TARGET_INCLUDE "hwy/contrib/algo/transform_test.cc" //NOLINT 26 #include "hwy/foreach_target.h" // IWYU pragma: keep 27 #include "hwy/highway.h" 28 #include "hwy/contrib/algo/transform-inl.h" 29 #include "hwy/tests/test_util-inl.h" 30 // clang-format on 31 32 // If your project requires C++14 or later, you can ignore this and pass lambdas 33 // directly to Transform, without requiring an lvalue as we do here for C++11. 34 #if __cplusplus < 201402L 35 #define HWY_GENERIC_LAMBDA 0 36 #else 37 #define HWY_GENERIC_LAMBDA 1 38 #endif 39 40 HWY_BEFORE_NAMESPACE(); 41 namespace hwy { 42 namespace HWY_NAMESPACE { 43 namespace { 44 45 constexpr double kAlpha = 1.5; // arbitrary scalar 46 47 // Returns random floating-point number in [-8, 8) to ensure computations do 48 // not exceed float32 precision. 49 template <typename T> 50 T Random(RandomState& rng) { 51 const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023; 52 const double val = (bits - 512) / 64.0; 53 // Clamp negative to zero for unsigned types. 54 return ConvertScalarTo<T>( 55 HWY_MAX(ConvertScalarTo<double>(hwy::LowestValue<T>()), val)); 56 } 57 58 // SCAL, AXPY names are from BLAS. 59 template <typename T> 60 HWY_NOINLINE void SimpleSCAL(const T* x, T* out, size_t count) { 61 for (size_t i = 0; i < count; ++i) { 62 out[i] = ConvertScalarTo<T>(ConvertScalarTo<T>(kAlpha) * x[i]); 63 } 64 } 65 66 template <typename T> 67 HWY_NOINLINE void SimpleAXPY(const T* x, const T* y, T* out, size_t count) { 68 for (size_t i = 0; i < count; ++i) { 69 out[i] = ConvertScalarTo<T>( 70 ConvertScalarTo<T>(ConvertScalarTo<T>(kAlpha) * x[i]) + y[i]); 71 } 72 } 73 74 template <typename T> 75 HWY_NOINLINE void SimpleFMA4(const T* x, const T* y, const T* z, T* out, 76 size_t count) { 77 for (size_t i = 0; i < count; ++i) { 78 out[i] = ConvertScalarTo<T>(x[i] * y[i] + z[i]); 79 } 80 } 81 82 // In C++14, we can instead define these as generic lambdas next to where they 83 // are invoked. 84 #if !HWY_GENERIC_LAMBDA 85 86 // Generator that returns even numbers by doubling the output indices. 87 struct Gen2 { 88 template <class D, class VU> 89 Vec<D> operator()(D d, VU vidx) const { 90 return BitCast(d, Add(vidx, vidx)); 91 } 92 }; 93 94 struct SCAL { 95 template <class D, class V> 96 Vec<D> operator()(D d, V v) const { 97 using T = TFromD<D>; 98 return Mul(Set(d, ConvertScalarTo<T>(kAlpha)), v); 99 } 100 }; 101 102 struct AXPY { 103 template <class D, class V> 104 Vec<D> operator()(D d, V v, V v1) const { 105 using T = TFromD<D>; 106 return MulAdd(Set(d, ConvertScalarTo<T>(kAlpha)), v, v1); 107 } 108 }; 109 110 struct FMA4 { 111 template <class D, class V> 112 Vec<D> operator()(D /*d*/, V v, V v1, V v2) const { 113 return MulAdd(v, v1, v2); 114 } 115 }; 116 117 #endif // !HWY_GENERIC_LAMBDA 118 119 // Invokes Test (e.g. TestTransform1) with all arg combinations. T comes from 120 // ForFloatTypes. 121 template <class Test> 122 struct ForeachCountAndMisalign { 123 template <typename T, class D> 124 HWY_NOINLINE void operator()(T /*unused*/, D d) const { 125 RandomState rng; 126 const size_t N = Lanes(d); 127 const size_t misalignments[3] = {0, N / 4, 3 * N / 5}; 128 129 for (size_t count = 0; count < 2 * N; ++count) { 130 for (size_t ma : misalignments) { 131 for (size_t mb : misalignments) { 132 Test()(d, count, ma, mb, rng); 133 } 134 } 135 } 136 } 137 }; 138 139 // Fills an array with random values, placing a given sentinel value both before 140 // (when misalignment space is available) and after. Requires an allocation of 141 // at least count + misalign + 1 elements. 142 template <typename T> 143 T* FillRandom(AlignedFreeUniquePtr<T[]>& pa, size_t count, size_t misalign, 144 T sentinel, RandomState& rng) { 145 for (size_t i = 0; i < misalign; ++i) { 146 pa[i] = sentinel; 147 } 148 149 T* a = pa.get() + misalign; 150 for (size_t i = 0; i < count; ++i) { 151 a[i] = Random<T>(rng); 152 } 153 a[count] = sentinel; 154 return a; 155 } 156 157 // Output-only, no loads 158 struct TestGenerate { 159 template <class D> 160 void operator()(D d, size_t count, size_t misalign_a, size_t /*misalign_b*/, 161 RandomState& /*rng*/) { 162 using T = TFromD<D>; 163 AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count + 1); 164 AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count)); 165 HWY_ASSERT(pa && expected); 166 167 T* actual = pa.get() + misalign_a; 168 169 for (size_t i = 0; i < count; ++i) { 170 expected[i] = ConvertScalarTo<T>(2 * i); 171 } 172 173 // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that 174 // the attribute also applies to lambdas? If so, remove HWY_ATTR. 175 #if HWY_GENERIC_LAMBDA 176 const auto gen2 = [](const auto d2, const auto vidx) 177 HWY_ATTR { return BitCast(d2, Add(vidx, vidx)); }; 178 #else 179 const Gen2 gen2; 180 #endif 181 actual[count] = ConvertScalarTo<T>(0); // sentinel 182 Generate(d, actual, count, gen2); 183 HWY_ASSERT_EQ(ConvertScalarTo<T>(0), actual[count]); // no write past end 184 185 const auto info = hwy::detail::MakeTypeInfo<T>(); 186 const char* target_name = hwy::TargetName(HWY_TARGET); 187 hwy::detail::AssertArrayEqual(info, expected.get(), actual, count, 188 target_name, __FILE__, __LINE__); 189 } 190 }; 191 192 // Input-only, no stores 193 struct TestForeach { 194 template <class D> 195 void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b, 196 RandomState& /*rng*/) { 197 if (misalign_b != 0) return; 198 using T = TFromD<D>; 199 AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count + 1); 200 HWY_ASSERT(pa); 201 202 T* actual = pa.get() + misalign_a; 203 T max = hwy::LowestValue<T>(); 204 for (size_t i = 0; i < count; ++i) { 205 actual[i] = hwy::ConvertScalarTo<T>(i <= count / 2 ? 2 * i : i); 206 max = HWY_MAX(max, actual[i]); 207 } 208 209 // Place sentinel values in the misalignment area and at the input's end. 210 for (size_t i = 0; i < misalign_a; ++i) { 211 pa[i] = ConvertScalarTo<T>(2 * count); 212 } 213 actual[count] = ConvertScalarTo<T>(2 * count); 214 215 const Vec<D> vmin = Set(d, hwy::LowestValue<T>()); 216 // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that 217 // the attribute also applies to lambdas? If so, remove HWY_ATTR. 218 Vec<D> vmax = vmin; 219 const auto func = [&vmax](const D, const Vec<D> v) 220 HWY_ATTR { vmax = Max(vmax, v); }; 221 Foreach(d, actual, count, vmin, func); 222 223 const char* target_name = hwy::TargetName(HWY_TARGET); 224 AssertEqual(max, ReduceMax(d, vmax), target_name, __FILE__, __LINE__); 225 } 226 }; 227 228 // Zero extra input arrays 229 struct TestTransform { 230 template <class D> 231 void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b, 232 RandomState& rng) { 233 if (misalign_b != 0) return; 234 using T = TFromD<D>; 235 // Prevents error if size to allocate is zero. 236 AlignedFreeUniquePtr<T[]> pa = 237 AllocateAligned<T>(HWY_MAX(1, misalign_a + count + 1)); 238 AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count)); 239 HWY_ASSERT(pa && expected); 240 241 const T sentinel = ConvertScalarTo<T>(-42); 242 T* a = FillRandom(pa, count, misalign_a, sentinel, rng); 243 SimpleSCAL(a, expected.get(), count); 244 245 // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that 246 // the attribute also applies to lambdas? If so, remove HWY_ATTR. 247 #if HWY_GENERIC_LAMBDA 248 const auto scal = [](const auto d2, const auto v) HWY_ATTR { 249 return Mul(Set(d2, ConvertScalarTo<T>(kAlpha)), v); 250 }; 251 #else 252 const SCAL scal; 253 #endif 254 Transform(d, a, count, scal); 255 256 const auto info = hwy::detail::MakeTypeInfo<T>(); 257 const char* target_name = hwy::TargetName(HWY_TARGET); 258 hwy::detail::AssertArrayEqual(info, expected.get(), a, count, target_name, 259 __FILE__, __LINE__); 260 261 // Ensure no out-of-bound writes. 262 for (size_t i = 0; i < misalign_a; ++i) { 263 HWY_ASSERT_EQ(sentinel, pa[i]); 264 } 265 HWY_ASSERT_EQ(sentinel, a[count]); 266 } 267 }; 268 269 // One extra input array 270 struct TestTransform1 { 271 template <class D> 272 void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b, 273 RandomState& rng) { 274 using T = TFromD<D>; 275 // Prevents error if size to allocate is zero. 276 AlignedFreeUniquePtr<T[]> pa = 277 AllocateAligned<T>(HWY_MAX(1, misalign_a + count + 1)); 278 AlignedFreeUniquePtr<T[]> pb = 279 AllocateAligned<T>(HWY_MAX(1, misalign_b + count)); 280 AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count)); 281 HWY_ASSERT(pa && pb && expected); 282 283 const T sentinel = ConvertScalarTo<T>(-42); 284 T* a = FillRandom(pa, count, misalign_a, sentinel, rng); 285 T* b = pb.get() + misalign_b; 286 for (size_t i = 0; i < count; ++i) { 287 b[i] = Random<T>(rng); 288 } 289 290 SimpleAXPY(a, b, expected.get(), count); 291 292 #if HWY_GENERIC_LAMBDA 293 const auto axpy = [](const auto d2, const auto v, const auto v1) HWY_ATTR { 294 return MulAdd(Set(d2, ConvertScalarTo<T>(kAlpha)), v, v1); 295 }; 296 #else 297 const AXPY axpy; 298 #endif 299 Transform1(d, a, count, b, axpy); 300 301 AssertArraySimilar(expected.get(), a, count, hwy::TargetName(HWY_TARGET), 302 __FILE__, __LINE__); 303 // Ensure no out-of-bound writes. 304 for (size_t i = 0; i < misalign_a; ++i) { 305 HWY_ASSERT_EQ(sentinel, pa[i]); 306 } 307 HWY_ASSERT_EQ(sentinel, a[count]); 308 } 309 }; 310 311 // Two extra input arrays 312 struct TestTransform2 { 313 template <class D> 314 void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b, 315 RandomState& rng) { 316 using T = TFromD<D>; 317 // Prevents error if size to allocate is zero. 318 AlignedFreeUniquePtr<T[]> pa = 319 AllocateAligned<T>(HWY_MAX(1, misalign_a + count + 1)); 320 AlignedFreeUniquePtr<T[]> pb = 321 AllocateAligned<T>(HWY_MAX(1, misalign_b + count)); 322 AlignedFreeUniquePtr<T[]> pc = 323 AllocateAligned<T>(HWY_MAX(1, misalign_a + count)); 324 AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count)); 325 HWY_ASSERT(pa && pb && pc && expected); 326 327 const T sentinel = ConvertScalarTo<T>(-42); 328 T* a = FillRandom(pa, count, misalign_a, sentinel, rng); 329 T* b = pb.get() + misalign_b; 330 T* c = pc.get() + misalign_a; 331 for (size_t i = 0; i < count; ++i) { 332 b[i] = Random<T>(rng); 333 c[i] = Random<T>(rng); 334 } 335 336 SimpleFMA4(a, b, c, expected.get(), count); 337 338 #if HWY_GENERIC_LAMBDA 339 const auto fma4 = [](auto /*d*/, auto v, auto v1, auto v2) 340 HWY_ATTR { return MulAdd(v, v1, v2); }; 341 #else 342 const FMA4 fma4; 343 #endif 344 Transform2(d, a, count, b, c, fma4); 345 346 AssertArraySimilar(expected.get(), a, count, hwy::TargetName(HWY_TARGET), 347 __FILE__, __LINE__); 348 // Ensure no out-of-bound writes. 349 for (size_t i = 0; i < misalign_a; ++i) { 350 HWY_ASSERT_EQ(sentinel, pa[i]); 351 } 352 HWY_ASSERT_EQ(sentinel, a[count]); 353 } 354 }; 355 356 template <typename T> 357 class IfEq { 358 public: 359 IfEq(T val) : val_(val) {} 360 361 template <class D, class V> 362 Mask<D> operator()(D d, V v) const { 363 return Eq(v, Set(d, val_)); 364 } 365 366 private: 367 T val_; 368 }; 369 370 struct TestReplace { 371 template <class D> 372 void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b, 373 RandomState& rng) { 374 if (misalign_b != 0) return; 375 if (count == 0) return; 376 using T = TFromD<D>; 377 AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count + 1); 378 AlignedFreeUniquePtr<T[]> pb = AllocateAligned<T>(count); 379 AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(count); 380 HWY_ASSERT(pa && pb && expected); 381 382 const T sentinel = ConvertScalarTo<T>(-42); 383 T* a = FillRandom(pa, count, misalign_a, sentinel, rng); 384 385 std::vector<size_t> positions(AdjustedReps(count)); 386 for (size_t& pos : positions) { 387 pos = static_cast<size_t>(rng()) % count; 388 } 389 390 for (size_t pos = 0; pos < count; ++pos) { 391 const T old_t = a[pos]; 392 const T new_t = Random<T>(rng); 393 for (size_t i = 0; i < count; ++i) { 394 expected[i] = IsEqual(a[i], old_t) ? new_t : a[i]; 395 } 396 397 // Copy so ReplaceIf gets the same input (and thus also outputs expected) 398 memcpy(pb.get(), a, count * sizeof(T)); 399 400 Replace(d, a, count, new_t, old_t); 401 HWY_ASSERT_ARRAY_EQ(expected.get(), a, count); 402 // Ensure no out-of-bound writes. 403 for (size_t i = 0; i < misalign_a; ++i) { 404 HWY_ASSERT_EQ(sentinel, pa[i]); 405 } 406 HWY_ASSERT_EQ(sentinel, a[count]); 407 408 ReplaceIf(d, pb.get(), count, new_t, IfEq<T>(old_t)); 409 HWY_ASSERT_ARRAY_EQ(expected.get(), pb.get(), count); 410 // Ensure no out-of-bound writes. 411 for (size_t i = 0; i < misalign_a; ++i) { 412 HWY_ASSERT_EQ(sentinel, pa[i]); 413 } 414 HWY_ASSERT_EQ(sentinel, a[count]); 415 } 416 } 417 }; 418 419 void TestAllGenerate() { 420 // The test BitCast-s the indices, which does not work for floats. 421 ForIntegerTypes(ForPartialVectors<ForeachCountAndMisalign<TestGenerate>>()); 422 } 423 424 void TestAllForeach() { 425 ForAllTypes(ForPartialVectors<ForeachCountAndMisalign<TestForeach>>()); 426 } 427 428 void TestAllTransform() { 429 ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform>>()); 430 } 431 432 void TestAllTransform1() { 433 ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform1>>()); 434 } 435 436 void TestAllTransform2() { 437 ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform2>>()); 438 } 439 440 void TestAllReplace() { 441 ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestReplace>>()); 442 } 443 444 } // namespace 445 // NOLINTNEXTLINE(google-readability-namespace-comments) 446 } // namespace HWY_NAMESPACE 447 } // namespace hwy 448 HWY_AFTER_NAMESPACE(); 449 450 #if HWY_ONCE 451 namespace hwy { 452 namespace { 453 HWY_BEFORE_TEST(TransformTest); 454 HWY_EXPORT_AND_TEST_P(TransformTest, TestAllGenerate); 455 HWY_EXPORT_AND_TEST_P(TransformTest, TestAllForeach); 456 HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform); 457 HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform1); 458 HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform2); 459 HWY_EXPORT_AND_TEST_P(TransformTest, TestAllReplace); 460 HWY_AFTER_TEST(); 461 } // namespace 462 } // namespace hwy 463 HWY_TEST_MAIN(); 464 #endif // HWY_ONCE