tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

transform_test.cc (14779B)


      1 // Copyright 2022 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include <string.h>  // memcpy
     17 
     18 #include <vector>
     19 
     20 #include "hwy/aligned_allocator.h"
     21 #include "hwy/base.h"
     22 
     23 // clang-format off
     24 #undef HWY_TARGET_INCLUDE
     25 #define HWY_TARGET_INCLUDE "hwy/contrib/algo/transform_test.cc"  //NOLINT
     26 #include "hwy/foreach_target.h"  // IWYU pragma: keep
     27 #include "hwy/highway.h"
     28 #include "hwy/contrib/algo/transform-inl.h"
     29 #include "hwy/tests/test_util-inl.h"
     30 // clang-format on
     31 
     32 // If your project requires C++14 or later, you can ignore this and pass lambdas
     33 // directly to Transform, without requiring an lvalue as we do here for C++11.
     34 #if __cplusplus < 201402L
     35 #define HWY_GENERIC_LAMBDA 0
     36 #else
     37 #define HWY_GENERIC_LAMBDA 1
     38 #endif
     39 
     40 HWY_BEFORE_NAMESPACE();
     41 namespace hwy {
     42 namespace HWY_NAMESPACE {
     43 namespace {
     44 
     45 constexpr double kAlpha = 1.5;  // arbitrary scalar
     46 
     47 // Returns random floating-point number in [-8, 8) to ensure computations do
     48 // not exceed float32 precision.
     49 template <typename T>
     50 T Random(RandomState& rng) {
     51  const int32_t bits = static_cast<int32_t>(Random32(&rng)) & 1023;
     52  const double val = (bits - 512) / 64.0;
     53  // Clamp negative to zero for unsigned types.
     54  return ConvertScalarTo<T>(
     55      HWY_MAX(ConvertScalarTo<double>(hwy::LowestValue<T>()), val));
     56 }
     57 
     58 // SCAL, AXPY names are from BLAS.
     59 template <typename T>
     60 HWY_NOINLINE void SimpleSCAL(const T* x, T* out, size_t count) {
     61  for (size_t i = 0; i < count; ++i) {
     62    out[i] = ConvertScalarTo<T>(ConvertScalarTo<T>(kAlpha) * x[i]);
     63  }
     64 }
     65 
     66 template <typename T>
     67 HWY_NOINLINE void SimpleAXPY(const T* x, const T* y, T* out, size_t count) {
     68  for (size_t i = 0; i < count; ++i) {
     69    out[i] = ConvertScalarTo<T>(
     70        ConvertScalarTo<T>(ConvertScalarTo<T>(kAlpha) * x[i]) + y[i]);
     71  }
     72 }
     73 
     74 template <typename T>
     75 HWY_NOINLINE void SimpleFMA4(const T* x, const T* y, const T* z, T* out,
     76                             size_t count) {
     77  for (size_t i = 0; i < count; ++i) {
     78    out[i] = ConvertScalarTo<T>(x[i] * y[i] + z[i]);
     79  }
     80 }
     81 
     82 // In C++14, we can instead define these as generic lambdas next to where they
     83 // are invoked.
     84 #if !HWY_GENERIC_LAMBDA
     85 
     86 // Generator that returns even numbers by doubling the output indices.
     87 struct Gen2 {
     88  template <class D, class VU>
     89  Vec<D> operator()(D d, VU vidx) const {
     90    return BitCast(d, Add(vidx, vidx));
     91  }
     92 };
     93 
     94 struct SCAL {
     95  template <class D, class V>
     96  Vec<D> operator()(D d, V v) const {
     97    using T = TFromD<D>;
     98    return Mul(Set(d, ConvertScalarTo<T>(kAlpha)), v);
     99  }
    100 };
    101 
    102 struct AXPY {
    103  template <class D, class V>
    104  Vec<D> operator()(D d, V v, V v1) const {
    105    using T = TFromD<D>;
    106    return MulAdd(Set(d, ConvertScalarTo<T>(kAlpha)), v, v1);
    107  }
    108 };
    109 
    110 struct FMA4 {
    111  template <class D, class V>
    112  Vec<D> operator()(D /*d*/, V v, V v1, V v2) const {
    113    return MulAdd(v, v1, v2);
    114  }
    115 };
    116 
    117 #endif  // !HWY_GENERIC_LAMBDA
    118 
    119 // Invokes Test (e.g. TestTransform1) with all arg combinations. T comes from
    120 // ForFloatTypes.
    121 template <class Test>
    122 struct ForeachCountAndMisalign {
    123  template <typename T, class D>
    124  HWY_NOINLINE void operator()(T /*unused*/, D d) const {
    125    RandomState rng;
    126    const size_t N = Lanes(d);
    127    const size_t misalignments[3] = {0, N / 4, 3 * N / 5};
    128 
    129    for (size_t count = 0; count < 2 * N; ++count) {
    130      for (size_t ma : misalignments) {
    131        for (size_t mb : misalignments) {
    132          Test()(d, count, ma, mb, rng);
    133        }
    134      }
    135    }
    136  }
    137 };
    138 
    139 // Fills an array with random values, placing a given sentinel value both before
    140 // (when misalignment space is available) and after. Requires an allocation of
    141 // at least count + misalign + 1 elements.
    142 template <typename T>
    143 T* FillRandom(AlignedFreeUniquePtr<T[]>& pa, size_t count, size_t misalign,
    144              T sentinel, RandomState& rng) {
    145  for (size_t i = 0; i < misalign; ++i) {
    146    pa[i] = sentinel;
    147  }
    148 
    149  T* a = pa.get() + misalign;
    150  for (size_t i = 0; i < count; ++i) {
    151    a[i] = Random<T>(rng);
    152  }
    153  a[count] = sentinel;
    154  return a;
    155 }
    156 
    157 // Output-only, no loads
    158 struct TestGenerate {
    159  template <class D>
    160  void operator()(D d, size_t count, size_t misalign_a, size_t /*misalign_b*/,
    161                  RandomState& /*rng*/) {
    162    using T = TFromD<D>;
    163    AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count + 1);
    164    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
    165    HWY_ASSERT(pa && expected);
    166 
    167    T* actual = pa.get() + misalign_a;
    168 
    169    for (size_t i = 0; i < count; ++i) {
    170      expected[i] = ConvertScalarTo<T>(2 * i);
    171    }
    172 
    173    // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that
    174    // the attribute also applies to lambdas? If so, remove HWY_ATTR.
    175 #if HWY_GENERIC_LAMBDA
    176    const auto gen2 = [](const auto d2, const auto vidx)
    177                          HWY_ATTR { return BitCast(d2, Add(vidx, vidx)); };
    178 #else
    179    const Gen2 gen2;
    180 #endif
    181    actual[count] = ConvertScalarTo<T>(0);  // sentinel
    182    Generate(d, actual, count, gen2);
    183    HWY_ASSERT_EQ(ConvertScalarTo<T>(0), actual[count]);  // no write past end
    184 
    185    const auto info = hwy::detail::MakeTypeInfo<T>();
    186    const char* target_name = hwy::TargetName(HWY_TARGET);
    187    hwy::detail::AssertArrayEqual(info, expected.get(), actual, count,
    188                                  target_name, __FILE__, __LINE__);
    189  }
    190 };
    191 
    192 // Input-only, no stores
    193 struct TestForeach {
    194  template <class D>
    195  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
    196                  RandomState& /*rng*/) {
    197    if (misalign_b != 0) return;
    198    using T = TFromD<D>;
    199    AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count + 1);
    200    HWY_ASSERT(pa);
    201 
    202    T* actual = pa.get() + misalign_a;
    203    T max = hwy::LowestValue<T>();
    204    for (size_t i = 0; i < count; ++i) {
    205      actual[i] = hwy::ConvertScalarTo<T>(i <= count / 2 ? 2 * i : i);
    206      max = HWY_MAX(max, actual[i]);
    207    }
    208 
    209    // Place sentinel values in the misalignment area and at the input's end.
    210    for (size_t i = 0; i < misalign_a; ++i) {
    211      pa[i] = ConvertScalarTo<T>(2 * count);
    212    }
    213    actual[count] = ConvertScalarTo<T>(2 * count);
    214 
    215    const Vec<D> vmin = Set(d, hwy::LowestValue<T>());
    216    // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that
    217    // the attribute also applies to lambdas? If so, remove HWY_ATTR.
    218    Vec<D> vmax = vmin;
    219    const auto func = [&vmax](const D, const Vec<D> v)
    220                          HWY_ATTR { vmax = Max(vmax, v); };
    221    Foreach(d, actual, count, vmin, func);
    222 
    223    const char* target_name = hwy::TargetName(HWY_TARGET);
    224    AssertEqual(max, ReduceMax(d, vmax), target_name, __FILE__, __LINE__);
    225  }
    226 };
    227 
    228 // Zero extra input arrays
    229 struct TestTransform {
    230  template <class D>
    231  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
    232                  RandomState& rng) {
    233    if (misalign_b != 0) return;
    234    using T = TFromD<D>;
    235    // Prevents error if size to allocate is zero.
    236    AlignedFreeUniquePtr<T[]> pa =
    237        AllocateAligned<T>(HWY_MAX(1, misalign_a + count + 1));
    238    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
    239    HWY_ASSERT(pa && expected);
    240 
    241    const T sentinel = ConvertScalarTo<T>(-42);
    242    T* a = FillRandom(pa, count, misalign_a, sentinel, rng);
    243    SimpleSCAL(a, expected.get(), count);
    244 
    245    // TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that
    246    // the attribute also applies to lambdas? If so, remove HWY_ATTR.
    247 #if HWY_GENERIC_LAMBDA
    248    const auto scal = [](const auto d2, const auto v) HWY_ATTR {
    249      return Mul(Set(d2, ConvertScalarTo<T>(kAlpha)), v);
    250    };
    251 #else
    252    const SCAL scal;
    253 #endif
    254    Transform(d, a, count, scal);
    255 
    256    const auto info = hwy::detail::MakeTypeInfo<T>();
    257    const char* target_name = hwy::TargetName(HWY_TARGET);
    258    hwy::detail::AssertArrayEqual(info, expected.get(), a, count, target_name,
    259                                  __FILE__, __LINE__);
    260 
    261    // Ensure no out-of-bound writes.
    262    for (size_t i = 0; i < misalign_a; ++i) {
    263      HWY_ASSERT_EQ(sentinel, pa[i]);
    264    }
    265    HWY_ASSERT_EQ(sentinel, a[count]);
    266  }
    267 };
    268 
    269 // One extra input array
    270 struct TestTransform1 {
    271  template <class D>
    272  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
    273                  RandomState& rng) {
    274    using T = TFromD<D>;
    275    // Prevents error if size to allocate is zero.
    276    AlignedFreeUniquePtr<T[]> pa =
    277        AllocateAligned<T>(HWY_MAX(1, misalign_a + count + 1));
    278    AlignedFreeUniquePtr<T[]> pb =
    279        AllocateAligned<T>(HWY_MAX(1, misalign_b + count));
    280    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
    281    HWY_ASSERT(pa && pb && expected);
    282 
    283    const T sentinel = ConvertScalarTo<T>(-42);
    284    T* a = FillRandom(pa, count, misalign_a, sentinel, rng);
    285    T* b = pb.get() + misalign_b;
    286    for (size_t i = 0; i < count; ++i) {
    287      b[i] = Random<T>(rng);
    288    }
    289 
    290    SimpleAXPY(a, b, expected.get(), count);
    291 
    292 #if HWY_GENERIC_LAMBDA
    293    const auto axpy = [](const auto d2, const auto v, const auto v1) HWY_ATTR {
    294      return MulAdd(Set(d2, ConvertScalarTo<T>(kAlpha)), v, v1);
    295    };
    296 #else
    297    const AXPY axpy;
    298 #endif
    299    Transform1(d, a, count, b, axpy);
    300 
    301    AssertArraySimilar(expected.get(), a, count, hwy::TargetName(HWY_TARGET),
    302                       __FILE__, __LINE__);
    303    // Ensure no out-of-bound writes.
    304    for (size_t i = 0; i < misalign_a; ++i) {
    305      HWY_ASSERT_EQ(sentinel, pa[i]);
    306    }
    307    HWY_ASSERT_EQ(sentinel, a[count]);
    308  }
    309 };
    310 
    311 // Two extra input arrays
    312 struct TestTransform2 {
    313  template <class D>
    314  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
    315                  RandomState& rng) {
    316    using T = TFromD<D>;
    317    // Prevents error if size to allocate is zero.
    318    AlignedFreeUniquePtr<T[]> pa =
    319        AllocateAligned<T>(HWY_MAX(1, misalign_a + count + 1));
    320    AlignedFreeUniquePtr<T[]> pb =
    321        AllocateAligned<T>(HWY_MAX(1, misalign_b + count));
    322    AlignedFreeUniquePtr<T[]> pc =
    323        AllocateAligned<T>(HWY_MAX(1, misalign_a + count));
    324    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(HWY_MAX(1, count));
    325    HWY_ASSERT(pa && pb && pc && expected);
    326 
    327    const T sentinel = ConvertScalarTo<T>(-42);
    328    T* a = FillRandom(pa, count, misalign_a, sentinel, rng);
    329    T* b = pb.get() + misalign_b;
    330    T* c = pc.get() + misalign_a;
    331    for (size_t i = 0; i < count; ++i) {
    332      b[i] = Random<T>(rng);
    333      c[i] = Random<T>(rng);
    334    }
    335 
    336    SimpleFMA4(a, b, c, expected.get(), count);
    337 
    338 #if HWY_GENERIC_LAMBDA
    339    const auto fma4 = [](auto /*d*/, auto v, auto v1, auto v2)
    340                          HWY_ATTR { return MulAdd(v, v1, v2); };
    341 #else
    342    const FMA4 fma4;
    343 #endif
    344    Transform2(d, a, count, b, c, fma4);
    345 
    346    AssertArraySimilar(expected.get(), a, count, hwy::TargetName(HWY_TARGET),
    347                       __FILE__, __LINE__);
    348    // Ensure no out-of-bound writes.
    349    for (size_t i = 0; i < misalign_a; ++i) {
    350      HWY_ASSERT_EQ(sentinel, pa[i]);
    351    }
    352    HWY_ASSERT_EQ(sentinel, a[count]);
    353  }
    354 };
    355 
    356 template <typename T>
    357 class IfEq {
    358 public:
    359  IfEq(T val) : val_(val) {}
    360 
    361  template <class D, class V>
    362  Mask<D> operator()(D d, V v) const {
    363    return Eq(v, Set(d, val_));
    364  }
    365 
    366 private:
    367  T val_;
    368 };
    369 
    370 struct TestReplace {
    371  template <class D>
    372  void operator()(D d, size_t count, size_t misalign_a, size_t misalign_b,
    373                  RandomState& rng) {
    374    if (misalign_b != 0) return;
    375    if (count == 0) return;
    376    using T = TFromD<D>;
    377    AlignedFreeUniquePtr<T[]> pa = AllocateAligned<T>(misalign_a + count + 1);
    378    AlignedFreeUniquePtr<T[]> pb = AllocateAligned<T>(count);
    379    AlignedFreeUniquePtr<T[]> expected = AllocateAligned<T>(count);
    380    HWY_ASSERT(pa && pb && expected);
    381 
    382    const T sentinel = ConvertScalarTo<T>(-42);
    383    T* a = FillRandom(pa, count, misalign_a, sentinel, rng);
    384 
    385    std::vector<size_t> positions(AdjustedReps(count));
    386    for (size_t& pos : positions) {
    387      pos = static_cast<size_t>(rng()) % count;
    388    }
    389 
    390    for (size_t pos = 0; pos < count; ++pos) {
    391      const T old_t = a[pos];
    392      const T new_t = Random<T>(rng);
    393      for (size_t i = 0; i < count; ++i) {
    394        expected[i] = IsEqual(a[i], old_t) ? new_t : a[i];
    395      }
    396 
    397      // Copy so ReplaceIf gets the same input (and thus also outputs expected)
    398      memcpy(pb.get(), a, count * sizeof(T));
    399 
    400      Replace(d, a, count, new_t, old_t);
    401      HWY_ASSERT_ARRAY_EQ(expected.get(), a, count);
    402      // Ensure no out-of-bound writes.
    403      for (size_t i = 0; i < misalign_a; ++i) {
    404        HWY_ASSERT_EQ(sentinel, pa[i]);
    405      }
    406      HWY_ASSERT_EQ(sentinel, a[count]);
    407 
    408      ReplaceIf(d, pb.get(), count, new_t, IfEq<T>(old_t));
    409      HWY_ASSERT_ARRAY_EQ(expected.get(), pb.get(), count);
    410      // Ensure no out-of-bound writes.
    411      for (size_t i = 0; i < misalign_a; ++i) {
    412        HWY_ASSERT_EQ(sentinel, pa[i]);
    413      }
    414      HWY_ASSERT_EQ(sentinel, a[count]);
    415    }
    416  }
    417 };
    418 
    419 void TestAllGenerate() {
    420  // The test BitCast-s the indices, which does not work for floats.
    421  ForIntegerTypes(ForPartialVectors<ForeachCountAndMisalign<TestGenerate>>());
    422 }
    423 
    424 void TestAllForeach() {
    425  ForAllTypes(ForPartialVectors<ForeachCountAndMisalign<TestForeach>>());
    426 }
    427 
    428 void TestAllTransform() {
    429  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform>>());
    430 }
    431 
    432 void TestAllTransform1() {
    433  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform1>>());
    434 }
    435 
    436 void TestAllTransform2() {
    437  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestTransform2>>());
    438 }
    439 
    440 void TestAllReplace() {
    441  ForFloatTypes(ForPartialVectors<ForeachCountAndMisalign<TestReplace>>());
    442 }
    443 
    444 }  // namespace
    445 // NOLINTNEXTLINE(google-readability-namespace-comments)
    446 }  // namespace HWY_NAMESPACE
    447 }  // namespace hwy
    448 HWY_AFTER_NAMESPACE();
    449 
// Test registration for the TransformTest suite.
// NOTE(review): presumably HWY_ONCE ensures this section is emitted once even
// though foreach_target.h re-includes this file - confirm.
#if HWY_ONCE
namespace hwy {
namespace {
HWY_BEFORE_TEST(TransformTest);
// One entry per TestAll* function defined in this file.
HWY_EXPORT_AND_TEST_P(TransformTest, TestAllGenerate);
HWY_EXPORT_AND_TEST_P(TransformTest, TestAllForeach);
HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform);
HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform1);
HWY_EXPORT_AND_TEST_P(TransformTest, TestAllTransform2);
HWY_EXPORT_AND_TEST_P(TransformTest, TestAllReplace);
HWY_AFTER_TEST();
}  // namespace
}  // namespace hwy
HWY_TEST_MAIN();
#endif  // HWY_ONCE