tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

transform-inl.h (8138B)


      1 // Copyright 2022 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 // Per-target include guard
     17 #if defined(HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_) == \
     18    defined(HWY_TARGET_TOGGLE)
     19 #ifdef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
     20 #undef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
     21 #else
     22 #define HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_
     23 #endif
     24 
     25 #include <stddef.h>
     26 
     27 #include "hwy/highway.h"
     28 
     29 HWY_BEFORE_NAMESPACE();
     30 namespace hwy {
     31 namespace HWY_NAMESPACE {
     32 
     33 // These functions avoid having to write a loop plus remainder handling in the
     34 // (unfortunately still common) case where arrays are not aligned/padded. If the
     35 // inputs are known to be aligned/padded, it is more efficient to write a single
     36 // loop using Load(). We do not provide a TransformAlignedPadded because it
     37 // would be more verbose than such a loop.
     38 //
     39 // Func is either a functor with a templated operator()(d, v[, v1[, v2]]), or a
     40 // generic lambda if using C++14. The d argument is the same as was passed to
     41 // the Generate etc. functions. Due to apparent limitations of Clang, it is
     42 // currently necessary to add HWY_ATTR before the opening { of the lambda to
     43 // avoid errors about "always_inline function .. requires target".
     44 //
     45 // We do not check HWY_MEM_OPS_MIGHT_FAULT because LoadN/StoreN do not fault.
     46 
     47 // Fills `out[0, count)` with the vectors returned by `func(d, index_vec)`,
     48 // where `index_vec` is `Vec<RebindToUnsigned<D>>`. On the first call to `func`,
     49 // the value of its lane i is i, and increases by `Lanes(d)` after every call.
     50 // Note that some of these indices may be `>= count`, but the elements that
     51 // `func` returns in those lanes will not be written to `out`.
     52 template <class D, class Func, typename T = TFromD<D>>
     53 void Generate(D d, T* HWY_RESTRICT out, size_t count, const Func& func) {
     54  const RebindToUnsigned<D> du;
     55  using TU = TFromD<decltype(du)>;
     56  const size_t N = Lanes(d);
     57 
     58  size_t idx = 0;
     59  Vec<decltype(du)> vidx = Iota(du, 0);
     60  if (count >= N) {
     61    for (; idx <= count - N; idx += N) {
     62      StoreU(func(d, vidx), d, out + idx);
     63      vidx = Add(vidx, Set(du, static_cast<TU>(N)));
     64    }
     65  }
     66 
     67  // `count` was a multiple of the vector length `N`: already done.
     68  if (HWY_UNLIKELY(idx == count)) return;
     69 
     70  const size_t remaining = count - idx;
     71  HWY_DASSERT(0 != remaining && remaining < N);
     72  StoreN(func(d, vidx), d, out + idx, remaining);
     73 }
     74 
     75 // Calls `func(d, v)` for each input vector; out of bound lanes with index i >=
     76 // `count` are instead taken from `no[i % Lanes(d)]`.
     77 template <class D, class Func, typename T = TFromD<D>>
     78 void Foreach(D d, const T* HWY_RESTRICT in, const size_t count, const Vec<D> no,
     79             const Func& func) {
     80  const size_t N = Lanes(d);
     81 
     82  size_t idx = 0;
     83  if (count >= N) {
     84    for (; idx <= count - N; idx += N) {
     85      const Vec<D> v = LoadU(d, in + idx);
     86      func(d, v);
     87    }
     88  }
     89 
     90  // `count` was a multiple of the vector length `N`: already done.
     91  if (HWY_UNLIKELY(idx == count)) return;
     92 
     93  const size_t remaining = count - idx;
     94  HWY_DASSERT(0 != remaining && remaining < N);
     95  const Vec<D> v = LoadNOr(no, d, in + idx, remaining);
     96  func(d, v);
     97 }
     98 
     99 // Replaces `inout[idx]` with `func(d, inout[idx])`. Example usage: multiplying
    100 // array elements by a constant.
    101 template <class D, class Func, typename T = TFromD<D>>
    102 void Transform(D d, T* HWY_RESTRICT inout, size_t count, const Func& func) {
    103  const size_t N = Lanes(d);
    104 
    105  size_t idx = 0;
    106  if (count >= N) {
    107    for (; idx <= count - N; idx += N) {
    108      const Vec<D> v = LoadU(d, inout + idx);
    109      StoreU(func(d, v), d, inout + idx);
    110    }
    111  }
    112 
    113  // `count` was a multiple of the vector length `N`: already done.
    114  if (HWY_UNLIKELY(idx == count)) return;
    115 
    116  const size_t remaining = count - idx;
    117  HWY_DASSERT(0 != remaining && remaining < N);
    118  const Vec<D> v = LoadN(d, inout + idx, remaining);
    119  StoreN(func(d, v), d, inout + idx, remaining);
    120 }
    121 
    122 // Replaces `inout[idx]` with `func(d, inout[idx], in1[idx])`. Example usage:
    123 // multiplying array elements by those of another array.
    124 template <class D, class Func, typename T = TFromD<D>>
    125 void Transform1(D d, T* HWY_RESTRICT inout, size_t count,
    126                const T* HWY_RESTRICT in1, const Func& func) {
    127  const size_t N = Lanes(d);
    128 
    129  size_t idx = 0;
    130  if (count >= N) {
    131    for (; idx <= count - N; idx += N) {
    132      const Vec<D> v = LoadU(d, inout + idx);
    133      const Vec<D> v1 = LoadU(d, in1 + idx);
    134      StoreU(func(d, v, v1), d, inout + idx);
    135    }
    136  }
    137 
    138  // `count` was a multiple of the vector length `N`: already done.
    139  if (HWY_UNLIKELY(idx == count)) return;
    140 
    141  const size_t remaining = count - idx;
    142  HWY_DASSERT(0 != remaining && remaining < N);
    143  const Vec<D> v = LoadN(d, inout + idx, remaining);
    144  const Vec<D> v1 = LoadN(d, in1 + idx, remaining);
    145  StoreN(func(d, v, v1), d, inout + idx, remaining);
    146 }
    147 
    148 // Replaces `inout[idx]` with `func(d, inout[idx], in1[idx], in2[idx])`. Example
    149 // usage: FMA of elements from three arrays, stored into the first array.
    150 template <class D, class Func, typename T = TFromD<D>>
    151 void Transform2(D d, T* HWY_RESTRICT inout, size_t count,
    152                const T* HWY_RESTRICT in1, const T* HWY_RESTRICT in2,
    153                const Func& func) {
    154  const size_t N = Lanes(d);
    155 
    156  size_t idx = 0;
    157  if (count >= N) {
    158    for (; idx <= count - N; idx += N) {
    159      const Vec<D> v = LoadU(d, inout + idx);
    160      const Vec<D> v1 = LoadU(d, in1 + idx);
    161      const Vec<D> v2 = LoadU(d, in2 + idx);
    162      StoreU(func(d, v, v1, v2), d, inout + idx);
    163    }
    164  }
    165 
    166  // `count` was a multiple of the vector length `N`: already done.
    167  if (HWY_UNLIKELY(idx == count)) return;
    168 
    169  const size_t remaining = count - idx;
    170  HWY_DASSERT(0 != remaining && remaining < N);
    171  const Vec<D> v = LoadN(d, inout + idx, remaining);
    172  const Vec<D> v1 = LoadN(d, in1 + idx, remaining);
    173  const Vec<D> v2 = LoadN(d, in2 + idx, remaining);
    174  StoreN(func(d, v, v1, v2), d, inout + idx, remaining);
    175 }
    176 
    177 template <class D, typename T = TFromD<D>>
    178 void Replace(D d, T* HWY_RESTRICT inout, size_t count, T new_t, T old_t) {
    179  const size_t N = Lanes(d);
    180  const Vec<D> old_v = Set(d, old_t);
    181  const Vec<D> new_v = Set(d, new_t);
    182 
    183  size_t idx = 0;
    184  if (count >= N) {
    185    for (; idx <= count - N; idx += N) {
    186      Vec<D> v = LoadU(d, inout + idx);
    187      StoreU(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx);
    188    }
    189  }
    190 
    191  // `count` was a multiple of the vector length `N`: already done.
    192  if (HWY_UNLIKELY(idx == count)) return;
    193 
    194  const size_t remaining = count - idx;
    195  HWY_DASSERT(0 != remaining && remaining < N);
    196  const Vec<D> v = LoadN(d, inout + idx, remaining);
    197  StoreN(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx, remaining);
    198 }
    199 
    200 template <class D, class Func, typename T = TFromD<D>>
    201 void ReplaceIf(D d, T* HWY_RESTRICT inout, size_t count, T new_t,
    202               const Func& func) {
    203  const size_t N = Lanes(d);
    204  const Vec<D> new_v = Set(d, new_t);
    205 
    206  size_t idx = 0;
    207  if (count >= N) {
    208    for (; idx <= count - N; idx += N) {
    209      Vec<D> v = LoadU(d, inout + idx);
    210      StoreU(IfThenElse(func(d, v), new_v, v), d, inout + idx);
    211    }
    212  }
    213 
    214  // `count` was a multiple of the vector length `N`: already done.
    215  if (HWY_UNLIKELY(idx == count)) return;
    216 
    217  const size_t remaining = count - idx;
    218  HWY_DASSERT(0 != remaining && remaining < N);
    219  const Vec<D> v = LoadN(d, inout + idx, remaining);
    220  StoreN(IfThenElse(func(d, v), new_v, v), d, inout + idx, remaining);
    221 }
    222 
    223 // NOLINTNEXTLINE(google-readability-namespace-comments)
    224 }  // namespace HWY_NAMESPACE
    225 }  // namespace hwy
    226 HWY_AFTER_NAMESPACE();
    227 
    228 #endif  // HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_