transform-inl.h (8138B)
1 // Copyright 2022 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // Per-target include guard 17 #if defined(HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_) == \ 18 defined(HWY_TARGET_TOGGLE) 19 #ifdef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_ 20 #undef HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_ 21 #else 22 #define HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_ 23 #endif 24 25 #include <stddef.h> 26 27 #include "hwy/highway.h" 28 29 HWY_BEFORE_NAMESPACE(); 30 namespace hwy { 31 namespace HWY_NAMESPACE { 32 33 // These functions avoid having to write a loop plus remainder handling in the 34 // (unfortunately still common) case where arrays are not aligned/padded. If the 35 // inputs are known to be aligned/padded, it is more efficient to write a single 36 // loop using Load(). We do not provide a TransformAlignedPadded because it 37 // would be more verbose than such a loop. 38 // 39 // Func is either a functor with a templated operator()(d, v[, v1[, v2]]), or a 40 // generic lambda if using C++14. The d argument is the same as was passed to 41 // the Generate etc. functions. Due to apparent limitations of Clang, it is 42 // currently necessary to add HWY_ATTR before the opening { of the lambda to 43 // avoid errors about "always_inline function .. requires target". 44 // 45 // We do not check HWY_MEM_OPS_MIGHT_FAULT because LoadN/StoreN do not fault. 46 47 // Fills `out[0, count)` with the vectors returned by `func(d, index_vec)`, 48 // where `index_vec` is `Vec<RebindToUnsigned<D>>`. On the first call to `func`, 49 // the value of its lane i is i, and increases by `Lanes(d)` after every call. 50 // Note that some of these indices may be `>= count`, but the elements that 51 // `func` returns in those lanes will not be written to `out`. 52 template <class D, class Func, typename T = TFromD<D>> 53 void Generate(D d, T* HWY_RESTRICT out, size_t count, const Func& func) { 54 const RebindToUnsigned<D> du; 55 using TU = TFromD<decltype(du)>; 56 const size_t N = Lanes(d); 57 58 size_t idx = 0; 59 Vec<decltype(du)> vidx = Iota(du, 0); 60 if (count >= N) { 61 for (; idx <= count - N; idx += N) { 62 StoreU(func(d, vidx), d, out + idx); 63 vidx = Add(vidx, Set(du, static_cast<TU>(N))); 64 } 65 } 66 67 // `count` was a multiple of the vector length `N`: already done. 68 if (HWY_UNLIKELY(idx == count)) return; 69 70 const size_t remaining = count - idx; 71 HWY_DASSERT(0 != remaining && remaining < N); 72 StoreN(func(d, vidx), d, out + idx, remaining); 73 } 74 75 // Calls `func(d, v)` for each input vector; out of bound lanes with index i >= 76 // `count` are instead taken from `no[i % Lanes(d)]`. 77 template <class D, class Func, typename T = TFromD<D>> 78 void Foreach(D d, const T* HWY_RESTRICT in, const size_t count, const Vec<D> no, 79 const Func& func) { 80 const size_t N = Lanes(d); 81 82 size_t idx = 0; 83 if (count >= N) { 84 for (; idx <= count - N; idx += N) { 85 const Vec<D> v = LoadU(d, in + idx); 86 func(d, v); 87 } 88 } 89 90 // `count` was a multiple of the vector length `N`: already done. 91 if (HWY_UNLIKELY(idx == count)) return; 92 93 const size_t remaining = count - idx; 94 HWY_DASSERT(0 != remaining && remaining < N); 95 const Vec<D> v = LoadNOr(no, d, in + idx, remaining); 96 func(d, v); 97 } 98 99 // Replaces `inout[idx]` with `func(d, inout[idx])`. Example usage: multiplying 100 // array elements by a constant. 101 template <class D, class Func, typename T = TFromD<D>> 102 void Transform(D d, T* HWY_RESTRICT inout, size_t count, const Func& func) { 103 const size_t N = Lanes(d); 104 105 size_t idx = 0; 106 if (count >= N) { 107 for (; idx <= count - N; idx += N) { 108 const Vec<D> v = LoadU(d, inout + idx); 109 StoreU(func(d, v), d, inout + idx); 110 } 111 } 112 113 // `count` was a multiple of the vector length `N`: already done. 114 if (HWY_UNLIKELY(idx == count)) return; 115 116 const size_t remaining = count - idx; 117 HWY_DASSERT(0 != remaining && remaining < N); 118 const Vec<D> v = LoadN(d, inout + idx, remaining); 119 StoreN(func(d, v), d, inout + idx, remaining); 120 } 121 122 // Replaces `inout[idx]` with `func(d, inout[idx], in1[idx])`. Example usage: 123 // multiplying array elements by those of another array. 124 template <class D, class Func, typename T = TFromD<D>> 125 void Transform1(D d, T* HWY_RESTRICT inout, size_t count, 126 const T* HWY_RESTRICT in1, const Func& func) { 127 const size_t N = Lanes(d); 128 129 size_t idx = 0; 130 if (count >= N) { 131 for (; idx <= count - N; idx += N) { 132 const Vec<D> v = LoadU(d, inout + idx); 133 const Vec<D> v1 = LoadU(d, in1 + idx); 134 StoreU(func(d, v, v1), d, inout + idx); 135 } 136 } 137 138 // `count` was a multiple of the vector length `N`: already done. 139 if (HWY_UNLIKELY(idx == count)) return; 140 141 const size_t remaining = count - idx; 142 HWY_DASSERT(0 != remaining && remaining < N); 143 const Vec<D> v = LoadN(d, inout + idx, remaining); 144 const Vec<D> v1 = LoadN(d, in1 + idx, remaining); 145 StoreN(func(d, v, v1), d, inout + idx, remaining); 146 } 147 148 // Replaces `inout[idx]` with `func(d, inout[idx], in1[idx], in2[idx])`. Example 149 // usage: FMA of elements from three arrays, stored into the first array. 150 template <class D, class Func, typename T = TFromD<D>> 151 void Transform2(D d, T* HWY_RESTRICT inout, size_t count, 152 const T* HWY_RESTRICT in1, const T* HWY_RESTRICT in2, 153 const Func& func) { 154 const size_t N = Lanes(d); 155 156 size_t idx = 0; 157 if (count >= N) { 158 for (; idx <= count - N; idx += N) { 159 const Vec<D> v = LoadU(d, inout + idx); 160 const Vec<D> v1 = LoadU(d, in1 + idx); 161 const Vec<D> v2 = LoadU(d, in2 + idx); 162 StoreU(func(d, v, v1, v2), d, inout + idx); 163 } 164 } 165 166 // `count` was a multiple of the vector length `N`: already done. 167 if (HWY_UNLIKELY(idx == count)) return; 168 169 const size_t remaining = count - idx; 170 HWY_DASSERT(0 != remaining && remaining < N); 171 const Vec<D> v = LoadN(d, inout + idx, remaining); 172 const Vec<D> v1 = LoadN(d, in1 + idx, remaining); 173 const Vec<D> v2 = LoadN(d, in2 + idx, remaining); 174 StoreN(func(d, v, v1, v2), d, inout + idx, remaining); 175 } 176 177 template <class D, typename T = TFromD<D>> 178 void Replace(D d, T* HWY_RESTRICT inout, size_t count, T new_t, T old_t) { 179 const size_t N = Lanes(d); 180 const Vec<D> old_v = Set(d, old_t); 181 const Vec<D> new_v = Set(d, new_t); 182 183 size_t idx = 0; 184 if (count >= N) { 185 for (; idx <= count - N; idx += N) { 186 Vec<D> v = LoadU(d, inout + idx); 187 StoreU(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx); 188 } 189 } 190 191 // `count` was a multiple of the vector length `N`: already done. 192 if (HWY_UNLIKELY(idx == count)) return; 193 194 const size_t remaining = count - idx; 195 HWY_DASSERT(0 != remaining && remaining < N); 196 const Vec<D> v = LoadN(d, inout + idx, remaining); 197 StoreN(IfThenElse(Eq(v, old_v), new_v, v), d, inout + idx, remaining); 198 } 199 200 template <class D, class Func, typename T = TFromD<D>> 201 void ReplaceIf(D d, T* HWY_RESTRICT inout, size_t count, T new_t, 202 const Func& func) { 203 const size_t N = Lanes(d); 204 const Vec<D> new_v = Set(d, new_t); 205 206 size_t idx = 0; 207 if (count >= N) { 208 for (; idx <= count - N; idx += N) { 209 Vec<D> v = LoadU(d, inout + idx); 210 StoreU(IfThenElse(func(d, v), new_v, v), d, inout + idx); 211 } 212 } 213 214 // `count` was a multiple of the vector length `N`: already done. 215 if (HWY_UNLIKELY(idx == count)) return; 216 217 const size_t remaining = count - idx; 218 HWY_DASSERT(0 != remaining && remaining < N); 219 const Vec<D> v = LoadN(d, inout + idx, remaining); 220 StoreN(IfThenElse(func(d, v), new_v, v), d, inout + idx, remaining); 221 } 222 223 // NOLINTNEXTLINE(google-readability-namespace-comments) 224 } // namespace HWY_NAMESPACE 225 } // namespace hwy 226 HWY_AFTER_NAMESPACE(); 227 228 #endif // HIGHWAY_HWY_CONTRIB_ALGO_TRANSFORM_INL_H_