skeleton-inl.h (2571B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // Demo of functions that might be called from multiple SIMD modules (either 17 // other -inl.h files, or a .cc file between begin/end_target-inl). This is 18 // optional - all SIMD code can reside in .cc files. However, this allows 19 // splitting code into different files while still inlining instead of requiring 20 // calling through function pointers. 21 22 // Per-target include guard. This is only required when using dynamic dispatch, 23 // i.e. including foreach_target.h. For static dispatch, a normal include 24 // guard would be fine because the header is only compiled once. 25 #if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE) 26 #ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ 27 #undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ 28 #else 29 #define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ 30 #endif 31 32 // It is fine to #include normal or *-inl headers. 33 #include "hwy/highway.h" 34 35 HWY_BEFORE_NAMESPACE(); 36 namespace skeleton { 37 namespace HWY_NAMESPACE { 38 39 // Highway ops reside here; ADL does not find templates nor builtins. 40 namespace hn = hwy::HWY_NAMESPACE; 41 42 // Example of a type-agnostic (caller-specified lane type) and width-agnostic 43 // (uses best available instruction set) function in a header. 44 // 45 // Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size. 46 template <class D, typename T> 47 HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array, 48 const T* HWY_RESTRICT add_array, 49 const size_t size, T* HWY_RESTRICT x_array) { 50 for (size_t i = 0; i < size; i += hn::Lanes(d)) { 51 const auto mul = hn::Load(d, mul_array + i); 52 const auto add = hn::Load(d, add_array + i); 53 auto x = hn::Load(d, x_array + i); 54 x = hn::MulAdd(mul, x, add); 55 hn::Store(x, d, x_array + i); 56 } 57 } 58 59 // NOLINTNEXTLINE(google-readability-namespace-comments) 60 } // namespace HWY_NAMESPACE 61 } // namespace skeleton 62 HWY_AFTER_NAMESPACE(); 63 64 #endif // include guard