skeleton.cc (4886B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include "hwy/examples/skeleton.h" 17 18 #include <stdio.h> 19 20 // >>>> for dynamic dispatch only, skip if you want static dispatch 21 22 // First undef to prevent error when re-included. 23 #undef HWY_TARGET_INCLUDE 24 // For dynamic dispatch, specify the name of the current file (unfortunately 25 // __FILE__ is not reliable) so that foreach_target.h can re-include it. 26 #define HWY_TARGET_INCLUDE "hwy/examples/skeleton.cc" 27 // Generates code for each enabled target by re-including this source file. 28 #include "hwy/foreach_target.h" // IWYU pragma: keep 29 30 // <<<< end of dynamic dispatch 31 32 // Must come after foreach_target.h to avoid redefinition errors. 33 #include "hwy/highway.h" 34 35 // Optional, can instead add HWY_ATTR to all functions. 36 HWY_BEFORE_NAMESPACE(); 37 38 namespace skeleton { 39 // This namespace name is unique per target, which allows code for multiple 40 // targets to co-exist in the same translation unit. Required when using dynamic 41 // dispatch, otherwise optional. 42 namespace HWY_NAMESPACE { 43 namespace { 44 45 // Highway ops reside here; ADL does not find templates nor builtins. 46 namespace hn = hwy::HWY_NAMESPACE; 47 48 // Computes log2 by converting to a vector of floats. Compiled once per target. 49 template <class DF> 50 HWY_ATTR_NO_MSAN void OneFloorLog2(const DF df, 51 const uint8_t* HWY_RESTRICT values, 52 uint8_t* HWY_RESTRICT log2) { 53 // Type tags for converting to other element types (Rebind = same count). 54 const hn::RebindToSigned<DF> d32; 55 const hn::Rebind<uint8_t, DF> d8; 56 using VI32 = hn::Vec<decltype(d32)>; 57 58 const VI32 vi32 = hn::PromoteTo(d32, hn::Load(d8, values)); 59 const VI32 bits = hn::BitCast(d32, hn::ConvertTo(df, vi32)); 60 const VI32 exponent = hn::Sub(hn::ShiftRight<23>(bits), hn::Set(d32, 127)); 61 hn::Store(hn::DemoteTo(d8, exponent), d8, log2); 62 } 63 64 void CodepathDemo() { 65 // Highway defaults to portability, but per-target codepaths may be selected 66 // via #if HWY_TARGET == HWY_SSE4 or by testing capability macros: 67 #if HWY_HAVE_INTEGER64 68 const char* gather = "Has int64"; 69 #else 70 const char* gather = "No int64"; 71 #endif 72 printf("Target %s: %s\n", hwy::TargetName(HWY_TARGET), gather); 73 } 74 75 void FloorLog2(const uint8_t* HWY_RESTRICT values, size_t count, 76 uint8_t* HWY_RESTRICT log2) { 77 CodepathDemo(); 78 79 const hn::ScalableTag<float> df; 80 const size_t N = hn::Lanes(df); 81 size_t i = 0; 82 for (; i + N <= count; i += N) { 83 OneFloorLog2(df, values + i, log2 + i); 84 } 85 for (; i < count; ++i) { 86 hn::CappedTag<float, 1> d1; 87 OneFloorLog2(d1, values + i, log2 + i); 88 } 89 } 90 91 } // namespace 92 // NOLINTNEXTLINE(google-readability-namespace-comments) 93 } // namespace HWY_NAMESPACE 94 } // namespace skeleton 95 HWY_AFTER_NAMESPACE(); 96 97 // The table of pointers to the various implementations in HWY_NAMESPACE must 98 // be compiled only once (foreach_target #includes this file multiple times). 99 // HWY_ONCE is true for only one of these 'compilation passes'. 100 #if HWY_ONCE 101 102 namespace skeleton { 103 104 // This macro declares a static array used for dynamic dispatch; it resides in 105 // the same outer namespace that contains FloorLog2. 106 HWY_EXPORT(FloorLog2); 107 108 // This function is optional and only needed in the case of exposing it in the 109 // header file. Otherwise using HWY_DYNAMIC_DISPATCH(FloorLog2) in this module 110 // is equivalent to inlining this function. 111 HWY_DLLEXPORT void CallFloorLog2(const uint8_t* HWY_RESTRICT in, 112 const size_t count, 113 uint8_t* HWY_RESTRICT out) { 114 // This must reside outside of HWY_NAMESPACE because it references (calls the 115 // appropriate one from) the per-target implementations there. 116 // For static dispatch, use HWY_STATIC_DISPATCH. 117 return HWY_DYNAMIC_DISPATCH(FloorLog2)(in, count, out); 118 } 119 120 HWY_DLLEXPORT void SavedCallFloorLog2(const uint8_t* HWY_RESTRICT in, 121 const size_t count, 122 uint8_t* HWY_RESTRICT out) { 123 const auto ptr = HWY_DYNAMIC_POINTER(FloorLog2); 124 return ptr(in, count, out); 125 } 126 127 // Optional: anything to compile only once, e.g. non-SIMD implementations of 128 // public functions provided by this module, can go inside #if HWY_ONCE. 129 130 } // namespace skeleton 131 #endif // HWY_ONCE