tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

skeleton.cc (4886B)


      1 // Copyright 2020 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include "hwy/examples/skeleton.h"
     17 
     18 #include <stdio.h>
     19 
     20 // >>>> for dynamic dispatch only, skip if you want static dispatch
     21 
     22 // First undef to prevent error when re-included.
     23 #undef HWY_TARGET_INCLUDE
     24 // For dynamic dispatch, specify the name of the current file (unfortunately
     25 // __FILE__ is not reliable) so that foreach_target.h can re-include it.
     26 #define HWY_TARGET_INCLUDE "hwy/examples/skeleton.cc"
     27 // Generates code for each enabled target by re-including this source file.
     28 #include "hwy/foreach_target.h"  // IWYU pragma: keep
     29 
     30 // <<<< end of dynamic dispatch
     31 
     32 // Must come after foreach_target.h to avoid redefinition errors.
     33 #include "hwy/highway.h"
     34 
     35 // Optional, can instead add HWY_ATTR to all functions.
     36 HWY_BEFORE_NAMESPACE();
     37 
     38 namespace skeleton {
     39 // This namespace name is unique per target, which allows code for multiple
     40 // targets to co-exist in the same translation unit. Required when using dynamic
     41 // dispatch, otherwise optional.
     42 namespace HWY_NAMESPACE {
     43 namespace {
     44 
     45 // Highway ops reside here; ADL does not find templates nor builtins.
     46 namespace hn = hwy::HWY_NAMESPACE;
     47 
     48 // Per-target helper: floor(log2) of each input byte via the float exponent.
     49 template <class DF>
     50 HWY_ATTR_NO_MSAN void OneFloorLog2(const DF df,
     51                                   const uint8_t* HWY_RESTRICT values,
     52                                   uint8_t* HWY_RESTRICT log2) {
     53  // Byte and signed-integer tags with the same lane count as `df` (Rebind).
     54  const hn::Rebind<uint8_t, DF> du8;
     55  const hn::RebindToSigned<DF> di32;
     56  const auto bias = hn::Set(di32, 127);  // subtracted from the shifted bits
     57  // bytes -> int32 -> float; then reinterpret the float bits as integers.
     58  const auto widened = hn::PromoteTo(di32, hn::Load(du8, values));
     59  const auto as_float = hn::ConvertTo(df, widened);
     60  const auto shifted = hn::ShiftRight<23>(hn::BitCast(di32, as_float));
     61  hn::Store(hn::DemoteTo(du8, hn::Sub(shifted, bias)), du8, log2);
     62 }
     63 
     64 void CodepathDemo() {
     65  // Prints this pass's target name and whether it reports int64 support.
     66  // Capability macros (or #if HWY_TARGET == HWY_SSE4) pick per-target paths.
     67 #if !HWY_HAVE_INTEGER64
     68  const char* const int64_support = "No int64";
     69 #else
     70  const char* const int64_support = "Has int64";
     71 #endif
     72  printf("Target %s: %s\n", hwy::TargetName(HWY_TARGET), int64_support);
     73 }
     74 
     75 void FloorLog2(const uint8_t* HWY_RESTRICT values, size_t count,
     76               uint8_t* HWY_RESTRICT log2) {
     77  CodepathDemo();  // prints the target name on every call
     78  // Whole vectors first, then any leftover elements one lane at a time.
     79  const hn::ScalableTag<float> d_full;
     80  const hn::CappedTag<float, 1> d_single;
     81  const size_t lanes = hn::Lanes(d_full);
     82  size_t pos = 0;
     83  for (; count - pos >= lanes; pos += lanes) {
     84    OneFloorLog2(d_full, values + pos, log2 + pos);
     85  }
     86  for (; pos != count; ++pos) {  // remainder: fewer than `lanes` elements
     87    OneFloorLog2(d_single, values + pos, log2 + pos);
     88  }
     89 }
     90 
     91 }  // namespace
     92 // NOLINTNEXTLINE(google-readability-namespace-comments)
     93 }  // namespace HWY_NAMESPACE
     94 }  // namespace skeleton
     95 HWY_AFTER_NAMESPACE();
     96 
     97 // The table of pointers to the various implementations in HWY_NAMESPACE must
     98 // be compiled only once (foreach_target #includes this file multiple times).
     99 // HWY_ONCE is true for only one of these 'compilation passes'.
    100 #if HWY_ONCE
    101 
    102 namespace skeleton {
    103 
    104 // This macro declares a static array used for dynamic dispatch; it resides in
    105 // the same outer namespace that contains FloorLog2.
    106 HWY_EXPORT(FloorLog2);
    107 
    108 // Optional wrapper, needed only when the function is exposed through the
    109 // header file. Code inside this module can use
    110 // HWY_DYNAMIC_DISPATCH(FloorLog2) directly, which amounts to inlining this.
    111 HWY_DLLEXPORT void CallFloorLog2(const uint8_t* HWY_RESTRICT in,
    112                                 const size_t count,
    113                                 uint8_t* HWY_RESTRICT out) {
    114  // Lives outside HWY_NAMESPACE because it refers to the per-target
    115  // implementations defined there and calls the appropriate one.
    116  // HWY_STATIC_DISPATCH is the counterpart for static dispatch.
    117  HWY_DYNAMIC_DISPATCH(FloorLog2)(in, count, out);
    118 }
    119 
    120 HWY_DLLEXPORT void SavedCallFloorLog2(const uint8_t* HWY_RESTRICT in,
    121                                      const size_t count,
    122                                      uint8_t* HWY_RESTRICT out) {
    123  const auto chosen_impl = HWY_DYNAMIC_POINTER(FloorLog2);  // saved callable
    124  chosen_impl(in, count, out);
    125 }
    126 
    127 // Optional: anything to compile only once, e.g. non-SIMD implementations of
    128 // public functions provided by this module, can go inside #if HWY_ONCE.
    129 
    130 }  // namespace skeleton
    131 #endif  // HWY_ONCE