tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

copy-inl.h (5166B)


      1 // Copyright 2022 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 // Per-target include guard
     17 #if defined(HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_) == \
     18    defined(HWY_TARGET_TOGGLE)  // NOLINT
     19 #ifdef HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
     20 #undef HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
     21 #else
     22 #define HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_
     23 #endif
     24 
     25 #include <stddef.h>
     26 #include <stdint.h>
     27 
     28 #include "hwy/highway.h"
     29 
     30 HWY_BEFORE_NAMESPACE();
     31 namespace hwy {
     32 namespace HWY_NAMESPACE {
     33 
     34 // These functions avoid having to write a loop plus remainder handling in the
     35 // (unfortunately still common) case where arrays are not aligned/padded. If the
     36 // inputs are known to be aligned/padded, it is more efficient to write a single
     37 // loop using Load(). We do not provide a CopyAlignedPadded because it
     38 // would be more verbose than such a loop.
     39 
     40 // Fills `to`[0, `count`) with `value`.
     41 template <class D, typename T = TFromD<D>>
     42 void Fill(D d, T value, size_t count, T* HWY_RESTRICT to) {
     43  const size_t N = Lanes(d);
     44  const Vec<D> v = Set(d, value);
     45 
     46  size_t idx = 0;
     47  if (count >= N) {
     48    for (; idx <= count - N; idx += N) {
     49      StoreU(v, d, to + idx);
     50    }
     51  }
     52 
     53  // `count` was a multiple of the vector length `N`: already done.
     54  if (HWY_UNLIKELY(idx == count)) return;
     55 
     56  const size_t remaining = count - idx;
     57  HWY_DASSERT(0 != remaining && remaining < N);
     58  SafeFillN(remaining, value, d, to + idx);
     59 }
     60 
     61 // Copies `from`[0, `count`) to `to`, which must not overlap `from`.
     62 template <class D, typename T = TFromD<D>>
     63 void Copy(D d, const T* HWY_RESTRICT from, size_t count, T* HWY_RESTRICT to) {
     64  const size_t N = Lanes(d);
     65 
     66  size_t idx = 0;
     67  if (count >= N) {
     68    for (; idx <= count - N; idx += N) {
     69      const Vec<D> v = LoadU(d, from + idx);
     70      StoreU(v, d, to + idx);
     71    }
     72  }
     73 
     74  // `count` was a multiple of the vector length `N`: already done.
     75  if (HWY_UNLIKELY(idx == count)) return;
     76 
     77  const size_t remaining = count - idx;
     78  HWY_DASSERT(0 != remaining && remaining < N);
     79  SafeCopyN(remaining, d, from + idx, to + idx);
     80 }
     81 
     82 // For idx in [0, count) in ascending order, appends `from[idx]` to `to` if the
     83 // corresponding mask element of `func(d, v)` is true. Returns the STL-style end
     84 // of the newly written elements in `to`.
     85 //
     86 // `func` is either a functor with a templated operator()(d, v) returning a
     87 // mask, or a generic lambda if using C++14. Due to apparent limitations of
     88 // Clang on Windows, it is currently necessary to add HWY_ATTR before the
     89 // opening { of the lambda to avoid errors about "function .. requires target".
     90 //
     91 // NOTE: this is only supported for 16-, 32- or 64-bit types.
     92 // NOTE: Func may be called a second time for elements it has already seen, but
     93 // these elements will not be written to `to` again.
     94 template <class D, class Func, typename T = TFromD<D>>
     95 T* CopyIf(D d, const T* HWY_RESTRICT from, size_t count, T* HWY_RESTRICT to,
     96          const Func& func) {
     97  const size_t N = Lanes(d);
     98 
     99  size_t idx = 0;
    100  if (count >= N) {
    101    for (; idx <= count - N; idx += N) {
    102      const Vec<D> v = LoadU(d, from + idx);
    103      to += CompressBlendedStore(v, func(d, v), d, to);
    104    }
    105  }
    106 
    107  // `count` was a multiple of the vector length `N`: already done.
    108  if (HWY_UNLIKELY(idx == count)) return to;
    109 
    110 #if HWY_MEM_OPS_MIGHT_FAULT
    111  // Proceed one by one.
    112  const CappedTag<T, 1> d1;
    113  for (; idx < count; ++idx) {
    114    using V1 = Vec<decltype(d1)>;
    115    // Workaround for -Waggressive-loop-optimizations on GCC 8
    116    // (iteration 2305843009213693951 invokes undefined behavior for T=i64)
    117    const uintptr_t addr = reinterpret_cast<uintptr_t>(from);
    118    const T* HWY_RESTRICT from_idx =
    119        reinterpret_cast<const T * HWY_RESTRICT>(addr + (idx * sizeof(T)));
    120    const V1 v = LoadU(d1, from_idx);
    121    // Avoid storing to `to` unless we know it should be kept - otherwise, we
    122    // might overrun the end if it was allocated for the exact count.
    123    if (CountTrue(d1, func(d1, v)) == 0) continue;
    124    StoreU(v, d1, to);
    125    to += 1;
    126  }
    127 #else
    128  // Start index of the last unaligned whole vector, ending at the array end.
    129  const size_t last = count - N;
    130  // Number of elements before `from` or already written.
    131  const size_t invalid = idx - last;
    132  HWY_DASSERT(0 != invalid && invalid < N);
    133  const Mask<D> mask = Not(FirstN(d, invalid));
    134  const Vec<D> v = MaskedLoad(mask, d, from + last);
    135  to += CompressBlendedStore(v, And(mask, func(d, v)), d, to);
    136 #endif
    137  return to;
    138 }
    139 
    140 // NOLINTNEXTLINE(google-readability-namespace-comments)
    141 }  // namespace HWY_NAMESPACE
    142 }  // namespace hwy
    143 HWY_AFTER_NAMESPACE();
    144 
    145 #endif  // HIGHWAY_HWY_CONTRIB_ALGO_COPY_INL_H_