tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

prefetch.h (7210B)


      1 // Copyright 2023 The Abseil Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // -----------------------------------------------------------------------------
     16 // File: prefetch.h
     17 // -----------------------------------------------------------------------------
     18 //
     19 // This header file defines prefetch functions to prefetch memory contents
     20 // into the first level cache (L1) for the current CPU. The prefetch logic
     21 // offered in this header is limited to prefetching first level cachelines
     22 // only, and is aimed at relatively 'simple' prefetching logic.
     23 //
     24 #ifndef ABSL_BASE_PREFETCH_H_
     25 #define ABSL_BASE_PREFETCH_H_
     26 
     27 #include "absl/base/attributes.h"
     28 #include "absl/base/config.h"
     29 
     30 #if defined(ABSL_INTERNAL_HAVE_SSE)
     31 #include <xmmintrin.h>
     32 #endif
     33 
     34 #if defined(_MSC_VER)
     35 #include <intrin.h>
     36 #if defined(ABSL_INTERNAL_HAVE_SSE)
     37 #pragma intrinsic(_mm_prefetch)
     38 #endif
     39 #endif
     40 
     41 namespace absl {
     42 ABSL_NAMESPACE_BEGIN
     43 
     44 // Moves data into the L1 cache before it is read, or "prefetches" it.
     45 //
     46 // The value of `addr` is the address of the memory to prefetch. If
     47 // the target and compiler support it, data prefetch instructions are
     48 // generated. If the prefetch is done some time before the memory is
     49 // read, it may be in the cache by the time the read occurs.
     50 //
     51 // This method prefetches data with the highest degree of temporal locality;
     52 // data is prefetched where possible into all levels of the cache.
     53 //
     54 // Incorrect or gratuitous use of this function can degrade performance.
     55 // Use this function only when representative benchmarks show an improvement.
     56 //
     57 // Example:
     58 //
     59 //  // Computes incremental checksum for `data`.
     60 //  int ComputeChecksum(int sum, absl::string_view data);
     61 //
     62 //  // Computes cumulative checksum for all values in `data`
     63 //  int ComputeChecksum(absl::Span<const std::string> data) {
     64 //    int sum = 0;
     65 //    auto it = data.begin();
     66 //    auto pit = data.begin();
     67 //    auto end = data.end();
     68 //    for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) {
     69 //      absl::PrefetchToLocalCache(pit->data());
     70 //    }
     71 //    for (; pit != end; ++pit, ++it) {
     72 //      sum = ComputeChecksum(sum, *it);
     73 //      absl::PrefetchToLocalCache(pit->data());
     74 //    }
     75 //    for (; it != end; ++it) {
     76 //      sum = ComputeChecksum(sum, *it);
     77 //    }
     78 //    return sum;
     79 //  }
     80 //
     81 void PrefetchToLocalCache(const void* addr);
     82 
     83 // Moves data into the L1 cache before it is read, or "prefetches" it.
     84 //
     85 // This function is identical to `PrefetchToLocalCache()` except that it has
     86 // non-temporal locality: the fetched data should not be left in any of the
     87 // cache tiers. This is useful for cases where the data is used only once /
     88 // short term, for example, invoking a destructor on an object.
     89 //
     90 // Incorrect or gratuitous use of this function can degrade performance.
     91 // Use this function only when representative benchmarks show an improvement.
     92 //
     93 // Example:
     94 //
     95 //  template <typename Iterator>
     96 //  void DestroyPointers(Iterator begin, Iterator end) {
     97 //    // Warm up: prefetch a few elements ahead before the first delete.
     98 //
     99 //    int dist = 8;
    100 //    auto prefetch_it = begin;
    101 //    while (prefetch_it != end && --dist) {
    102 //      absl::PrefetchToLocalCacheNta(*prefetch_it++);
    103 //    }
    104 //    while (prefetch_it != end) {
    105 //      delete *begin++;
    106 //      absl::PrefetchToLocalCacheNta(*prefetch_it++);
    107 //    }
    108 //    while (begin != end) {
    109 //      delete *begin++;
    110 //    }
    111 //  }
    112 //
    113 void PrefetchToLocalCacheNta(const void* addr);
    114 
    115 // Moves data into the L1 cache with the intent to modify it.
    116 //
    117 // This function is similar to `PrefetchToLocalCache()` except that it
    118 // prefetches cachelines with an 'intent to modify'. This typically includes
    119 // invalidating cache entries for this address in all other cache tiers, and an
    120 // exclusive access intent.
    121 //
    122 // Incorrect or gratuitous use of this function can degrade performance. As this
    123 // function can invalidate cached cachelines on other caches and computer cores,
    124 // incorrect usage of this function can have an even greater negative impact
    125 // than incorrect regular prefetches.
    126 // Use this function only when representative benchmarks show an improvement.
    127 //
    128 // Example:
    129 //
    130 //  void* Arena::Allocate(size_t size) {
    131 //    void* ptr = AllocateBlock(size);
    132 //    absl::PrefetchToLocalCacheForWrite(ptr);
    133 //    return ptr;
    134 //  }
    135 //
    136 void PrefetchToLocalCacheForWrite(const void* addr);
    137 
    138 #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
    139 
    140 #define ABSL_HAVE_PREFETCH 1
    141 
    142 // See __builtin_prefetch:
    143 // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
    144 //
    145 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
    146    const void* addr) {
    147  __builtin_prefetch(addr, 0, 3);
    148 }
    149 
    150 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
    151    const void* addr) {
    152  __builtin_prefetch(addr, 0, 0);
    153 }
    154 
    155 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
    156    const void* addr) {
    157  // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
    158  // unless -march=broadwell or newer; this is not generally the default, so we
    159  // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
    160  // processors and has been present on AMD processors since the K6-2.
    161 #if defined(__x86_64__) && !defined(__PRFCHW__)
    162  asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
    163 #else
    164  __builtin_prefetch(addr, 1, 3);
    165 #endif
    166 }
    167 
    168 #elif defined(ABSL_INTERNAL_HAVE_SSE)
    169 
    170 #define ABSL_HAVE_PREFETCH 1
    171 
    172 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
    173    const void* addr) {
    174  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
    175 }
    176 
    177 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
    178    const void* addr) {
    179  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
    180 }
    181 
    182 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
    183    const void* addr) {
    184 #if defined(_MM_HINT_ET0)
    185  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
    186 #elif !defined(_MSC_VER) && defined(__x86_64__)
    187  // _MM_HINT_ET0 is not universally supported. As we commented further
    188  // up, PREFETCHW is recognized as a no-op on older Intel processors
    189  // and has been present on AMD processors since the K6-2. We have this
    190  // disabled for MSVC compilers as this miscompiles on older MSVC compilers.
    191  asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
    192 #endif
    193 }
    194 
    195 #else
    196 
    197 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
    198    const void* addr) {}
    199 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
    200    const void* addr) {}
    201 ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
    202    const void* addr) {}
    203 
    204 #endif
    205 
    206 ABSL_NAMESPACE_END
    207 }  // namespace absl
    208 
    209 #endif  // ABSL_BASE_PREFETCH_H_