tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

aligned_allocator.h (15965B)


      1 // Copyright 2020 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
     17 #define HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_
     18 
     19 // Memory allocator with support for alignment and offsets.
     20 
     21 #include <algorithm>
     22 #include <array>
     23 #include <cassert>
     24 #include <cstdint>
     25 #include <cstring>
     26 #include <initializer_list>
     27 #include <memory>
     28 #include <type_traits>
     29 #include <utility>
     30 #include <vector>
     31 
     32 #include "hwy/base.h"
     33 #include "hwy/per_target.h"
     34 
     35 #include <mozilla/Attributes.h>
     36 
     37 namespace hwy {
     38 
     39 // Minimum alignment of allocated memory for use in HWY_ASSUME_ALIGNED, which
     40 // requires a literal. To prevent false sharing, this should be at least the
     41 // L1 cache line size, usually 64 bytes. However, Intel's L2 prefetchers may
     42 // access pairs of lines, and M1 L2 and POWER8 lines are also 128 bytes.
     43 #define HWY_ALIGNMENT 128
     44 
     45 // `align` is in bytes.
     46 template <typename T>
     47 HWY_API constexpr bool IsAligned(T* ptr, size_t align = HWY_ALIGNMENT) {
     48  return reinterpret_cast<uintptr_t>(ptr) % align == 0;
     49 }
     50 
     51 // Pointers to functions equivalent to malloc/free with an opaque void* passed
     52 // to them.
     53 using AllocPtr = void* (*)(void* opaque, size_t bytes);
     54 using FreePtr = void (*)(void* opaque, void* memory);
     55 
     56 // Returns null or a pointer to at least `payload_size` (which can be zero)
     57 // bytes of newly allocated memory, aligned to the larger of HWY_ALIGNMENT and
     58 // the vector size. Calls `alloc` with the passed `opaque` pointer to obtain
     59 // memory or malloc() if it is null.
     60 HWY_DLLEXPORT void* AllocateAlignedBytes(size_t payload_size,
     61                                         AllocPtr alloc_ptr = nullptr,
     62                                         void* opaque_ptr = nullptr);
     63 
     64 // Frees all memory. No effect if `aligned_pointer` == nullptr, otherwise it
     65 // must have been returned from a previous call to `AllocateAlignedBytes`.
     66 // Calls `free_ptr` with the passed `opaque_ptr` pointer to free the memory; if
     67 // `free_ptr` function is null, uses the default free().
     68 HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer,
     69                                    FreePtr free_ptr, void* opaque_ptr);
     70 
     71 // Class that deletes the aligned pointer passed to operator() calling the
     72 // destructor before freeing the pointer. This is equivalent to the
     73 // std::default_delete but for aligned objects. For a similar deleter equivalent
     74 // to free() for aligned memory see AlignedFreer().
     75 class AlignedDeleter {
     76 public:
     77  AlignedDeleter() : free_(nullptr), opaque_ptr_(nullptr) {}
     78  AlignedDeleter(FreePtr free_ptr, void* opaque_ptr)
     79      : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
     80 
     81  template <typename T>
     82  void operator()(T* aligned_pointer) const {
     83    return DeleteAlignedArray(aligned_pointer, free_, opaque_ptr_,
     84                              TypedArrayDeleter<T>);
     85  }
     86 
     87 private:
     88  template <typename T>
     89  static void TypedArrayDeleter(void* ptr, size_t size_in_bytes) {
     90    size_t elems = size_in_bytes / sizeof(T);
     91    for (size_t i = 0; i < elems; i++) {
     92      // Explicitly call the destructor on each element.
     93      (static_cast<T*>(ptr) + i)->~T();
     94    }
     95  }
     96 
     97  // Function prototype that calls the destructor for each element in a typed
     98  // array. TypeArrayDeleter<T> would match this prototype.
     99  using ArrayDeleter = void (*)(void* t_ptr, size_t t_size);
    100 
    101  HWY_DLLEXPORT static void DeleteAlignedArray(void* aligned_pointer,
    102                                               FreePtr free_ptr,
    103                                               void* opaque_ptr,
    104                                               ArrayDeleter deleter);
    105 
    106  FreePtr free_;
    107  void* opaque_ptr_;
    108 };
    109 
    110 // Unique pointer to T with custom aligned deleter. This can be a single
    111 // element U or an array of element if T is a U[]. The custom aligned deleter
    112 // will call the destructor on U or each element of a U[] in the array case.
    113 template <typename T>
    114 using AlignedUniquePtr = std::unique_ptr<T, AlignedDeleter>;
    115 
    116 // Aligned memory equivalent of make_unique<T> using the custom allocators
    117 // alloc/free with the passed `opaque` pointer. This function calls the
    118 // constructor with the passed Args... and calls the destructor of the object
    119 // when the AlignedUniquePtr is destroyed.
    120 template <typename T, typename... Args>
    121 AlignedUniquePtr<T> MakeUniqueAlignedWithAlloc(AllocPtr alloc, FreePtr free,
    122                                               void* opaque, Args&&... args) {
    123  T* ptr = static_cast<T*>(AllocateAlignedBytes(sizeof(T), alloc, opaque));
    124  return AlignedUniquePtr<T>(new (ptr) T(std::forward<Args>(args)...),
    125                             AlignedDeleter(free, opaque));
    126 }
    127 
    128 // Similar to MakeUniqueAlignedWithAlloc but using the default alloc/free
    129 // functions.
    130 template <typename T, typename... Args>
    131 AlignedUniquePtr<T> MakeUniqueAligned(Args&&... args) {
    132  T* ptr = static_cast<T*>(AllocateAlignedBytes(sizeof(T)));
    133  return AlignedUniquePtr<T>(new (ptr) T(std::forward<Args>(args)...),
    134                             AlignedDeleter());
    135 }
    136 
    137 template <class T>
    138 struct AlignedAllocator {
    139  using value_type = T;
    140 
    141  AlignedAllocator() = default;
    142 
    143  template <class V>
    144  explicit AlignedAllocator(const AlignedAllocator<V>&) noexcept {}
    145 
    146  template <class V>
    147  value_type* allocate(V n) {
    148    static_assert(std::is_integral<V>::value,
    149                  "AlignedAllocator only supports integer types");
    150    static_assert(sizeof(V) <= sizeof(std::size_t),
    151                  "V n must be smaller or equal size_t to avoid overflow");
    152    return static_cast<value_type*>(
    153        AllocateAlignedBytes(static_cast<std::size_t>(n) * sizeof(value_type)));
    154  }
    155 
    156  template <class V>
    157  void deallocate(value_type* p, HWY_MAYBE_UNUSED V n) {
    158    return FreeAlignedBytes(p, nullptr, nullptr);
    159  }
    160 };
    161 
// All AlignedAllocator instantiations are stateless and interchangeable —
// memory obtained from one can be released by any other — so allocators
// always compare equal, as the std Allocator requirements permit.
template <class T, class V>
constexpr bool operator==(const AlignedAllocator<T>&,
                          const AlignedAllocator<V>&) noexcept {
  return true;
}
    167 
// Complement of operator==: stateless allocators are never unequal.
template <class T, class V>
constexpr bool operator!=(const AlignedAllocator<T>&,
                          const AlignedAllocator<V>&) noexcept {
  return false;
}
    173 
    174 template <class T>
    175 using AlignedVector = std::vector<T, AlignedAllocator<T>>;
    176 
    177 // Helpers for array allocators (avoids overflow)
    178 namespace detail {
    179 
    180 // Returns x such that 1u << x == n (if n is a power of two).
    181 static inline constexpr size_t ShiftCount(size_t n) {
    182  return (n <= 1) ? 0 : 1 + ShiftCount(n / 2);
    183 }
    184 
    185 template <typename T>
    186 T* AllocateAlignedItems(size_t items, AllocPtr alloc_ptr, void* opaque_ptr) {
    187  constexpr size_t kSize = sizeof(T);
    188 
    189  constexpr bool kIsPow2 = (kSize & (kSize - 1)) == 0;
    190  constexpr size_t kBits = ShiftCount(kSize);
    191  static_assert(!kIsPow2 || (1ull << kBits) == kSize, "ShiftCount has a bug");
    192 
    193  const size_t bytes = kIsPow2 ? items << kBits : items * kSize;
    194  const size_t check = kIsPow2 ? bytes >> kBits : bytes / kSize;
    195  if (check != items) {
    196    return nullptr;  // overflowed
    197  }
    198  return static_cast<T*>(AllocateAlignedBytes(bytes, alloc_ptr, opaque_ptr));
    199 }
    200 
    201 }  // namespace detail
    202 
    203 // Aligned memory equivalent of make_unique<T[]> for array types using the
    204 // custom allocators alloc/free. This function calls the constructor with the
    205 // passed Args... on every created item. The destructor of each element will be
    206 // called when the AlignedUniquePtr is destroyed.
    207 template <typename T, typename... Args>
    208 AlignedUniquePtr<T[]> MakeUniqueAlignedArrayWithAlloc(
    209    size_t items, AllocPtr alloc, FreePtr free, void* opaque, Args&&... args) {
    210  T* ptr = detail::AllocateAlignedItems<T>(items, alloc, opaque);
    211  if (ptr != nullptr) {
    212    for (size_t i = 0; i < items; i++) {
    213      new (ptr + i) T(args...);
    214    }
    215  }
    216  return AlignedUniquePtr<T[]>(ptr, AlignedDeleter(free, opaque));
    217 }
    218 
    219 template <typename T, typename... Args>
    220 AlignedUniquePtr<T[]> MakeUniqueAlignedArray(size_t items, Args&&... args) {
    221  return MakeUniqueAlignedArrayWithAlloc<T, Args...>(
    222      items, nullptr, nullptr, nullptr, std::forward<Args>(args)...);
    223 }
    224 
    225 // Custom deleter for std::unique_ptr equivalent to using free() as a deleter
    226 // but for aligned memory.
    227 class AlignedFreer {
    228 public:
    229  // Pass address of this to ctor to skip deleting externally-owned memory.
    230  static void DoNothing(void* /*opaque*/, void* /*aligned_pointer*/) {}
    231 
    232  AlignedFreer() : free_(nullptr), opaque_ptr_(nullptr) {}
    233  AlignedFreer(FreePtr free_ptr, void* opaque_ptr)
    234      : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
    235 
    236  template <typename T>
    237  void operator()(T* aligned_pointer) const {
    238    FreeAlignedBytes(aligned_pointer, free_, opaque_ptr_);
    239  }
    240 
    241 private:
    242  FreePtr free_;
    243  void* opaque_ptr_;
    244 };
    245 
    246 // Unique pointer to single POD, or (if T is U[]) an array of POD. For non POD
    247 // data use AlignedUniquePtr.
    248 template <typename T>
    249 using AlignedFreeUniquePtr = std::unique_ptr<T, AlignedFreer>;
    250 
    251 // Allocate an aligned and uninitialized array of POD values as a unique_ptr.
    252 // Upon destruction of the unique_ptr the aligned array will be freed.
    253 template <typename T>
    254 AlignedFreeUniquePtr<T[]> AllocateAligned(const size_t items, AllocPtr alloc,
    255                                          FreePtr free, void* opaque) {
    256  static_assert(std::is_trivially_copyable<T>::value,
    257                "AllocateAligned: requires trivially copyable T");
    258  static_assert(std::is_trivially_destructible<T>::value,
    259                "AllocateAligned: requires trivially destructible T");
    260  return AlignedFreeUniquePtr<T[]>(
    261      detail::AllocateAlignedItems<T>(items, alloc, opaque),
    262      AlignedFreer(free, opaque));
    263 }
    264 
    265 // Same as previous AllocateAligned(), using default allocate/free functions.
    266 template <typename T>
    267 AlignedFreeUniquePtr<T[]> AllocateAligned(const size_t items) {
    268  return AllocateAligned<T>(items, nullptr, nullptr, nullptr);
    269 }
    270 
    271 // A simple span containing data and size of data.
    272 template <typename T>
    273 class Span {
    274 public:
    275  Span() = default;
    276  Span(T* data, size_t size) : size_(size), data_(data) {}
    277  template <typename U>
    278  MOZ_IMPLICIT Span(U u) : Span(u.data(), u.size()) {}
    279  MOZ_IMPLICIT Span(std::initializer_list<const T> v) : Span(v.begin(), v.size()) {}
    280 
    281  // Copies the contents of the initializer list to the span.
    282  Span<T>& operator=(std::initializer_list<const T> v) {
    283    HWY_DASSERT(size_ == v.size());
    284    CopyBytes(v.begin(), data_, sizeof(T) * std::min(size_, v.size()));
    285    return *this;
    286  }
    287 
    288  // Returns the size of the contained data.
    289  size_t size() const { return size_; }
    290 
    291  // Returns a pointer to the contained data.
    292  T* data() { return data_; }
    293  T* data() const { return data_; }
    294 
    295  // Returns the element at index.
    296  T& operator[](size_t index) const { return data_[index]; }
    297 
    298  // Returns an iterator pointing to the first element of this span.
    299  T* begin() { return data_; }
    300 
    301  // Returns a const iterator pointing to the first element of this span.
    302  constexpr const T* cbegin() const { return data_; }
    303 
    304  // Returns an iterator pointing just beyond the last element at the
    305  // end of this span.
    306  T* end() { return data_ + size_; }
    307 
    308  // Returns a const iterator pointing just beyond the last element at the
    309  // end of this span.
    310  constexpr const T* cend() const { return data_ + size_; }
    311 
    312 private:
    313  size_t size_ = 0;
    314  T* data_ = nullptr;
    315 };
    316 
// A multi dimensional array containing an aligned buffer.
//
// To maintain alignment, the innermost dimension will be padded to ensure all
// innermost arrays are aligned.
template <typename T, size_t axes>
class AlignedNDArray {
  static_assert(std::is_trivial<T>::value,
                "AlignedNDArray can only contain trivial types");

 public:
  // Move-only: buffer_ is a unique_ptr, which implicitly disables copying.
  AlignedNDArray(AlignedNDArray&& other) = default;
  AlignedNDArray& operator=(AlignedNDArray&& other) = default;

  // Constructs an array of the provided shape and fills it with zeros.
  explicit AlignedNDArray(std::array<size_t, axes> shape) : shape_(shape) {
    sizes_ = ComputeSizes(shape_);
    memory_shape_ = shape_;
    // Round the innermost dimension up to the number of bytes available for
    // SIMD operations on this architecture to make sure that each innermost
    // array is aligned from the first element.
    memory_shape_[axes - 1] = RoundUpTo(memory_shape_[axes - 1], VectorBytes());
    memory_sizes_ = ComputeSizes(memory_shape_);
    buffer_ = hwy::AllocateAligned<T>(memory_size());
    hwy::ZeroBytes(buffer_.get(), memory_size() * sizeof(T));
  }

  // Returns a span containing the innermost array at the provided indices.
  // The span has the logical (unpadded) length sizes_[indices.size()].
  Span<T> operator[](std::array<const size_t, axes - 1> indices) {
    return Span<T>(buffer_.get() + Offset(indices), sizes_[indices.size()]);
  }

  // Returns a const span containing the innermost array at the provided
  // indices.
  Span<const T> operator[](std::array<const size_t, axes - 1> indices) const {
    return Span<const T>(buffer_.get() + Offset(indices),
                         sizes_[indices.size()]);
  }

  // Returns the shape of the array, which might be smaller than the allocated
  // buffer after padding the last axis to alignment.
  const std::array<size_t, axes>& shape() const { return shape_; }

  // Returns the shape of the allocated buffer, which might be larger than the
  // used size of the array after padding to alignment.
  const std::array<size_t, axes>& memory_shape() const { return memory_shape_; }

  // Returns the size of the array, which might be smaller than the allocated
  // buffer after padding the last axis to alignment.
  size_t size() const { return sizes_[0]; }

  // Returns the size of the allocated buffer, which might be larger than the
  // used size of the array after padding to alignment.
  size_t memory_size() const { return memory_sizes_[0]; }

  // Returns a pointer to the allocated buffer.
  T* data() { return buffer_.get(); }

  // Returns a const pointer to the buffer.
  const T* data() const { return buffer_.get(); }

  // Truncates the array by updating its shape.
  //
  // The new shape must be equal to or less than the old shape in all axes.
  //
  // Doesn't modify underlying memory.
  void truncate(const std::array<size_t, axes>& new_shape) {
#if HWY_IS_DEBUG_BUILD
    for (size_t axis_index = 0; axis_index < axes; ++axis_index) {
      HWY_ASSERT(new_shape[axis_index] <= shape_[axis_index]);
    }
#endif
    shape_ = new_shape;
    sizes_ = ComputeSizes(shape_);
  }

 private:
  std::array<size_t, axes> shape_;         // logical shape
  std::array<size_t, axes> memory_shape_;  // shape with last axis padded
  // sizes_[i] is the element count of a sub-array starting at axis i;
  // sizes_[0] is the total logical size and sizes_[axes] is 1.
  std::array<size_t, axes + 1> sizes_;
  // Same layout as sizes_, but computed from the padded memory_shape_;
  // these act as the strides used by Offset().
  std::array<size_t, axes + 1> memory_sizes_;
  hwy::AlignedFreeUniquePtr<T[]> buffer_;

  // Computes offset in the buffer based on the provided indices.
  size_t Offset(std::array<const size_t, axes - 1> indices) const {
    size_t offset = 0;
    size_t shape_index = 0;
    for (const size_t axis_index : indices) {
      // Strides come from the padded (memory) sizes so that each innermost
      // array starts at an aligned element.
      offset += memory_sizes_[shape_index + 1] * axis_index;
      shape_index++;
    }
    return offset;
  }

  // Computes the sizes of all sub arrays based on the sizes of each axis.
  //
  // Does this by multiplying the size of each axis with the previous one in
  // reverse order, starting with the conceptual axis of size 1 containing the
  // actual elements in the array.
  static std::array<size_t, axes + 1> ComputeSizes(
      std::array<size_t, axes> shape) {
    std::array<size_t, axes + 1> sizes;
    size_t axis = shape.size();
    sizes[axis] = 1;
    while (axis > 0) {
      --axis;
      sizes[axis] = sizes[axis + 1] * shape[axis];
    }
    return sizes;
  }
};
    427 
    428 }  // namespace hwy
    429 #endif  // HIGHWAY_HWY_ALIGNED_ALLOCATOR_H_