aligned_allocator.cc (5527B)
// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hwy/aligned_allocator.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>  // malloc

#include <atomic>
#include <limits>

#include "hwy/base.h"

namespace hwy {
namespace {

#if HWY_ARCH_RISCV && defined(__riscv_v_intrinsic) && \
    __riscv_v_intrinsic >= 11000
// Not actually an upper bound on the size, but this value prevents crossing a
// 4K boundary (relevant on Andes).
constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096);
#else
constexpr size_t kAlignment = HWY_ALIGNMENT;
#endif

#if HWY_ARCH_X86
// On x86, aliasing can only occur at multiples of 2K. To reduce the chance of
// allocations being equal mod 2K, we round up to kAlias and add a cyclic
// offset which is a multiple of kAlignment. Rounding up to only 1K decreases
// the number of alias-free allocations, but also wastes less memory.
constexpr size_t kAlias = HWY_MAX(kAlignment, 1024);
#else
constexpr size_t kAlias = kAlignment;
#endif

// Bookkeeping stored immediately before the aligned payload so that
// FreeAlignedBytes/DeleteAlignedArray can recover the original allocation
// pointer and the caller-requested size. Packed so its layout is exactly the
// two fields with no padding.
#pragma pack(push, 1)
struct AllocationHeader {
  void* allocated;      // pointer originally returned by malloc/alloc_ptr
  size_t payload_size;  // bytes requested by the caller of AllocateAlignedBytes
};
#pragma pack(pop)

// Returns a 'random' (cyclical) offset for AllocateAlignedBytes.
57 size_t NextAlignedOffset() { 58 static std::atomic<size_t> next{0}; 59 static_assert(kAlias % kAlignment == 0, "kAlias must be a multiple"); 60 constexpr size_t kGroups = kAlias / kAlignment; 61 const size_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups; 62 const size_t offset = kAlignment * group; 63 HWY_DASSERT((offset % kAlignment == 0) && offset <= kAlias); 64 return offset; 65 } 66 67 } // namespace 68 69 HWY_DLLEXPORT void* AllocateAlignedBytes(const size_t payload_size, 70 AllocPtr alloc_ptr, void* opaque_ptr) { 71 HWY_ASSERT(payload_size != 0); // likely a bug in caller 72 if (payload_size >= std::numeric_limits<size_t>::max() / 2) { 73 HWY_DASSERT(false && "payload_size too large"); 74 return nullptr; 75 } 76 77 size_t offset = NextAlignedOffset(); 78 79 // What: | misalign | unused | AllocationHeader |payload 80 // Size: |<= kAlias | offset |payload_size 81 // ^allocated.^aligned.^header............^payload 82 // The header must immediately precede payload, which must remain aligned. 83 // To avoid wasting space, the header resides at the end of `unused`, 84 // which therefore cannot be empty (offset == 0). 85 if (offset == 0) { 86 offset = RoundUpTo(sizeof(AllocationHeader), kAlignment); 87 } 88 89 const size_t allocated_size = kAlias + offset + payload_size; 90 void* allocated; 91 if (alloc_ptr == nullptr) { 92 allocated = malloc(allocated_size); 93 } else { 94 allocated = (*alloc_ptr)(opaque_ptr, allocated_size); 95 } 96 if (allocated == nullptr) return nullptr; 97 // Always round up even if already aligned - we already asked for kAlias 98 // extra bytes and there's no way to give them back. 
99 uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias; 100 static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2"); 101 static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias"); 102 aligned &= ~(kAlias - 1); 103 104 const uintptr_t payload = aligned + offset; // still aligned 105 HWY_DASSERT(payload % kAlignment == 0); 106 107 // Stash `allocated` and payload_size inside header for FreeAlignedBytes(). 108 // The allocated_size can be reconstructed from the payload_size. 109 AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1; 110 HWY_DASSERT(reinterpret_cast<uintptr_t>(header) >= aligned); 111 header->allocated = allocated; 112 header->payload_size = payload_size; 113 114 return HWY_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), kAlignment); 115 } 116 117 HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer, 118 FreePtr free_ptr, void* opaque_ptr) { 119 if (aligned_pointer == nullptr) return; 120 121 const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer); 122 HWY_DASSERT(payload % kAlignment == 0); 123 const AllocationHeader* header = 124 reinterpret_cast<const AllocationHeader*>(payload) - 1; 125 126 if (free_ptr == nullptr) { 127 free(header->allocated); 128 } else { 129 (*free_ptr)(opaque_ptr, header->allocated); 130 } 131 } 132 133 // static 134 HWY_DLLEXPORT void AlignedDeleter::DeleteAlignedArray(void* aligned_pointer, 135 FreePtr free_ptr, 136 void* opaque_ptr, 137 ArrayDeleter deleter) { 138 if (aligned_pointer == nullptr) return; 139 140 const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer); 141 HWY_DASSERT(payload % kAlignment == 0); 142 const AllocationHeader* header = 143 reinterpret_cast<const AllocationHeader*>(payload) - 1; 144 145 if (deleter) { 146 (*deleter)(aligned_pointer, header->payload_size); 147 } 148 149 if (free_ptr == nullptr) { 150 free(header->allocated); 151 } else { 152 (*free_ptr)(opaque_ptr, header->allocated); 
153 } 154 } 155 156 } // namespace hwy