aligned_allocator_test.cc (15416B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include "hwy/aligned_allocator.h" 17 18 #include <stddef.h> 19 #include <stdint.h> 20 #include <stdlib.h> // malloc 21 22 #include <array> 23 #include <random> 24 #include <set> 25 #include <vector> 26 27 #include "hwy/base.h" 28 #include "hwy/per_target.h" 29 #include "hwy/tests/hwy_gtest.h" 30 #include "hwy/tests/test_util-inl.h" // HWY_ASSERT_EQ 31 32 namespace { 33 34 // Sample object that keeps track on an external counter of how many times was 35 // the explicit constructor and destructor called. 36 template <size_t N> 37 class SampleObject { 38 public: 39 SampleObject() { data_[0] = 'a'; } 40 explicit SampleObject(int* counter) : counter_(counter) { 41 if (counter) (*counter)++; 42 data_[0] = 'b'; 43 } 44 45 ~SampleObject() { 46 if (counter_) (*counter_)--; 47 } 48 49 static_assert(N > sizeof(int*), "SampleObject size too small."); 50 int* counter_ = nullptr; 51 char data_[N - sizeof(int*)]; 52 }; 53 54 class FakeAllocator { 55 public: 56 // static AllocPtr and FreePtr member to be used with the aligned 57 // allocator. These functions calls the private non-static members. 58 static void* StaticAlloc(void* opaque, size_t bytes) { 59 return reinterpret_cast<FakeAllocator*>(opaque)->Alloc(bytes); 60 } 61 static void StaticFree(void* opaque, void* memory) { 62 return reinterpret_cast<FakeAllocator*>(opaque)->Free(memory); 63 } 64 65 // Returns the number of pending allocations to be freed. 66 size_t PendingAllocs() { return allocs_.size(); } 67 68 private: 69 void* Alloc(size_t bytes) { 70 void* ret = malloc(bytes); 71 allocs_.insert(ret); 72 return ret; 73 } 74 void Free(void* memory) { 75 if (!memory) return; 76 HWY_ASSERT(allocs_.end() != allocs_.find(memory)); 77 allocs_.erase(memory); 78 free(memory); 79 } 80 81 std::set<void*> allocs_; 82 }; 83 84 } // namespace 85 86 namespace hwy { 87 namespace { 88 89 #if !HWY_TEST_STANDALONE 90 class AlignedAllocatorTest : public testing::Test {}; 91 #endif 92 93 TEST(AlignedAllocatorTest, TestFreeNullptr) { 94 // Calling free with a nullptr is always ok. 95 FreeAlignedBytes(/*aligned_pointer=*/nullptr, /*free_ptr=*/nullptr, 96 /*opaque_ptr=*/nullptr); 97 } 98 99 TEST(AlignedAllocatorTest, TestLog2) { 100 HWY_ASSERT_EQ(0u, detail::ShiftCount(1)); 101 HWY_ASSERT_EQ(1u, detail::ShiftCount(2)); 102 HWY_ASSERT_EQ(3u, detail::ShiftCount(8)); 103 } 104 105 // Allocator returns null when it detects overflow of items * sizeof(T). 106 TEST(AlignedAllocatorTest, TestOverflow) { 107 constexpr size_t max = ~size_t(0); 108 constexpr size_t msb = (max >> 1) + 1; 109 using Size5 = std::array<uint8_t, 5>; 110 using Size10 = std::array<uint8_t, 10>; 111 HWY_ASSERT(nullptr == 112 detail::AllocateAlignedItems<uint32_t>(max / 2, nullptr, nullptr)); 113 HWY_ASSERT(nullptr == 114 detail::AllocateAlignedItems<uint32_t>(max / 3, nullptr, nullptr)); 115 HWY_ASSERT(nullptr == 116 detail::AllocateAlignedItems<Size5>(max / 4, nullptr, nullptr)); 117 HWY_ASSERT(nullptr == 118 detail::AllocateAlignedItems<uint16_t>(msb, nullptr, nullptr)); 119 HWY_ASSERT(nullptr == 120 detail::AllocateAlignedItems<double>(msb + 1, nullptr, nullptr)); 121 HWY_ASSERT(nullptr == 122 detail::AllocateAlignedItems<Size10>(msb / 4, nullptr, nullptr)); 123 } 124 125 TEST(AlignedAllocatorTest, TestAllocDefaultPointers) { 126 const size_t kSize = 7777; 127 void* ptr = AllocateAlignedBytes(kSize, /*alloc_ptr=*/nullptr, 128 /*opaque_ptr=*/nullptr); 129 HWY_ASSERT(ptr != nullptr); 130 // Make sure the pointer is actually aligned. 131 HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT); 132 char* p = static_cast<char*>(ptr); 133 size_t ret = 0; 134 for (size_t i = 0; i < kSize; i++) { 135 // Performs a computation using p[] to prevent it being optimized away. 136 p[i] = static_cast<char>(i & 0x7F); 137 if (i) ret += static_cast<size_t>(p[i] * p[i - 1]); 138 } 139 HWY_ASSERT(ret != size_t{0}); 140 FreeAlignedBytes(ptr, /*free_ptr=*/nullptr, /*opaque_ptr=*/nullptr); 141 } 142 143 TEST(AlignedAllocatorTest, TestEmptyAlignedUniquePtr) { 144 AlignedUniquePtr<SampleObject<32>> ptr(nullptr, AlignedDeleter()); 145 AlignedUniquePtr<SampleObject<32>[]> arr(nullptr, AlignedDeleter()); 146 } 147 148 TEST(AlignedAllocatorTest, TestEmptyAlignedFreeUniquePtr) { 149 AlignedFreeUniquePtr<std::array<char, 32>> ptr(nullptr, AlignedFreer()); 150 AlignedFreeUniquePtr<std::array<char, 32>[]> arr(nullptr, AlignedFreer()); 151 } 152 153 TEST(AlignedAllocatorTest, TestCustomAlloc) { 154 FakeAllocator fake_alloc; 155 156 const size_t kSize = 7777; 157 void* ptr = 158 AllocateAlignedBytes(kSize, &FakeAllocator::StaticAlloc, &fake_alloc); 159 HWY_ASSERT(ptr != nullptr); 160 // We should have only requested one alloc from the allocator. 161 HWY_ASSERT_EQ(1U, fake_alloc.PendingAllocs()); 162 // Make sure the pointer is actually aligned. 163 HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % HWY_ALIGNMENT); 164 FreeAlignedBytes(ptr, &FakeAllocator::StaticFree, &fake_alloc); 165 HWY_ASSERT_EQ(0U, fake_alloc.PendingAllocs()); 166 } 167 168 TEST(AlignedAllocatorTest, TestMakeUniqueAlignedDefaultConstructor) { 169 { 170 auto ptr = MakeUniqueAligned<SampleObject<24>>(); 171 // Default constructor sets the data_[0] to 'a'. 172 HWY_ASSERT_EQ('a', ptr->data_[0]); 173 HWY_ASSERT(nullptr == ptr->counter_); 174 } 175 } 176 177 TEST(AlignedAllocatorTest, TestMakeUniqueAligned) { 178 int counter = 0; 179 { 180 // Creates the object, initializes it with the explicit constructor and 181 // returns an unique_ptr to it. 182 auto ptr = MakeUniqueAligned<SampleObject<24>>(&counter); 183 HWY_ASSERT_EQ(1, counter); 184 // Custom constructor sets the data_[0] to 'b'. 185 HWY_ASSERT_EQ('b', ptr->data_[0]); 186 } 187 HWY_ASSERT_EQ(0, counter); 188 } 189 190 TEST(AlignedAllocatorTest, TestMakeUniqueAlignedArray) { 191 int counter = 0; 192 { 193 // Creates the array of objects and initializes them with the explicit 194 // constructor. 195 auto arr = MakeUniqueAlignedArray<SampleObject<24>>(7, &counter); 196 HWY_ASSERT_EQ(7, counter); 197 for (size_t i = 0; i < 7; i++) { 198 // Custom constructor sets the data_[0] to 'b'. 199 HWY_ASSERT_EQ('b', arr[i].data_[0]); 200 } 201 } 202 HWY_ASSERT_EQ(0, counter); 203 } 204 205 TEST(AlignedAllocatorTest, TestAllocSingleInt) { 206 auto ptr = AllocateAligned<uint32_t>(1); 207 HWY_ASSERT(ptr.get() != nullptr); 208 HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT); 209 // Force delete of the unique_ptr now to check that it doesn't crash. 210 ptr.reset(nullptr); 211 HWY_ASSERT(nullptr == ptr.get()); 212 } 213 214 TEST(AlignedAllocatorTest, TestAllocMultipleInt) { 215 const size_t kSize = 7777; 216 auto ptr = AllocateAligned<uint32_t>(kSize); 217 HWY_ASSERT(ptr.get() != nullptr); 218 HWY_ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr.get()) % HWY_ALIGNMENT); 219 // ptr[i] is actually (*ptr.get())[i] which will use the operator[] of the 220 // underlying type chosen by AllocateAligned() for the std::unique_ptr. 221 HWY_ASSERT(&(ptr[0]) + 1 == &(ptr[1])); 222 223 size_t ret = 0; 224 for (size_t i = 0; i < kSize; i++) { 225 // Performs a computation using ptr[] to prevent it being optimized away. 226 ptr[i] = static_cast<uint32_t>(i); 227 if (i) ret += static_cast<size_t>(ptr[i]) * ptr[i - 1]; 228 } 229 HWY_ASSERT(ret != size_t{0}); 230 } 231 232 TEST(AlignedAllocatorTest, TestMakeUniqueAlignedArrayWithCustomAlloc) { 233 FakeAllocator fake_alloc; 234 int counter = 0; 235 { 236 // Creates the array of objects and initializes them with the explicit 237 // constructor. 238 auto arr = MakeUniqueAlignedArrayWithAlloc<SampleObject<24>>( 239 7, FakeAllocator::StaticAlloc, FakeAllocator::StaticFree, &fake_alloc, 240 &counter); 241 HWY_ASSERT(arr.get() != nullptr); 242 // An array should still only call a single allocation. 243 HWY_ASSERT_EQ(1u, fake_alloc.PendingAllocs()); 244 HWY_ASSERT_EQ(7, counter); 245 for (size_t i = 0; i < 7; i++) { 246 // Custom constructor sets the data_[0] to 'b'. 247 HWY_ASSERT_EQ('b', arr[i].data_[0]); 248 } 249 } 250 HWY_ASSERT_EQ(0, counter); 251 HWY_ASSERT_EQ(0u, fake_alloc.PendingAllocs()); 252 } 253 254 TEST(AlignedAllocatorTest, TestDefaultInit) { 255 // The test is whether this compiles. Default-init is useful for output params 256 // and per-thread storage. 257 std::vector<AlignedUniquePtr<int[]>> ptrs; 258 std::vector<AlignedFreeUniquePtr<double[]>> free_ptrs; 259 ptrs.resize(128); 260 free_ptrs.resize(128); 261 // The following is to prevent elision of the pointers. 262 std::mt19937 rng(129); // Emscripten lacks random_device. 263 std::uniform_int_distribution<size_t> dist(0, 127); 264 ptrs[dist(rng)] = MakeUniqueAlignedArray<int>(123); 265 free_ptrs[dist(rng)] = AllocateAligned<double>(456); 266 // "Use" pointer without resorting to printf. 0 == 0. Can't shift by 64. 267 const auto addr1 = reinterpret_cast<uintptr_t>(ptrs[dist(rng)].get()); 268 const auto addr2 = reinterpret_cast<uintptr_t>(free_ptrs[dist(rng)].get()); 269 constexpr size_t kBits = sizeof(uintptr_t) * 8; 270 HWY_ASSERT_EQ((addr1 >> (kBits - 1)) >> (kBits - 1), 271 (addr2 >> (kBits - 1)) >> (kBits - 1)); 272 } 273 274 using std::array; 275 using std::vector; 276 277 template <typename T> 278 void CheckEqual(const T& t1, const T& t2) { 279 HWY_ASSERT_EQ(t1.size(), t2.size()); 280 for (size_t i = 0; i < t1.size(); i++) { 281 HWY_ASSERT_EQ(t1[i], t2[i]); 282 } 283 } 284 285 template <typename T> 286 void CheckEqual(const AlignedNDArray<T, 1>& a, const vector<T>& v) { 287 const array<size_t, 1> want_shape({v.size()}); 288 const array<size_t, 1> got_shape = a.shape(); 289 CheckEqual(got_shape, want_shape); 290 291 Span<const T> a_span = a[{}]; 292 HWY_ASSERT_EQ(a_span.size(), v.size()); 293 for (size_t i = 0; i < a_span.size(); i++) { 294 HWY_ASSERT_EQ(a_span[i], v[i]); 295 HWY_ASSERT_EQ(*(a_span.data() + i), v[i]); 296 } 297 } 298 299 template <typename T> 300 void CheckEqual(const AlignedNDArray<T, 2>& a, const vector<vector<T>>& v) { 301 const array<size_t, 2> want_shape({v.size(), v[1].size()}); 302 for (const vector<T>& row : v) { 303 HWY_ASSERT_EQ(row.size(), want_shape[1]); 304 } 305 const std::array<size_t, 2> got_shape = a.shape(); 306 CheckEqual(got_shape, want_shape); 307 308 HWY_ASSERT_EQ(a.size(), want_shape[0] * want_shape[1]); 309 310 for (size_t row_index = 0; row_index < v.size(); ++row_index) { 311 vector<T> want_row = v[row_index]; 312 Span<const T> got_row = a[{row_index}]; 313 HWY_ASSERT_EQ(got_row.size(), want_row.size()); 314 for (size_t column_index = 0; column_index < got_row.size(); 315 column_index++) { 316 HWY_ASSERT_EQ(a[{row_index}][column_index], want_row[column_index]); 317 HWY_ASSERT_EQ(got_row[column_index], want_row[column_index]); 318 HWY_ASSERT_EQ(*(a[{row_index}].data() + column_index), 319 want_row[column_index]); 320 } 321 } 322 } 323 324 TEST(AlignedAllocatorTest, TestAlignedNDArray) { 325 AlignedNDArray<float, 1> a1({4}); 326 CheckEqual(a1, {0, 0, 0, 0}); 327 a1[{}][2] = 3.4f; 328 CheckEqual(a1, {0, 0, 3.4f, 0}); 329 330 AlignedNDArray<float, 2> a2({2, 3}); 331 CheckEqual(a2, {{0, 0, 0}, {0, 0, 0}}); 332 a2[{1}][1] = 5.1f; 333 CheckEqual(a2, {{0, 0, 0}, {0, 5.1f, 0}}); 334 float f0[] = {1.0f, 2.0f, 3.0f}; 335 float f1[] = {4.0f, 5.0f, 6.0f}; 336 hwy::CopyBytes(f0, a2[{0}].data(), 3 * sizeof(float)); 337 hwy::CopyBytes(f1, a2[{1}].data(), 3 * sizeof(float)); 338 CheckEqual(a2, {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}); 339 } 340 341 // Tests that each innermost row in an AlignedNDArray is aligned to the max 342 // bytes available for SIMD operations on this architecture. 343 TEST(AlignedAllocatorTest, TestAlignedNDArrayAlignment) { 344 AlignedNDArray<float, 4> a({3, 3, 3, 3}); 345 for (size_t d0 = 0; d0 < a.shape()[0]; d0++) { 346 for (size_t d1 = 0; d1 < a.shape()[1]; d1++) { 347 for (size_t d2 = 0; d2 < a.shape()[2]; d2++) { 348 // Check that the address this innermost array starts at is an even 349 // number of VectorBytes(), which is the max bytes available for SIMD 350 // operations. 351 HWY_ASSERT_EQ( 352 reinterpret_cast<uintptr_t>(a[{d0, d1, d2}].data()) % VectorBytes(), 353 0); 354 } 355 } 356 } 357 } 358 359 TEST(AlignedAllocatorTest, TestSpanCopyAssignment) { 360 AlignedNDArray<float, 2> a({2, 2}); 361 CheckEqual(a, {{0.0f, 0.0f}, {0.0f, 0.0f}}); 362 a[{0}] = {1.0f, 2.0f}; 363 a[{1}] = {3.0f, 4.0f}; 364 CheckEqual(a, {{1.0f, 2.0f}, {3.0f, 4.0f}}); 365 } 366 367 TEST(AlignedAllocatorTest, TestAlignedNDArrayTruncate) { 368 AlignedNDArray<size_t, 4> a({8, 8, 8, 8}); 369 const size_t last_axis_memory_shape = a.memory_shape()[3]; 370 const auto compute_value = [&](const std::array<size_t, 4>& index) { 371 return index[0] * 8 * 8 * 8 + index[1] * 8 * 8 + index[2] * 8 * 8 + 372 index[3]; 373 }; 374 for (size_t axis0 = 0; axis0 < a.shape()[0]; ++axis0) { 375 for (size_t axis1 = 0; axis1 < a.shape()[1]; ++axis1) { 376 for (size_t axis2 = 0; axis2 < a.shape()[2]; ++axis2) { 377 for (size_t axis3 = 0; axis3 < a.shape()[3]; ++axis3) { 378 a[{axis0, axis1, axis2}][axis3] = 379 compute_value({axis0, axis1, axis2, axis3}); 380 } 381 } 382 } 383 } 384 const auto verify_values = [&](const AlignedNDArray<size_t, 4>& array) { 385 for (size_t axis0 = 0; axis0 < array.shape()[0]; ++axis0) { 386 for (size_t axis1 = 0; axis1 < array.shape()[1]; ++axis1) { 387 for (size_t axis2 = 0; axis2 < array.shape()[2]; ++axis2) { 388 for (size_t axis3 = 0; axis3 < array.shape()[3]; ++axis3) { 389 HWY_ASSERT_EQ((array[{axis0, axis1, axis2}][axis3]), 390 (compute_value({axis0, axis1, axis2, axis3}))); 391 } 392 } 393 } 394 } 395 }; 396 a.truncate({7, 7, 7, 7}); 397 HWY_ASSERT_EQ(a.shape()[0], 7); 398 HWY_ASSERT_EQ(a.shape()[1], 7); 399 HWY_ASSERT_EQ(a.shape()[2], 7); 400 HWY_ASSERT_EQ(a.shape()[3], 7); 401 HWY_ASSERT_EQ(a.memory_shape()[0], 8); 402 HWY_ASSERT_EQ(a.memory_shape()[1], 8); 403 HWY_ASSERT_EQ(a.memory_shape()[2], 8); 404 HWY_ASSERT_EQ(a.memory_shape()[3], last_axis_memory_shape); 405 verify_values(a); 406 a.truncate({6, 5, 4, 3}); 407 HWY_ASSERT_EQ(a.shape()[0], 6); 408 HWY_ASSERT_EQ(a.shape()[1], 5); 409 HWY_ASSERT_EQ(a.shape()[2], 4); 410 HWY_ASSERT_EQ(a.shape()[3], 3); 411 HWY_ASSERT_EQ(a.memory_shape()[0], 8); 412 HWY_ASSERT_EQ(a.memory_shape()[1], 8); 413 HWY_ASSERT_EQ(a.memory_shape()[2], 8); 414 HWY_ASSERT_EQ(a.memory_shape()[3], last_axis_memory_shape); 415 verify_values(a); 416 } 417 418 TEST(AlignedAllocatorTest, TestAlignedVector) { 419 AlignedVector<int> vec{0, 1, 2, 3, 4}; 420 HWY_ASSERT_EQ(5, vec.size()); 421 HWY_ASSERT_EQ(0, vec[0]); 422 HWY_ASSERT_EQ(2, vec.at(2)); 423 HWY_ASSERT_EQ(0, vec.front()); 424 HWY_ASSERT_EQ(4, vec.back()); 425 426 vec.pop_back(); 427 HWY_ASSERT_EQ(3, vec.back()); 428 HWY_ASSERT_EQ(4, vec.size()); 429 430 vec.push_back(4); 431 vec.push_back(5); 432 HWY_ASSERT_EQ(5, vec.back()); 433 HWY_ASSERT_EQ(6, vec.size()); 434 435 const size_t initialCapacity = vec.capacity(); 436 437 // Add elements to exceed initial capacity 438 for (auto i = vec.size(); i < initialCapacity + 10; ++i) { 439 vec.push_back(static_cast<int>(i)); 440 } 441 442 // Check if the capacity increased and elements are intact 443 HWY_ASSERT(vec.capacity() > initialCapacity); 444 for (size_t i = 0; i < vec.size(); ++i) { 445 HWY_ASSERT_EQ(i, vec[i]); 446 } 447 448 vec.clear(); 449 HWY_ASSERT(vec.empty()); 450 } 451 452 } // namespace 453 } // namespace hwy 454 455 HWY_TEST_MAIN();