image.cc (4883B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include "hwy/contrib/image/image.h" 17 18 #include <stddef.h> 19 #include <stdint.h> 20 21 #include <algorithm> // std::swap 22 23 #include "hwy/aligned_allocator.h" 24 #include "hwy/base.h" 25 #include "hwy/per_target.h" 26 27 namespace hwy { 28 29 size_t ImageBase::VectorSize() { 30 // Do not cache result - must return the current value, which may be greater 31 // than the first call if it was subject to DisableTargets! 32 return VectorBytes(); 33 } 34 35 size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) { 36 const size_t vec_size = VectorSize(); 37 size_t valid_bytes = xsize * sizeof_t; 38 39 // Allow unaligned accesses starting at the last valid value - this may raise 40 // msan errors unless the user calls InitializePaddingForUnalignedAccesses. 41 // Skip for the scalar case because no extra lanes will be loaded. 42 if (vec_size != 1) { 43 HWY_DASSERT(vec_size >= sizeof_t); 44 valid_bytes += vec_size - sizeof_t; 45 } 46 47 // Round up to vector and cache line size. 48 const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT); 49 size_t bytes_per_row = RoundUpTo(valid_bytes, align); 50 51 // During the lengthy window before writes are committed to memory, CPUs 52 // guard against read after write hazards by checking the address, but 53 // only the lower 11 bits. We avoid a false dependency between writes to 54 // consecutive rows by ensuring their sizes are not multiples of 2 KiB. 55 // Avoid2K prevents the same problem for the planes of an Image3. 56 if (bytes_per_row % HWY_ALIGNMENT == 0) { 57 bytes_per_row += align; 58 } 59 60 HWY_DASSERT(bytes_per_row % align == 0); 61 return bytes_per_row; 62 } 63 64 ImageBase::ImageBase(const size_t xsize, const size_t ysize, 65 const size_t sizeof_t) 66 : xsize_(static_cast<uint32_t>(xsize)), 67 ysize_(static_cast<uint32_t>(ysize)), 68 bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) { 69 HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8); 70 71 bytes_per_row_ = 0; 72 // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate 73 // if nonzero, because "zero" bytes still have padding/bookkeeping overhead. 74 if (xsize != 0 && ysize != 0) { 75 bytes_per_row_ = BytesPerRow(xsize, sizeof_t); 76 bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize); 77 HWY_ASSERT(bytes_.get() != nullptr); 78 InitializePadding(sizeof_t, Padding::kRoundUp); 79 } 80 } 81 82 ImageBase::ImageBase(const size_t xsize, const size_t ysize, 83 const size_t bytes_per_row, void* const aligned) 84 : xsize_(static_cast<uint32_t>(xsize)), 85 ysize_(static_cast<uint32_t>(ysize)), 86 bytes_per_row_(bytes_per_row), 87 bytes_(static_cast<uint8_t*>(aligned), 88 AlignedFreer(&AlignedFreer::DoNothing, nullptr)) { 89 const size_t vec_size = VectorSize(); 90 HWY_ASSERT(bytes_per_row % vec_size == 0); 91 HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0); 92 } 93 94 void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) { 95 #if HWY_IS_MSAN || HWY_IDE 96 if (xsize_ == 0 || ysize_ == 0) return; 97 98 const size_t vec_size = VectorSize(); // Bytes, independent of sizeof_t! 99 if (vec_size == 1) return; // Scalar mode: no padding needed 100 101 const size_t valid_size = xsize_ * sizeof_t; 102 const size_t initialize_size = padding == Padding::kRoundUp 103 ? RoundUpTo(valid_size, vec_size) 104 : valid_size + vec_size - sizeof_t; 105 if (valid_size == initialize_size) return; 106 107 for (size_t y = 0; y < ysize_; ++y) { 108 uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y)); 109 #if defined(__clang__) && (__clang_major__ <= 6) 110 // There's a bug in msan in clang-6 when handling AVX2 operations. This 111 // workaround allows tests to pass on msan, although it is slower and 112 // prevents msan warnings from uninitialized images. 113 memset(row, 0, initialize_size); 114 #else 115 hwy::ZeroBytes(row + valid_size, initialize_size - valid_size); 116 #endif // clang6 117 } 118 #else 119 (void)sizeof_t; 120 (void)padding; 121 #endif // HWY_IS_MSAN 122 } 123 124 void ImageBase::Swap(ImageBase& other) { 125 std::swap(xsize_, other.xsize_); 126 std::swap(ysize_, other.ysize_); 127 std::swap(bytes_per_row_, other.bytes_per_row_); 128 std::swap(bytes_, other.bytes_); 129 } 130 131 } // namespace hwy