image.h (17282B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_ 17 #define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_ 18 19 // SIMD/multicore-friendly planar image representation with row accessors. 20 21 #include <string.h> 22 23 #include <utility> // std::move 24 25 #include "hwy/aligned_allocator.h" 26 #include "hwy/base.h" 27 28 namespace hwy { 29 30 // Type-independent parts of Image<> - reduces code duplication and facilitates 31 // moving member function implementations to cc file. 32 struct HWY_CONTRIB_DLLEXPORT ImageBase { 33 // Returns required alignment in bytes for externally allocated memory. 34 static size_t VectorSize(); 35 36 // Returns distance [bytes] between the start of two consecutive rows, a 37 // multiple of VectorSize but NOT kAlias (see implementation). 38 static size_t BytesPerRow(size_t xsize, size_t sizeof_t); 39 40 // No allocation (for output params or unused images) 41 ImageBase() 42 : xsize_(0), 43 ysize_(0), 44 bytes_per_row_(0), 45 bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {} 46 47 // Allocates memory (this is the common case) 48 ImageBase(size_t xsize, size_t ysize, size_t sizeof_t); 49 50 // References but does not take ownership of external memory. Useful for 51 // interoperability with other libraries. 
`aligned` must be aligned to a 52 // multiple of VectorSize() and `bytes_per_row` must also be a multiple of 53 // VectorSize() or preferably equal to BytesPerRow(). 54 ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned); 55 56 // Copy construction/assignment is forbidden to avoid inadvertent copies, 57 // which can be very expensive. Use CopyImageTo() instead. 58 ImageBase(const ImageBase& other) = delete; 59 ImageBase& operator=(const ImageBase& other) = delete; 60 61 // Move constructor (required for returning Image from function) 62 ImageBase(ImageBase&& other) noexcept = default; 63 64 // Move assignment (required for std::vector) 65 ImageBase& operator=(ImageBase&& other) noexcept = default; 66 67 void Swap(ImageBase& other); 68 69 // Useful for pre-allocating image with some padding for alignment purposes 70 // and later reporting the actual valid dimensions. Caller is responsible 71 // for ensuring xsize/ysize are <= the original dimensions. 72 void ShrinkTo(const size_t xsize, const size_t ysize) { 73 xsize_ = static_cast<uint32_t>(xsize); 74 ysize_ = static_cast<uint32_t>(ysize); 75 // NOTE: we can't recompute bytes_per_row for more compact storage and 76 // better locality because that would invalidate the image contents. 77 } 78 79 // How many pixels. 80 HWY_INLINE size_t xsize() const { return xsize_; } 81 HWY_INLINE size_t ysize() const { return ysize_; } 82 83 // NOTE: do not use this for copying rows - the valid xsize may be much less. 84 HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; } 85 86 // Raw access to byte contents, for interfacing with other libraries. 87 // Unsigned char instead of char to avoid surprises (sign extension). 
88 HWY_INLINE uint8_t* bytes() { 89 void* p = bytes_.get(); 90 return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64)); 91 } 92 HWY_INLINE const uint8_t* bytes() const { 93 const void* p = bytes_.get(); 94 return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64)); 95 } 96 97 protected: 98 // Returns pointer to the start of a row. 99 HWY_INLINE void* VoidRow(const size_t y) const { 100 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN 101 if (y >= ysize_) { 102 HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_); 103 } 104 #endif 105 106 void* row = bytes_.get() + y * bytes_per_row_; 107 return HWY_ASSUME_ALIGNED(row, 64); 108 } 109 110 enum class Padding { 111 // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default. 112 kRoundUp, 113 // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra 114 // vector to be initialized. If done by default, this would suppress 115 // legitimate msan warnings. We therefore require users to explicitly call 116 // InitializePadding before using unaligned loads (e.g. convolution). 117 kUnaligned 118 }; 119 120 // Initializes the minimum bytes required to suppress msan warnings from 121 // legitimate (according to Padding mode) vector loads/stores on the right 122 // border, where some lanes are uninitialized and assumed to be unused. 123 void InitializePadding(size_t sizeof_t, Padding padding); 124 125 // (Members are non-const to enable assignment during move-assignment.) 126 uint32_t xsize_; // In valid pixels, not including any padding. 127 uint32_t ysize_; 128 size_t bytes_per_row_; // Includes padding. 129 AlignedFreeUniquePtr<uint8_t[]> bytes_; 130 }; 131 132 // Single channel, aligned rows separated by padding. T must be POD. 133 // 134 // 'Single channel' (one 2D array per channel) simplifies vectorization 135 // (repeating the same operation on multiple adjacent components) without the 136 // complexity of a hybrid layout (8 R, 8 G, 8 B, ...). 
In particular, clients 137 // can easily iterate over all components in a row and Image requires no 138 // knowledge of the pixel format beyond the component type "T". 139 // 140 // 'Aligned' means each row is aligned to the L1 cache line size. This prevents 141 // false sharing between two threads operating on adjacent rows. 142 // 143 // 'Padding' is still relevant because vectors could potentially be larger than 144 // a cache line. By rounding up row sizes to the vector size, we allow 145 // reading/writing ALIGNED vectors whose first lane is a valid sample. This 146 // avoids needing a separate loop to handle remaining unaligned lanes. 147 // 148 // This image layout could also be achieved with a vector and a row accessor 149 // function, but a class wrapper with support for "deleter" allows wrapping 150 // existing memory allocated by clients without copying the pixels. It also 151 // provides convenient accessors for xsize/ysize, which shortens function 152 // argument lists. Supports move-construction so it can be stored in containers. 153 template <typename ComponentType> 154 class Image : public ImageBase { 155 public: 156 using T = ComponentType; 157 158 Image() = default; 159 Image(const size_t xsize, const size_t ysize) 160 : ImageBase(xsize, ysize, sizeof(T)) {} 161 Image(const size_t xsize, const size_t ysize, size_t bytes_per_row, 162 void* aligned) 163 : ImageBase(xsize, ysize, bytes_per_row, aligned) {} 164 165 void InitializePaddingForUnalignedAccesses() { 166 InitializePadding(sizeof(T), Padding::kUnaligned); 167 } 168 169 HWY_INLINE const T* ConstRow(const size_t y) const { 170 return static_cast<const T*>(VoidRow(y)); 171 } 172 HWY_INLINE const T* ConstRow(const size_t y) { 173 return static_cast<const T*>(VoidRow(y)); 174 } 175 176 // Returns pointer to non-const. This allows passing const Image* parameters 177 // when the callee is only supposed to fill the pixels, as opposed to 178 // allocating or resizing the image. 
179 HWY_INLINE T* MutableRow(const size_t y) const { 180 return static_cast<T*>(VoidRow(y)); 181 } 182 HWY_INLINE T* MutableRow(const size_t y) { 183 return static_cast<T*>(VoidRow(y)); 184 } 185 186 // Returns number of pixels (some of which are padding) per row. Useful for 187 // computing other rows via pointer arithmetic. WARNING: this must 188 // NOT be used to determine xsize. 189 HWY_INLINE intptr_t PixelsPerRow() const { 190 return static_cast<intptr_t>(bytes_per_row_ / sizeof(T)); 191 } 192 }; 193 194 using ImageF = Image<float>; 195 196 // A bundle of 3 same-sized images. To fill an existing Image3 using 197 // single-channel producers, we also need access to each const Image*. Const 198 // prevents breaking the same-size invariant, while still allowing pixels to be 199 // changed via MutableRow. 200 template <typename ComponentType> 201 class Image3 { 202 public: 203 using T = ComponentType; 204 using ImageT = Image<T>; 205 static constexpr size_t kNumPlanes = 3; 206 207 Image3() : planes_{ImageT(), ImageT(), ImageT()} {} 208 209 Image3(const size_t xsize, const size_t ysize) 210 : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize), 211 ImageT(xsize, ysize)} {} 212 213 Image3(Image3&& other) noexcept { 214 for (size_t i = 0; i < kNumPlanes; i++) { 215 planes_[i] = std::move(other.planes_[i]); 216 } 217 } 218 219 Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) { 220 if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) { 221 HWY_ABORT( 222 "Not same size: %d x %d, %d x %d, %d x %d\n", 223 static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()), 224 static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()), 225 static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize())); 226 } 227 planes_[0] = std::move(plane0); 228 planes_[1] = std::move(plane1); 229 planes_[2] = std::move(plane2); 230 } 231 232 // Copy construction/assignment is forbidden to avoid inadvertent copies, 233 // which can be very expensive. 
Use CopyImageTo instead. 234 Image3(const Image3& other) = delete; 235 Image3& operator=(const Image3& other) = delete; 236 237 Image3& operator=(Image3&& other) noexcept { 238 for (size_t i = 0; i < kNumPlanes; i++) { 239 planes_[i] = std::move(other.planes_[i]); 240 } 241 return *this; 242 } 243 244 HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const { 245 return static_cast<const T*>(VoidPlaneRow(c, y)); 246 } 247 HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) { 248 return static_cast<const T*>(VoidPlaneRow(c, y)); 249 } 250 251 HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const { 252 return static_cast<T*>(VoidPlaneRow(c, y)); 253 } 254 HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) { 255 return static_cast<T*>(VoidPlaneRow(c, y)); 256 } 257 258 HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; } 259 260 void Swap(Image3& other) { 261 for (size_t c = 0; c < 3; ++c) { 262 other.planes_[c].Swap(planes_[c]); 263 } 264 } 265 266 void ShrinkTo(const size_t xsize, const size_t ysize) { 267 for (ImageT& plane : planes_) { 268 plane.ShrinkTo(xsize, ysize); 269 } 270 } 271 272 // Sizes of all three images are guaranteed to be equal. 273 HWY_INLINE size_t xsize() const { return planes_[0].xsize(); } 274 HWY_INLINE size_t ysize() const { return planes_[0].ysize(); } 275 // Returns offset [bytes] from one row to the next row of the same plane. 276 // WARNING: this must NOT be used to determine xsize, nor for copying rows - 277 // the valid xsize may be much less. 278 HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); } 279 // Returns number of pixels (some of which are padding) per row. Useful for 280 // computing other rows via pointer arithmetic. WARNING: this must NOT be used 281 // to determine xsize. 282 HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); } 283 284 private: 285 // Returns pointer to the start of a row. 
286 HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const { 287 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN 288 if (c >= kNumPlanes || y >= ysize()) { 289 HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c), 290 static_cast<int>(y), static_cast<int>(ysize())); 291 } 292 #endif 293 // Use the first plane's stride because the compiler might not realize they 294 // are all equal. Thus we only need a single multiplication for all planes. 295 const size_t row_offset = y * planes_[0].bytes_per_row(); 296 const void* row = planes_[c].bytes() + row_offset; 297 return static_cast<const T * HWY_RESTRICT>( 298 HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT)); 299 } 300 301 private: 302 ImageT planes_[kNumPlanes]; 303 }; 304 305 using Image3F = Image3<float>; 306 307 // Rectangular region in image(s). Factoring this out of Image instead of 308 // shifting the pointer by x0/y0 allows this to apply to multiple images with 309 // different resolutions. Can compare size via SameSize(rect1, rect2). 310 class Rect { 311 public: 312 // Most windows are xsize_max * ysize_max, except those on the borders where 313 // begin + size_max > end. 314 constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max, 315 size_t ysize_max, size_t xend, size_t yend) 316 : x0_(xbegin), 317 y0_(ybegin), 318 xsize_(ClampedSize(xbegin, xsize_max, xend)), 319 ysize_(ClampedSize(ybegin, ysize_max, yend)) {} 320 321 // Construct with origin and known size (typically from another Rect). 322 constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize) 323 : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {} 324 325 // Construct a rect that covers a whole image. 
326 template <typename Image> 327 explicit Rect(const Image& image) 328 : Rect(0, 0, image.xsize(), image.ysize()) {} 329 330 Rect() : Rect(0, 0, 0, 0) {} 331 332 Rect(const Rect&) = default; 333 Rect& operator=(const Rect&) = default; 334 335 Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max, 336 size_t ysize_max) { 337 return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_, 338 y0_ + ysize_); 339 } 340 341 template <typename T> 342 const T* ConstRow(const Image<T>* image, size_t y) const { 343 return image->ConstRow(y + y0_) + x0_; 344 } 345 346 template <typename T> 347 T* MutableRow(const Image<T>* image, size_t y) const { 348 return image->MutableRow(y + y0_) + x0_; 349 } 350 351 template <typename T> 352 const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const { 353 return image.ConstPlaneRow(c, y + y0_) + x0_; 354 } 355 356 template <typename T> 357 T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const { 358 return image->MutablePlaneRow(c, y + y0_) + x0_; 359 } 360 361 // Returns true if this Rect fully resides in the given image. ImageT could be 362 // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical. 363 template <class ImageT> 364 bool IsInside(const ImageT& image) const { 365 return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize()); 366 } 367 368 size_t x0() const { return x0_; } 369 size_t y0() const { return y0_; } 370 size_t xsize() const { return xsize_; } 371 size_t ysize() const { return ysize_; } 372 373 private: 374 // Returns size_max, or whatever is left in [begin, end). 375 static constexpr size_t ClampedSize(size_t begin, size_t size_max, 376 size_t end) { 377 return (begin + size_max <= end) ? size_max 378 : (end > begin ? end - begin : 0); 379 } 380 381 size_t x0_; 382 size_t y0_; 383 384 size_t xsize_; 385 size_t ysize_; 386 }; 387 388 // Works for any image-like input type(s). 
template <class Image1, class Image2>
HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
  return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
}

// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
// We assume the radius (distance outside the image) is small compared to the
// image size, otherwise this might not terminate.
// The mirror is outside the last column (border pixel is also replicated).
static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
                                                 const int64_t xsize) {
  HWY_DASSERT(xsize != 0);

  // TODO(janwas): replace with branchless version
  // Repeated reflection handles radii larger than xsize (each iteration folds
  // x back across one border).
  while (x < 0 || x >= xsize) {
    if (x < 0) {
      // Reflect across the left edge: -1 => 0, -2 => 1, ... (border pixel
      // replicated because the mirror axis lies outside the first column).
      x = -x - 1;
    } else {
      // Reflect across the right edge: xsize => xsize - 1, xsize + 1 =>
      // xsize - 2, ...
      x = 2 * xsize - 1 - x;
    }
  }
  return static_cast<size_t>(x);
}

// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):

// Mirrors (repeating the edge pixel once). Useful for convolutions.
struct WrapMirror {
  HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
    return Mirror(coord, static_cast<int64_t>(size));
  }
};

// Returns the same coordinate, for when we know "coord" is already valid (e.g.
// interior of an image).
struct WrapUnchanged {
  HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
    return static_cast<size_t>(coord);
  }
};

// Similar to Wrap* but for row pointers (reduces Row() multiplications).

// Maps out-of-bounds row pointers back into [first_row_, last_row_] by
// mirroring, measured in units of `stride` (elements per row). NOTE(review):
// the pointer arithmetic assumes rows are spaced exactly `stride` apart, i.e.
// `row` was computed as first_row + y * stride for some (possibly
// out-of-range) y — confirm against callers.
class WrapRowMirror {
 public:
  template <class View>
  WrapRowMirror(const View& image, size_t ysize)
      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}

  const float* operator()(const float* const HWY_RESTRICT row,
                          const int64_t stride) const {
    if (row < first_row_) {
      // num_before is a multiple of stride (see NOTE above).
      const int64_t num_before = first_row_ - row;
      // Mirrored; one row before => row 0, two before => row 1, ...
      return first_row_ + num_before - stride;
    }
    if (row > last_row_) {
      const int64_t num_after = row - last_row_;
      // Mirrored; one row after => last row, two after => last - 1, ...
      return last_row_ - num_after + stride;
    }
    // Already within [first_row_, last_row_]: return unchanged.
    return row;
  }

 private:
  const float* const HWY_RESTRICT first_row_;
  const float* const HWY_RESTRICT last_row_;
};

// Identity row wrapper, for when the row is known to be in bounds.
struct WrapRowUnchanged {
  HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
                                     int64_t /*stride*/) const {
    return row;
  }
};

}  // namespace hwy

#endif  // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_