image.h (17282B)
1 // Copyright 2020 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_ 17 #define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_ 18 19 // SIMD/multicore-friendly planar image representation with row accessors. 20 21 #include <string.h> 22 23 #include <utility> // std::move 24 25 #include "hwy/aligned_allocator.h" 26 #include "hwy/base.h" 27 28 namespace hwy { 29 30 // Type-independent parts of Image<> - reduces code duplication and facilitates 31 // moving member function implementations to cc file. 32 struct HWY_CONTRIB_DLLEXPORT ImageBase { 33 // Returns required alignment in bytes for externally allocated memory. 34 static size_t VectorSize(); 35 36 // Returns distance [bytes] between the start of two consecutive rows, a 37 // multiple of VectorSize but NOT kAlias (see implementation). 38 static size_t BytesPerRow(size_t xsize, size_t sizeof_t); 39 40 // No allocation (for output params or unused images) 41 ImageBase() 42 : xsize_(0), 43 ysize_(0), 44 bytes_per_row_(0), 45 bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {} 46 47 // Allocates memory (this is the common case) 48 ImageBase(size_t xsize, size_t ysize, size_t sizeof_t); 49 50 // References but does not take ownership of external memory. Useful for 51 // interoperability with other libraries. 
`aligned` must be aligned to a 52 // multiple of VectorSize() and `bytes_per_row` must also be a multiple of 53 // VectorSize() or preferably equal to BytesPerRow(). 54 ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned); 55 56 // Copy construction/assignment is forbidden to avoid inadvertent copies, 57 // which can be very expensive. Use CopyImageTo() instead. 58 ImageBase(const ImageBase& other) = delete; 59 ImageBase& operator=(const ImageBase& other) = delete; 60 61 // Move constructor (required for returning Image from function) 62 ImageBase(ImageBase&& other) noexcept = default; 63 64 // Move assignment (required for std::vector) 65 ImageBase& operator=(ImageBase&& other) noexcept = default; 66 67 void Swap(ImageBase& other); 68 69 // Useful for pre-allocating image with some padding for alignment purposes 70 // and later reporting the actual valid dimensions. Caller is responsible 71 // for ensuring xsize/ysize are <= the original dimensions. 72 void ShrinkTo(const size_t xsize, const size_t ysize) { 73 xsize_ = static_cast<uint32_t>(xsize); 74 ysize_ = static_cast<uint32_t>(ysize); 75 // NOTE: we can't recompute bytes_per_row for more compact storage and 76 // better locality because that would invalidate the image contents. 77 } 78 79 // How many pixels. 80 HWY_INLINE size_t xsize() const { return xsize_; } 81 HWY_INLINE size_t ysize() const { return ysize_; } 82 83 // NOTE: do not use this for copying rows - the valid xsize may be much less. 84 HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; } 85 86 // Raw access to byte contents, for interfacing with other libraries. 87 // Unsigned char instead of char to avoid surprises (sign extension). 
88 HWY_INLINE uint8_t* bytes() { 89 void* p = bytes_.get(); 90 return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64)); 91 } 92 HWY_INLINE const uint8_t* bytes() const { 93 const void* p = bytes_.get(); 94 return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64)); 95 } 96 97 protected: 98 // Returns pointer to the start of a row. 99 HWY_INLINE void* VoidRow(const size_t y) const { 100 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN 101 if (y >= ysize_) { 102 HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_); 103 } 104 #endif 105 106 void* row = bytes_.get() + y * bytes_per_row_; 107 return HWY_ASSUME_ALIGNED(row, 64); 108 } 109 110 enum class Padding { 111 // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default. 112 kRoundUp, 113 // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra 114 // vector to be initialized. If done by default, this would suppress 115 // legitimate msan warnings. We therefore require users to explicitly call 116 // InitializePadding before using unaligned loads (e.g. convolution). 117 kUnaligned 118 }; 119 120 // Initializes the minimum bytes required to suppress msan warnings from 121 // legitimate (according to Padding mode) vector loads/stores on the right 122 // border, where some lanes are uninitialized and assumed to be unused. 123 void InitializePadding(size_t sizeof_t, Padding padding); 124 125 // (Members are non-const to enable assignment during move-assignment.) 126 uint32_t xsize_; // In valid pixels, not including any padding. 127 uint32_t ysize_; 128 size_t bytes_per_row_; // Includes padding. 129 AlignedFreeUniquePtr<uint8_t[]> bytes_; 130 }; 131 132 // Single channel, aligned rows separated by padding. T must be POD. 133 // 134 // 'Single channel' (one 2D array per channel) simplifies vectorization 135 // (repeating the same operation on multiple adjacent components) without the 136 // complexity of a hybrid layout (8 R, 8 G, 8 B, ...). 
In particular, clients 137 // can easily iterate over all components in a row and Image requires no 138 // knowledge of the pixel format beyond the component type "T". 139 // 140 // 'Aligned' means each row is aligned to the L1 cache line size. This prevents 141 // false sharing between two threads operating on adjacent rows. 142 // 143 // 'Padding' is still relevant because vectors could potentially be larger than 144 // a cache line. By rounding up row sizes to the vector size, we allow 145 // reading/writing ALIGNED vectors whose first lane is a valid sample. This 146 // avoids needing a separate loop to handle remaining unaligned lanes. 147 // 148 // This image layout could also be achieved with a vector and a row accessor 149 // function, but a class wrapper with support for "deleter" allows wrapping 150 // existing memory allocated by clients without copying the pixels. It also 151 // provides convenient accessors for xsize/ysize, which shortens function 152 // argument lists. Supports move-construction so it can be stored in containers. 153 template <typename ComponentType> 154 class Image : public ImageBase { 155 public: 156 using T = ComponentType; 157 158 Image() = default; 159 Image(const size_t xsize, const size_t ysize) 160 : ImageBase(xsize, ysize, sizeof(T)) {} 161 Image(const size_t xsize, const size_t ysize, size_t bytes_per_row, 162 void* aligned) 163 : ImageBase(xsize, ysize, bytes_per_row, aligned) {} 164 165 void InitializePaddingForUnalignedAccesses() { 166 InitializePadding(sizeof(T), Padding::kUnaligned); 167 } 168 169 HWY_INLINE const T* ConstRow(const size_t y) const { 170 return static_cast<const T*>(VoidRow(y)); 171 } 172 HWY_INLINE const T* ConstRow(const size_t y) { 173 return static_cast<const T*>(VoidRow(y)); 174 } 175 176 // Returns pointer to non-const. This allows passing const Image* parameters 177 // when the callee is only supposed to fill the pixels, as opposed to 178 // allocating or resizing the image. 
179 HWY_INLINE T* MutableRow(const size_t y) const { 180 return static_cast<T*>(VoidRow(y)); 181 } 182 HWY_INLINE T* MutableRow(const size_t y) { 183 return static_cast<T*>(VoidRow(y)); 184 } 185 186 // Returns number of pixels (some of which are padding) per row. Useful for 187 // computing other rows via pointer arithmetic. WARNING: this must 188 // NOT be used to determine xsize. 189 HWY_INLINE intptr_t PixelsPerRow() const { 190 return static_cast<intptr_t>(bytes_per_row_ / sizeof(T)); 191 } 192 }; 193 194 using ImageF = Image<float>; 195 196 // A bundle of 3 same-sized images. To fill an existing Image3 using 197 // single-channel producers, we also need access to each const Image*. Const 198 // prevents breaking the same-size invariant, while still allowing pixels to be 199 // changed via MutableRow. 200 template <typename ComponentType> 201 class Image3 { 202 public: 203 using T = ComponentType; 204 using ImageT = Image<T>; 205 static constexpr size_t kNumPlanes = 3; 206 207 Image3() : planes_{ImageT(), ImageT(), ImageT()} {} 208 209 Image3(const size_t xsize, const size_t ysize) 210 : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize), 211 ImageT(xsize, ysize)} {} 212 213 Image3(Image3&& other) noexcept { 214 for (size_t i = 0; i < kNumPlanes; i++) { 215 planes_[i] = std::move(other.planes_[i]); 216 } 217 } 218 219 Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) { 220 if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) { 221 HWY_ABORT( 222 "Not same size: %d x %d, %d x %d, %d x %d\n", 223 static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()), 224 static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()), 225 static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize())); 226 } 227 planes_[0] = std::move(plane0); 228 planes_[1] = std::move(plane1); 229 planes_[2] = std::move(plane2); 230 } 231 232 // Copy construction/assignment is forbidden to avoid inadvertent copies, 233 // which can be very expensive. 
Use CopyImageTo instead. 234 Image3(const Image3& other) = delete; 235 Image3& operator=(const Image3& other) = delete; 236 237 Image3& operator=(Image3&& other) noexcept { 238 for (size_t i = 0; i < kNumPlanes; i++) { 239 planes_[i] = std::move(other.planes_[i]); 240 } 241 return *this; 242 } 243 244 HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const { 245 return static_cast<const T*>(VoidPlaneRow(c, y)); 246 } 247 HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) { 248 return static_cast<const T*>(VoidPlaneRow(c, y)); 249 } 250 251 HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const { 252 return static_cast<T*>(VoidPlaneRow(c, y)); 253 } 254 HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) { 255 return static_cast<T*>(VoidPlaneRow(c, y)); 256 } 257 258 HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; } 259 260 void Swap(Image3& other) { 261 for (size_t c = 0; c < 3; ++c) { 262 other.planes_[c].Swap(planes_[c]); 263 } 264 } 265 266 void ShrinkTo(const size_t xsize, const size_t ysize) { 267 for (ImageT& plane : planes_) { 268 plane.ShrinkTo(xsize, ysize); 269 } 270 } 271 272 // Sizes of all three images are guaranteed to be equal. 273 HWY_INLINE size_t xsize() const { return planes_[0].xsize(); } 274 HWY_INLINE size_t ysize() const { return planes_[0].ysize(); } 275 // Returns offset [bytes] from one row to the next row of the same plane. 276 // WARNING: this must NOT be used to determine xsize, nor for copying rows - 277 // the valid xsize may be much less. 278 HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); } 279 // Returns number of pixels (some of which are padding) per row. Useful for 280 // computing other rows via pointer arithmetic. WARNING: this must NOT be used 281 // to determine xsize. 282 HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); } 283 284 private: 285 // Returns pointer to the start of a row. 
286 HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const { 287 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN 288 if (c >= kNumPlanes || y >= ysize()) { 289 HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c), 290 static_cast<int>(y), static_cast<int>(ysize())); 291 } 292 #endif 293 // Use the first plane's stride because the compiler might not realize they 294 // are all equal. Thus we only need a single multiplication for all planes. 295 const size_t row_offset = y * planes_[0].bytes_per_row(); 296 const void* row = planes_[c].bytes() + row_offset; 297 return static_cast<const T * HWY_RESTRICT>( 298 HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT)); 299 } 300 301 private: 302 ImageT planes_[kNumPlanes]; 303 }; 304 305 using Image3F = Image3<float>; 306 307 // Rectangular region in image(s). Factoring this out of Image instead of 308 // shifting the pointer by x0/y0 allows this to apply to multiple images with 309 // different resolutions. Can compare size via SameSize(rect1, rect2). 310 class Rect { 311 public: 312 // Most windows are xsize_max * ysize_max, except those on the borders where 313 // begin + size_max > end. 314 constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max, 315 size_t ysize_max, size_t xend, size_t yend) 316 : x0_(xbegin), 317 y0_(ybegin), 318 xsize_(ClampedSize(xbegin, xsize_max, xend)), 319 ysize_(ClampedSize(ybegin, ysize_max, yend)) {} 320 321 // Construct with origin and known size (typically from another Rect). 322 constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize) 323 : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {} 324 325 // Construct a rect that covers a whole image. 
326 template <typename Image> 327 explicit Rect(const Image& image) 328 : Rect(0, 0, image.xsize(), image.ysize()) {} 329 330 Rect() : Rect(0, 0, 0, 0) {} 331 332 Rect(const Rect&) = default; 333 Rect& operator=(const Rect&) = default; 334 335 Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max, 336 size_t ysize_max) { 337 return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_, 338 y0_ + ysize_); 339 } 340 341 template <typename T> 342 const T* ConstRow(const Image<T>* image, size_t y) const { 343 return image->ConstRow(y + y0_) + x0_; 344 } 345 346 template <typename T> 347 T* MutableRow(const Image<T>* image, size_t y) const { 348 return image->MutableRow(y + y0_) + x0_; 349 } 350 351 template <typename T> 352 const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const { 353 return image.ConstPlaneRow(c, y + y0_) + x0_; 354 } 355 356 template <typename T> 357 T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const { 358 return image->MutablePlaneRow(c, y + y0_) + x0_; 359 } 360 361 // Returns true if this Rect fully resides in the given image. ImageT could be 362 // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical. 363 template <class ImageT> 364 bool IsInside(const ImageT& image) const { 365 return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize()); 366 } 367 368 size_t x0() const { return x0_; } 369 size_t y0() const { return y0_; } 370 size_t xsize() const { return xsize_; } 371 size_t ysize() const { return ysize_; } 372 373 private: 374 // Returns size_max, or whatever is left in [begin, end). 375 static constexpr size_t ClampedSize(size_t begin, size_t size_max, 376 size_t end) { 377 return (begin + size_max <= end) ? size_max 378 : (end > begin ? end - begin : 0); 379 } 380 381 size_t x0_; 382 size_t y0_; 383 384 size_t xsize_; 385 size_t ysize_; 386 }; 387 388 // Works for any image-like input type(s). 
template <class Image1, class Image2>
HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
  return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
}

// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
// We assume the radius (distance outside the image) is small compared to the
// image size, otherwise this might not terminate.
// The mirror is outside the last column (border pixel is also replicated).
static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
                                                 const int64_t xsize) {
  HWY_DASSERT(xsize != 0);

  // TODO(janwas): replace with branchless version
  // Repeated reflection handles radii larger than xsize (each iteration folds
  // x back across one border).
  while (x < 0 || x >= xsize) {
    if (x < 0) {
      // Reflect across the left edge: -1 => 0, -2 => 1, ... (border pixel
      // replicated because the mirror axis lies outside the first column).
      x = -x - 1;
    } else {
      // Reflect across the right edge: xsize => xsize - 1, xsize + 1 =>
      // xsize - 2, ...
      x = 2 * xsize - 1 - x;
    }
  }
  return static_cast<size_t>(x);
}

// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):

// Mirrors (repeating the edge pixel once). Useful for convolutions.
struct WrapMirror {
  HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
    return Mirror(coord, static_cast<int64_t>(size));
  }
};

// Returns the same coordinate, for when we know "coord" is already valid (e.g.
// interior of an image).
struct WrapUnchanged {
  HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
    return static_cast<size_t>(coord);
  }
};

// Similar to Wrap* but for row pointers (reduces Row() multiplications).

// Maps out-of-bounds row pointers back into [first_row_, last_row_] by
// mirroring, measured in units of `stride` (elements per row). NOTE(review):
// the pointer arithmetic assumes rows are spaced exactly `stride` apart, i.e.
// `row` was computed as first_row + y * stride for some (possibly
// out-of-range) y — confirm against callers.
class WrapRowMirror {
 public:
  template <class View>
  WrapRowMirror(const View& image, size_t ysize)
      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}

  const float* operator()(const float* const HWY_RESTRICT row,
                          const int64_t stride) const {
    if (row < first_row_) {
      // num_before is a multiple of stride (see NOTE above).
      const int64_t num_before = first_row_ - row;
      // Mirrored; one row before => row 0, two before => row 1, ...
      return first_row_ + num_before - stride;
    }
    if (row > last_row_) {
      const int64_t num_after = row - last_row_;
      // Mirrored; one row after => last row, two after => last - 1, ...
      return last_row_ - num_after + stride;
    }
    // Already within [first_row_, last_row_]: return unchanged.
    return row;
  }

 private:
  const float* const HWY_RESTRICT first_row_;
  const float* const HWY_RESTRICT last_row_;
};

// Identity row wrapper, for when the row is known to be in bounds.
struct WrapRowUnchanged {
  HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
                                     int64_t /*stride*/) const {
    return row;
  }
};

}  // namespace hwy

#endif  // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_