tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

image.cc (4883B)


      1 // Copyright 2020 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include "hwy/contrib/image/image.h"
     17 
     18 #include <stddef.h>
     19 #include <stdint.h>
     20 
     21 #include <algorithm>  // std::swap
     22 
     23 #include "hwy/aligned_allocator.h"
     24 #include "hwy/base.h"
     25 #include "hwy/per_target.h"
     26 
     27 namespace hwy {
     28 
     29 size_t ImageBase::VectorSize() {
     30  // Do not cache result - must return the current value, which may be greater
     31  // than the first call if it was subject to DisableTargets!
     32  return VectorBytes();
     33 }
     34 
     35 size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
     36  const size_t vec_size = VectorSize();
     37  size_t valid_bytes = xsize * sizeof_t;
     38 
     39  // Allow unaligned accesses starting at the last valid value - this may raise
     40  // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
     41  // Skip for the scalar case because no extra lanes will be loaded.
     42  if (vec_size != 1) {
     43    HWY_DASSERT(vec_size >= sizeof_t);
     44    valid_bytes += vec_size - sizeof_t;
     45  }
     46 
     47  // Round up to vector and cache line size.
     48  const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
     49  size_t bytes_per_row = RoundUpTo(valid_bytes, align);
     50 
     51  // During the lengthy window before writes are committed to memory, CPUs
     52  // guard against read after write hazards by checking the address, but
     53  // only the lower 11 bits. We avoid a false dependency between writes to
     54  // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
     55  // Avoid2K prevents the same problem for the planes of an Image3.
     56  if (bytes_per_row % HWY_ALIGNMENT == 0) {
     57    bytes_per_row += align;
     58  }
     59 
     60  HWY_DASSERT(bytes_per_row % align == 0);
     61  return bytes_per_row;
     62 }
     63 
     64 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
     65                     const size_t sizeof_t)
     66    : xsize_(static_cast<uint32_t>(xsize)),
     67      ysize_(static_cast<uint32_t>(ysize)),
     68      bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
     69  HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
     70 
     71  bytes_per_row_ = 0;
     72  // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
     73  // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
     74  if (xsize != 0 && ysize != 0) {
     75    bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
     76    bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
     77    HWY_ASSERT(bytes_.get() != nullptr);
     78    InitializePadding(sizeof_t, Padding::kRoundUp);
     79  }
     80 }
     81 
     82 ImageBase::ImageBase(const size_t xsize, const size_t ysize,
     83                     const size_t bytes_per_row, void* const aligned)
     84    : xsize_(static_cast<uint32_t>(xsize)),
     85      ysize_(static_cast<uint32_t>(ysize)),
     86      bytes_per_row_(bytes_per_row),
     87      bytes_(static_cast<uint8_t*>(aligned),
     88             AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
     89  const size_t vec_size = VectorSize();
     90  HWY_ASSERT(bytes_per_row % vec_size == 0);
     91  HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
     92 }
     93 
     94 void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
     95 #if HWY_IS_MSAN || HWY_IDE
     96  if (xsize_ == 0 || ysize_ == 0) return;
     97 
     98  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
     99  if (vec_size == 1) return;             // Scalar mode: no padding needed
    100 
    101  const size_t valid_size = xsize_ * sizeof_t;
    102  const size_t initialize_size = padding == Padding::kRoundUp
    103                                     ? RoundUpTo(valid_size, vec_size)
    104                                     : valid_size + vec_size - sizeof_t;
    105  if (valid_size == initialize_size) return;
    106 
    107  for (size_t y = 0; y < ysize_; ++y) {
    108    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
    109 #if defined(__clang__) && (__clang_major__ <= 6)
    110    // There's a bug in msan in clang-6 when handling AVX2 operations. This
    111    // workaround allows tests to pass on msan, although it is slower and
    112    // prevents msan warnings from uninitialized images.
    113    memset(row, 0, initialize_size);
    114 #else
    115    hwy::ZeroBytes(row + valid_size, initialize_size - valid_size);
    116 #endif  // clang6
    117  }
    118 #else
    119  (void)sizeof_t;
    120  (void)padding;
    121 #endif  // HWY_IS_MSAN
    122 }
    123 
    124 void ImageBase::Swap(ImageBase& other) {
    125  std::swap(xsize_, other.xsize_);
    126  std::swap(ysize_, other.ysize_);
    127  std::swap(bytes_per_row_, other.bytes_per_row_);
    128  std::swap(bytes_, other.bytes_);
    129 }
    130 
    131 }  // namespace hwy