tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pyramid.c (18549B)


      1 /*
      2 * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include "aom_dsp/pyramid.h"
     13 #include "aom_mem/aom_mem.h"
     14 #include "aom_ports/bitops.h"
     15 #include "aom_util/aom_pthread.h"
     16 
     17 // TODO(rachelbarker): Move needed code from av1/ to aom_dsp/
     18 #include "av1/common/resize.h"
     19 
     20 #include <assert.h>
     21 #include <string.h>
     22 
     23 // Lifecycle:
     24 // * Frame buffer alloc code calls aom_get_pyramid_alloc_size()
     25 //   to work out how much space is needed for a given number of pyramid
     26 //   levels. This is counted in the size checked against the max allocation
     27 //   limit
     28 // * Then calls aom_alloc_pyramid() to actually create the pyramid
     29 // * Pyramid is initially marked as containing no valid data
     30 // * Each pyramid layer is computed on-demand, the first time it is requested
     31 // * Whenever frame buffer is reused, reset the counter of filled levels.
     32 //   This invalidates all of the existing pyramid levels.
     33 // * Whenever frame buffer is resized, reallocate pyramid
     34 
     35 size_t aom_get_pyramid_alloc_size(int width, int height, bool image_is_16bit) {
     36  // Allocate the maximum possible number of layers for this width and height
     37  const int msb = get_msb(AOMMIN(width, height));
     38  const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
     39 
     40  size_t alloc_size = 0;
     41  alloc_size += sizeof(ImagePyramid);
     42  alloc_size += n_levels * sizeof(PyramidLayer);
     43 
     44  // Calculate how much memory is needed for downscaled frame buffers
     45  size_t buffer_size = 0;
     46 
     47  // Work out if we need to allocate a few extra bytes for alignment.
     48  // aom_memalign() will ensure that the start of the allocation is aligned
     49  // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel
     50  // to be aligned, not the first byte of the allocation.
     51  //
     52  // In the loop below, we ensure that the stride of every image is a multiple
     53  // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will
     54  // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the
     55  // first pixel in the first pyramid layer aligned properly, that will
     56  // automatically mean that the first pixel of every row of every layer is
     57  // properly aligned too.
     58  //
     59  // Thus all we need to consider is the first pixel in the first layer.
     60  // This is located at offset
     61  //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING
     62  // bytes into the buffer. Since level_stride is a multiple of
     63  // PYRAMID_ALIGNMENT, we can ignore that. So we need
     64  //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT
     65  //
     66  // To solve this, we can round PYRAMID_PADDING up to the next multiple
     67  // of PYRAMID_ALIGNMENT, then subtract the orginal value to calculate
     68  // how many extra bytes are needed.
     69  size_t first_px_offset =
     70      (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
     71  size_t extra_bytes = first_px_offset - PYRAMID_PADDING;
     72  buffer_size += extra_bytes;
     73 
     74  // If the original image is stored in an 8-bit buffer, then we can point the
     75  // lowest pyramid level at that buffer rather than allocating a new one.
     76  int first_allocated_level = image_is_16bit ? 0 : 1;
     77 
     78  for (int level = first_allocated_level; level < n_levels; level++) {
     79    int level_width = width >> level;
     80    int level_height = height >> level;
     81 
     82    // Allocate padding for each layer
     83    int padded_width = level_width + 2 * PYRAMID_PADDING;
     84    int padded_height = level_height + 2 * PYRAMID_PADDING;
     85 
     86    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT
     87    // This ensures that, as long as the top-left pixel in this pyramid level is
     88    // properly aligned, then so will the leftmost pixel in every row of the
     89    // pyramid level.
     90    int level_stride =
     91        (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
     92 
     93    buffer_size += level_stride * padded_height;
     94  }
     95 
     96  alloc_size += buffer_size;
     97 
     98  return alloc_size;
     99 }
    100 
    101 ImagePyramid *aom_alloc_pyramid(int width, int height, bool image_is_16bit) {
    102  // Allocate the maximum possible number of layers for this width and height
    103  const int msb = get_msb(AOMMIN(width, height));
    104  const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1);
    105 
    106  ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr));
    107  if (!pyr) {
    108    return NULL;
    109  }
    110 
    111  pyr->layers = aom_calloc(n_levels, sizeof(*pyr->layers));
    112  if (!pyr->layers) {
    113    aom_free(pyr);
    114    return NULL;
    115  }
    116 
    117  pyr->max_levels = n_levels;
    118  pyr->filled_levels = 0;
    119 
    120  // Compute sizes and offsets for each pyramid level
    121  // These are gathered up first, so that we can allocate all pyramid levels
    122  // in a single buffer
    123  size_t buffer_size = 0;
    124  size_t *layer_offsets = aom_calloc(n_levels, sizeof(*layer_offsets));
    125  if (!layer_offsets) {
    126    aom_free(pyr->layers);
    127    aom_free(pyr);
    128    return NULL;
    129  }
    130 
    131  // Work out if we need to allocate a few extra bytes for alignment.
    132  // aom_memalign() will ensure that the start of the allocation is aligned
    133  // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel
    134  // to be aligned, not the first byte of the allocation.
    135  //
    136  // In the loop below, we ensure that the stride of every image is a multiple
    137  // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will
    138  // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the
    139  // first pixel in the first pyramid layer aligned properly, that will
    140  // automatically mean that the first pixel of every row of every layer is
    141  // properly aligned too.
    142  //
    143  // Thus all we need to consider is the first pixel in the first layer.
    144  // This is located at offset
    145  //   extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING
    146  // bytes into the buffer. Since level_stride is a multiple of
    147  // PYRAMID_ALIGNMENT, we can ignore that. So we need
    148  //   extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT
    149  //
    150  // To solve this, we can round PYRAMID_PADDING up to the next multiple
    151  // of PYRAMID_ALIGNMENT, then subtract the orginal value to calculate
    152  // how many extra bytes are needed.
    153  size_t first_px_offset =
    154      (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
    155  size_t extra_bytes = first_px_offset - PYRAMID_PADDING;
    156  buffer_size += extra_bytes;
    157 
    158  // If the original image is stored in an 8-bit buffer, then we can point the
    159  // lowest pyramid level at that buffer rather than allocating a new one.
    160  int first_allocated_level = image_is_16bit ? 0 : 1;
    161 
    162  for (int level = first_allocated_level; level < n_levels; level++) {
    163    PyramidLayer *layer = &pyr->layers[level];
    164 
    165    int level_width = width >> level;
    166    int level_height = height >> level;
    167 
    168    // Allocate padding for each layer
    169    int padded_width = level_width + 2 * PYRAMID_PADDING;
    170    int padded_height = level_height + 2 * PYRAMID_PADDING;
    171 
    172    // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT
    173    // This ensures that, as long as the top-left pixel in this pyramid level is
    174    // properly aligned, then so will the leftmost pixel in every row of the
    175    // pyramid level.
    176    int level_stride =
    177        (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1);
    178 
    179    size_t level_alloc_start = buffer_size;
    180    size_t level_start =
    181        level_alloc_start + PYRAMID_PADDING * level_stride + PYRAMID_PADDING;
    182 
    183    buffer_size += level_stride * padded_height;
    184 
    185    layer_offsets[level] = level_start;
    186    layer->width = level_width;
    187    layer->height = level_height;
    188    layer->stride = level_stride;
    189  }
    190 
    191  pyr->buffer_alloc =
    192      aom_memalign(PYRAMID_ALIGNMENT, buffer_size * sizeof(*pyr->buffer_alloc));
    193  if (!pyr->buffer_alloc) {
    194    aom_free(pyr->layers);
    195    aom_free(pyr);
    196    aom_free(layer_offsets);
    197    return NULL;
    198  }
    199 
    200  // Fill in pointers for each level
    201  // If image is 8-bit, then the lowest level is left unconfigured for now,
    202  // and will be set up properly when the pyramid is filled in
    203  for (int level = first_allocated_level; level < n_levels; level++) {
    204    PyramidLayer *layer = &pyr->layers[level];
    205    layer->buffer = pyr->buffer_alloc + layer_offsets[level];
    206  }
    207 
    208 #if CONFIG_MULTITHREAD
    209  pthread_mutex_init(&pyr->mutex, NULL);
    210 #endif  // CONFIG_MULTITHREAD
    211 
    212  aom_free(layer_offsets);
    213  return pyr;
    214 }
    215 
    216 // Fill the border region of a pyramid frame.
    217 // This must be called after the main image area is filled out.
    218 // `img_buf` should point to the first pixel in the image area,
    219 // ie. it should be pyr->level_buffer + pyr->level_loc[level].
    220 static inline void fill_border(uint8_t *img_buf, const int width,
    221                               const int height, const int stride) {
    222  // Fill left and right areas
    223  for (int row = 0; row < height; row++) {
    224    uint8_t *row_start = &img_buf[row * stride];
    225    uint8_t left_pixel = row_start[0];
    226    memset(row_start - PYRAMID_PADDING, left_pixel, PYRAMID_PADDING);
    227    uint8_t right_pixel = row_start[width - 1];
    228    memset(row_start + width, right_pixel, PYRAMID_PADDING);
    229  }
    230 
    231  // Fill top area
    232  for (int row = -PYRAMID_PADDING; row < 0; row++) {
    233    uint8_t *row_start = &img_buf[row * stride];
    234    memcpy(row_start - PYRAMID_PADDING, img_buf - PYRAMID_PADDING,
    235           width + 2 * PYRAMID_PADDING);
    236  }
    237 
    238  // Fill bottom area
    239  uint8_t *last_row_start = &img_buf[(height - 1) * stride];
    240  for (int row = height; row < height + PYRAMID_PADDING; row++) {
    241    uint8_t *row_start = &img_buf[row * stride];
    242    memcpy(row_start - PYRAMID_PADDING, last_row_start - PYRAMID_PADDING,
    243           width + 2 * PYRAMID_PADDING);
    244  }
    245 }
    246 
// Compute downsampling pyramid for a frame
//
// This function will ensure that the first `n_levels` levels of the pyramid
// are filled, unless the frame is too small to have this many levels.
// In that case, we will fill all available levels and then stop.
//
// Returns the actual number of levels filled, capped at n_levels,
// or -1 on error.
//
// This must only be called while holding frame_pyr->mutex
static inline int fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
                              int n_levels, ImagePyramid *frame_pyr) {
  int already_filled_levels = frame_pyr->filled_levels;

  // This condition should already be enforced by aom_compute_pyramid
  assert(n_levels <= frame_pyr->max_levels);

  // Nothing to do if the requested levels have already been computed.
  if (already_filled_levels >= n_levels) {
    return n_levels;
  }

  const int frame_width = frame->y_crop_width;
  const int frame_height = frame->y_crop_height;
  const int frame_stride = frame->y_stride;
  assert((frame_width >> n_levels) >= 0);
  assert((frame_height >> n_levels) >= 0);

  if (already_filled_levels == 0) {
    // Fill in largest level from the original image
    PyramidLayer *first_layer = &frame_pyr->layers[0];
    if (frame->flags & YV12_FLAG_HIGHBITDEPTH) {
      // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits
      assert(first_layer->width == frame_width);
      assert(first_layer->height == frame_height);

      uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer);
      uint8_t *pyr_buffer = first_layer->buffer;
      int pyr_stride = first_layer->stride;
      for (int y = 0; y < frame_height; y++) {
        uint16_t *frame_row = frame_buffer + y * frame_stride;
        uint8_t *pyr_row = pyr_buffer + y * pyr_stride;
        for (int x = 0; x < frame_width; x++) {
          // Keep only the top 8 bits of each sample.
          pyr_row[x] = frame_row[x] >> (bit_depth - 8);
        }
      }

      fill_border(pyr_buffer, frame_width, frame_height, pyr_stride);
    } else {
      // For frames stored in an 8-bit buffer, we don't need to copy anything -
      // we can just reference the original image buffer
      first_layer->buffer = frame->y_buffer;
      first_layer->width = frame_width;
      first_layer->height = frame_height;
      first_layer->stride = frame_stride;
    }

    already_filled_levels = 1;
  }

  // Fill in the remaining levels through progressive downsampling
  for (int level = already_filled_levels; level < n_levels; ++level) {
    bool mem_status = false;
    PyramidLayer *prev_layer = &frame_pyr->layers[level - 1];
    uint8_t *prev_buffer = prev_layer->buffer;
    int prev_stride = prev_layer->stride;

    PyramidLayer *this_layer = &frame_pyr->layers[level];
    uint8_t *this_buffer = this_layer->buffer;
    int this_width = this_layer->width;
    int this_height = this_layer->height;
    int this_stride = this_layer->stride;

    // The width and height of the previous layer that needs to be considered to
    // derive the current layer frame.
    const int input_layer_width = this_width << 1;
    const int input_layer_height = this_height << 1;

    // Compute the this pyramid level by downsampling the current level.
    //
    // We downsample by a factor of exactly 2, clipping the rightmost and
    // bottommost pixel off of the current level if needed. We do this for
    // two main reasons:
    //
    // 1) In the disflow code, when stepping from a higher pyramid level to a
    //    lower pyramid level, we need to not just interpolate the flow field
    //    but also to scale each flow vector by the upsampling ratio.
    //    So it is much more convenient if this ratio is simply 2.
    //
    // 2) Up/downsampling by a factor of 2 can be implemented much more
    //    efficiently than up/downsampling by a generic ratio.
    //    TODO(rachelbarker): Use optimized downsample-by-2 function

    // SIMD support has been added specifically for cases where the downsample
    // factor is exactly 2. In such instances, horizontal and vertical resizing
    // is performed utilizing the down2_symeven() function, which considers the
    // even dimensions of the input layer.
    if (should_resize_by_half(input_layer_height, input_layer_width,
                              this_height, this_width)) {
      assert(input_layer_height % 2 == 0 && input_layer_width % 2 == 0 &&
             "Input width or height cannot be odd.");
      mem_status = av1_resize_plane_to_half(
          prev_buffer, input_layer_height, input_layer_width, prev_stride,
          this_buffer, this_height, this_width, this_stride);
    } else {
      mem_status = av1_resize_plane(prev_buffer, input_layer_height,
                                    input_layer_width, prev_stride, this_buffer,
                                    this_height, this_width, this_stride);
    }

    // Terminate early in cases of memory allocation failure.
    // NOTE(review): filled_levels is set to n_levels here even though levels
    // [level, n_levels) were not successfully computed. Callers appear to
    // treat the -1 return as fatal, but confirm this cannot leave a pyramid
    // that later passes aom_is_pyramid_valid() with stale level data.
    if (!mem_status) {
      frame_pyr->filled_levels = n_levels;
      return -1;
    }

    fill_border(this_buffer, this_width, this_height, this_stride);
  }

  frame_pyr->filled_levels = n_levels;
  return n_levels;
}
    368 
    369 // Fill out a downsampling pyramid for a given frame.
    370 //
    371 // The top level (index 0) will always be an 8-bit copy of the input frame,
    372 // regardless of the input bit depth. Additional levels are then downscaled
    373 // by powers of 2.
    374 //
    375 // This function will ensure that the first `n_levels` levels of the pyramid
    376 // are filled, unless the frame is too small to have this many levels.
    377 // In that case, we will fill all available levels and then stop.
    378 // No matter how small the frame is, at least one level is guaranteed
    379 // to be filled.
    380 //
    381 // Returns the actual number of levels filled, capped at n_levels,
    382 // or -1 on error.
    383 int aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth,
    384                        int n_levels, ImagePyramid *pyr) {
    385  assert(pyr);
    386 
    387  // Per the comments in the ImagePyramid struct, we must take this mutex
    388  // before reading or writing the filled_levels field, and hold it while
    389  // computing any additional pyramid levels, to ensure proper behaviour
    390  // when multithreading is used
    391 #if CONFIG_MULTITHREAD
    392  pthread_mutex_lock(&pyr->mutex);
    393 #endif  // CONFIG_MULTITHREAD
    394 
    395  n_levels = AOMMIN(n_levels, pyr->max_levels);
    396  int result = n_levels;
    397  if (pyr->filled_levels < n_levels) {
    398    // Compute any missing levels that we need
    399    result = fill_pyramid(frame, bit_depth, n_levels, pyr);
    400  }
    401 
    402  // At this point, as long as result >= 0, the requested number of pyramid
    403  // levels are guaranteed to be valid, and can be safely read from without
    404  // holding the mutex any further
    405  assert(IMPLIES(result >= 0, pyr->filled_levels >= n_levels));
    406 #if CONFIG_MULTITHREAD
    407  pthread_mutex_unlock(&pyr->mutex);
    408 #endif  // CONFIG_MULTITHREAD
    409  return result;
    410 }
    411 
    412 #ifndef NDEBUG
    413 // Check if a pyramid has already been computed to at least n levels
    414 // This is mostly a debug helper - as it is necessary to hold pyr->mutex
    415 // while reading the number of already-computed levels, we cannot just write:
    416 //   assert(pyr->filled_levels >= n_levels);
    417 // This function allows the check to be correctly written as:
    418 //   assert(aom_is_pyramid_valid(pyr, n_levels));
    419 //
    420 // Note: This deliberately does not restrict n_levels based on the maximum
    421 // number of permitted levels for the frame size. This allows the check to
    422 // catch cases where the caller forgets to handle the case where
    423 // max_levels is less than the requested number of levels
    424 bool aom_is_pyramid_valid(ImagePyramid *pyr, int n_levels) {
    425  assert(pyr);
    426 
    427  // Per the comments in the ImagePyramid struct, we must take this mutex
    428  // before reading or writing the filled_levels field, to ensure proper
    429  // behaviour when multithreading is used
    430 #if CONFIG_MULTITHREAD
    431  pthread_mutex_lock(&pyr->mutex);
    432 #endif  // CONFIG_MULTITHREAD
    433 
    434  bool result = (pyr->filled_levels >= n_levels);
    435 
    436 #if CONFIG_MULTITHREAD
    437  pthread_mutex_unlock(&pyr->mutex);
    438 #endif  // CONFIG_MULTITHREAD
    439 
    440  return result;
    441 }
    442 #endif
    443 
    444 // Mark a pyramid as no longer containing valid data.
    445 // This must be done whenever the corresponding frame buffer is reused
    446 void aom_invalidate_pyramid(ImagePyramid *pyr) {
    447  if (pyr) {
    448 #if CONFIG_MULTITHREAD
    449    pthread_mutex_lock(&pyr->mutex);
    450 #endif  // CONFIG_MULTITHREAD
    451    pyr->filled_levels = 0;
    452 #if CONFIG_MULTITHREAD
    453    pthread_mutex_unlock(&pyr->mutex);
    454 #endif  // CONFIG_MULTITHREAD
    455  }
    456 }
    457 
    458 // Release the memory associated with a pyramid
    459 void aom_free_pyramid(ImagePyramid *pyr) {
    460  if (pyr) {
    461 #if CONFIG_MULTITHREAD
    462    pthread_mutex_destroy(&pyr->mutex);
    463 #endif  // CONFIG_MULTITHREAD
    464    aom_free(pyr->buffer_alloc);
    465    aom_free(pyr->layers);
    466    aom_free(pyr);
    467  }
    468 }