pyramid.c (18549B)
1 /* 2 * Copyright (c) 2022, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include "aom_dsp/pyramid.h" 13 #include "aom_mem/aom_mem.h" 14 #include "aom_ports/bitops.h" 15 #include "aom_util/aom_pthread.h" 16 17 // TODO(rachelbarker): Move needed code from av1/ to aom_dsp/ 18 #include "av1/common/resize.h" 19 20 #include <assert.h> 21 #include <string.h> 22 23 // Lifecycle: 24 // * Frame buffer alloc code calls aom_get_pyramid_alloc_size() 25 // to work out how much space is needed for a given number of pyramid 26 // levels. This is counted in the size checked against the max allocation 27 // limit 28 // * Then calls aom_alloc_pyramid() to actually create the pyramid 29 // * Pyramid is initially marked as containing no valid data 30 // * Each pyramid layer is computed on-demand, the first time it is requested 31 // * Whenever frame buffer is reused, reset the counter of filled levels. 32 // This invalidates all of the existing pyramid levels. 33 // * Whenever frame buffer is resized, reallocate pyramid 34 35 size_t aom_get_pyramid_alloc_size(int width, int height, bool image_is_16bit) { 36 // Allocate the maximum possible number of layers for this width and height 37 const int msb = get_msb(AOMMIN(width, height)); 38 const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1); 39 40 size_t alloc_size = 0; 41 alloc_size += sizeof(ImagePyramid); 42 alloc_size += n_levels * sizeof(PyramidLayer); 43 44 // Calculate how much memory is needed for downscaled frame buffers 45 size_t buffer_size = 0; 46 47 // Work out if we need to allocate a few extra bytes for alignment. 48 // aom_memalign() will ensure that the start of the allocation is aligned 49 // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel 50 // to be aligned, not the first byte of the allocation. 51 // 52 // In the loop below, we ensure that the stride of every image is a multiple 53 // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will 54 // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the 55 // first pixel in the first pyramid layer aligned properly, that will 56 // automatically mean that the first pixel of every row of every layer is 57 // properly aligned too. 58 // 59 // Thus all we need to consider is the first pixel in the first layer. 60 // This is located at offset 61 // extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING 62 // bytes into the buffer. Since level_stride is a multiple of 63 // PYRAMID_ALIGNMENT, we can ignore that. So we need 64 // extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT 65 // 66 // To solve this, we can round PYRAMID_PADDING up to the next multiple 67 // of PYRAMID_ALIGNMENT, then subtract the orginal value to calculate 68 // how many extra bytes are needed. 69 size_t first_px_offset = 70 (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); 71 size_t extra_bytes = first_px_offset - PYRAMID_PADDING; 72 buffer_size += extra_bytes; 73 74 // If the original image is stored in an 8-bit buffer, then we can point the 75 // lowest pyramid level at that buffer rather than allocating a new one. 76 int first_allocated_level = image_is_16bit ? 0 : 1; 77 78 for (int level = first_allocated_level; level < n_levels; level++) { 79 int level_width = width >> level; 80 int level_height = height >> level; 81 82 // Allocate padding for each layer 83 int padded_width = level_width + 2 * PYRAMID_PADDING; 84 int padded_height = level_height + 2 * PYRAMID_PADDING; 85 86 // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT 87 // This ensures that, as long as the top-left pixel in this pyramid level is 88 // properly aligned, then so will the leftmost pixel in every row of the 89 // pyramid level. 90 int level_stride = 91 (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); 92 93 buffer_size += level_stride * padded_height; 94 } 95 96 alloc_size += buffer_size; 97 98 return alloc_size; 99 } 100 101 ImagePyramid *aom_alloc_pyramid(int width, int height, bool image_is_16bit) { 102 // Allocate the maximum possible number of layers for this width and height 103 const int msb = get_msb(AOMMIN(width, height)); 104 const int n_levels = AOMMAX(msb - MIN_PYRAMID_SIZE_LOG2, 1); 105 106 ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr)); 107 if (!pyr) { 108 return NULL; 109 } 110 111 pyr->layers = aom_calloc(n_levels, sizeof(*pyr->layers)); 112 if (!pyr->layers) { 113 aom_free(pyr); 114 return NULL; 115 } 116 117 pyr->max_levels = n_levels; 118 pyr->filled_levels = 0; 119 120 // Compute sizes and offsets for each pyramid level 121 // These are gathered up first, so that we can allocate all pyramid levels 122 // in a single buffer 123 size_t buffer_size = 0; 124 size_t *layer_offsets = aom_calloc(n_levels, sizeof(*layer_offsets)); 125 if (!layer_offsets) { 126 aom_free(pyr->layers); 127 aom_free(pyr); 128 return NULL; 129 } 130 131 // Work out if we need to allocate a few extra bytes for alignment. 132 // aom_memalign() will ensure that the start of the allocation is aligned 133 // to a multiple of PYRAMID_ALIGNMENT. But we want the first image pixel 134 // to be aligned, not the first byte of the allocation. 135 // 136 // In the loop below, we ensure that the stride of every image is a multiple 137 // of PYRAMID_ALIGNMENT. Thus the allocated size of each pyramid level will 138 // also be a multiple of PYRAMID_ALIGNMENT. Thus, as long as we can get the 139 // first pixel in the first pyramid layer aligned properly, that will 140 // automatically mean that the first pixel of every row of every layer is 141 // properly aligned too. 142 // 143 // Thus all we need to consider is the first pixel in the first layer. 144 // This is located at offset 145 // extra_bytes + level_stride * PYRAMID_PADDING + PYRAMID_PADDING 146 // bytes into the buffer. Since level_stride is a multiple of 147 // PYRAMID_ALIGNMENT, we can ignore that. So we need 148 // extra_bytes + PYRAMID_PADDING = multiple of PYRAMID_ALIGNMENT 149 // 150 // To solve this, we can round PYRAMID_PADDING up to the next multiple 151 // of PYRAMID_ALIGNMENT, then subtract the orginal value to calculate 152 // how many extra bytes are needed. 153 size_t first_px_offset = 154 (PYRAMID_PADDING + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); 155 size_t extra_bytes = first_px_offset - PYRAMID_PADDING; 156 buffer_size += extra_bytes; 157 158 // If the original image is stored in an 8-bit buffer, then we can point the 159 // lowest pyramid level at that buffer rather than allocating a new one. 160 int first_allocated_level = image_is_16bit ? 0 : 1; 161 162 for (int level = first_allocated_level; level < n_levels; level++) { 163 PyramidLayer *layer = &pyr->layers[level]; 164 165 int level_width = width >> level; 166 int level_height = height >> level; 167 168 // Allocate padding for each layer 169 int padded_width = level_width + 2 * PYRAMID_PADDING; 170 int padded_height = level_height + 2 * PYRAMID_PADDING; 171 172 // Align the layer stride to be a multiple of PYRAMID_ALIGNMENT 173 // This ensures that, as long as the top-left pixel in this pyramid level is 174 // properly aligned, then so will the leftmost pixel in every row of the 175 // pyramid level. 176 int level_stride = 177 (padded_width + PYRAMID_ALIGNMENT - 1) & ~(PYRAMID_ALIGNMENT - 1); 178 179 size_t level_alloc_start = buffer_size; 180 size_t level_start = 181 level_alloc_start + PYRAMID_PADDING * level_stride + PYRAMID_PADDING; 182 183 buffer_size += level_stride * padded_height; 184 185 layer_offsets[level] = level_start; 186 layer->width = level_width; 187 layer->height = level_height; 188 layer->stride = level_stride; 189 } 190 191 pyr->buffer_alloc = 192 aom_memalign(PYRAMID_ALIGNMENT, buffer_size * sizeof(*pyr->buffer_alloc)); 193 if (!pyr->buffer_alloc) { 194 aom_free(pyr->layers); 195 aom_free(pyr); 196 aom_free(layer_offsets); 197 return NULL; 198 } 199 200 // Fill in pointers for each level 201 // If image is 8-bit, then the lowest level is left unconfigured for now, 202 // and will be set up properly when the pyramid is filled in 203 for (int level = first_allocated_level; level < n_levels; level++) { 204 PyramidLayer *layer = &pyr->layers[level]; 205 layer->buffer = pyr->buffer_alloc + layer_offsets[level]; 206 } 207 208 #if CONFIG_MULTITHREAD 209 pthread_mutex_init(&pyr->mutex, NULL); 210 #endif // CONFIG_MULTITHREAD 211 212 aom_free(layer_offsets); 213 return pyr; 214 } 215 216 // Fill the border region of a pyramid frame. 217 // This must be called after the main image area is filled out. 218 // `img_buf` should point to the first pixel in the image area, 219 // ie. it should be pyr->level_buffer + pyr->level_loc[level]. 220 static inline void fill_border(uint8_t *img_buf, const int width, 221 const int height, const int stride) { 222 // Fill left and right areas 223 for (int row = 0; row < height; row++) { 224 uint8_t *row_start = &img_buf[row * stride]; 225 uint8_t left_pixel = row_start[0]; 226 memset(row_start - PYRAMID_PADDING, left_pixel, PYRAMID_PADDING); 227 uint8_t right_pixel = row_start[width - 1]; 228 memset(row_start + width, right_pixel, PYRAMID_PADDING); 229 } 230 231 // Fill top area 232 for (int row = -PYRAMID_PADDING; row < 0; row++) { 233 uint8_t *row_start = &img_buf[row * stride]; 234 memcpy(row_start - PYRAMID_PADDING, img_buf - PYRAMID_PADDING, 235 width + 2 * PYRAMID_PADDING); 236 } 237 238 // Fill bottom area 239 uint8_t *last_row_start = &img_buf[(height - 1) * stride]; 240 for (int row = height; row < height + PYRAMID_PADDING; row++) { 241 uint8_t *row_start = &img_buf[row * stride]; 242 memcpy(row_start - PYRAMID_PADDING, last_row_start - PYRAMID_PADDING, 243 width + 2 * PYRAMID_PADDING); 244 } 245 } 246 247 // Compute downsampling pyramid for a frame 248 // 249 // This function will ensure that the first `n_levels` levels of the pyramid 250 // are filled, unless the frame is too small to have this many levels. 251 // In that case, we will fill all available levels and then stop. 252 // 253 // Returns the actual number of levels filled, capped at n_levels, 254 // or -1 on error. 255 // 256 // This must only be called while holding frame_pyr->mutex 257 static inline int fill_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth, 258 int n_levels, ImagePyramid *frame_pyr) { 259 int already_filled_levels = frame_pyr->filled_levels; 260 261 // This condition should already be enforced by aom_compute_pyramid 262 assert(n_levels <= frame_pyr->max_levels); 263 264 if (already_filled_levels >= n_levels) { 265 return n_levels; 266 } 267 268 const int frame_width = frame->y_crop_width; 269 const int frame_height = frame->y_crop_height; 270 const int frame_stride = frame->y_stride; 271 assert((frame_width >> n_levels) >= 0); 272 assert((frame_height >> n_levels) >= 0); 273 274 if (already_filled_levels == 0) { 275 // Fill in largest level from the original image 276 PyramidLayer *first_layer = &frame_pyr->layers[0]; 277 if (frame->flags & YV12_FLAG_HIGHBITDEPTH) { 278 // For frames stored in a 16-bit buffer, we need to downconvert to 8 bits 279 assert(first_layer->width == frame_width); 280 assert(first_layer->height == frame_height); 281 282 uint16_t *frame_buffer = CONVERT_TO_SHORTPTR(frame->y_buffer); 283 uint8_t *pyr_buffer = first_layer->buffer; 284 int pyr_stride = first_layer->stride; 285 for (int y = 0; y < frame_height; y++) { 286 uint16_t *frame_row = frame_buffer + y * frame_stride; 287 uint8_t *pyr_row = pyr_buffer + y * pyr_stride; 288 for (int x = 0; x < frame_width; x++) { 289 pyr_row[x] = frame_row[x] >> (bit_depth - 8); 290 } 291 } 292 293 fill_border(pyr_buffer, frame_width, frame_height, pyr_stride); 294 } else { 295 // For frames stored in an 8-bit buffer, we don't need to copy anything - 296 // we can just reference the original image buffer 297 first_layer->buffer = frame->y_buffer; 298 first_layer->width = frame_width; 299 first_layer->height = frame_height; 300 first_layer->stride = frame_stride; 301 } 302 303 already_filled_levels = 1; 304 } 305 306 // Fill in the remaining levels through progressive downsampling 307 for (int level = already_filled_levels; level < n_levels; ++level) { 308 bool mem_status = false; 309 PyramidLayer *prev_layer = &frame_pyr->layers[level - 1]; 310 uint8_t *prev_buffer = prev_layer->buffer; 311 int prev_stride = prev_layer->stride; 312 313 PyramidLayer *this_layer = &frame_pyr->layers[level]; 314 uint8_t *this_buffer = this_layer->buffer; 315 int this_width = this_layer->width; 316 int this_height = this_layer->height; 317 int this_stride = this_layer->stride; 318 319 // The width and height of the previous layer that needs to be considered to 320 // derive the current layer frame. 321 const int input_layer_width = this_width << 1; 322 const int input_layer_height = this_height << 1; 323 324 // Compute the this pyramid level by downsampling the current level. 325 // 326 // We downsample by a factor of exactly 2, clipping the rightmost and 327 // bottommost pixel off of the current level if needed. We do this for 328 // two main reasons: 329 // 330 // 1) In the disflow code, when stepping from a higher pyramid level to a 331 // lower pyramid level, we need to not just interpolate the flow field 332 // but also to scale each flow vector by the upsampling ratio. 333 // So it is much more convenient if this ratio is simply 2. 334 // 335 // 2) Up/downsampling by a factor of 2 can be implemented much more 336 // efficiently than up/downsampling by a generic ratio. 337 // TODO(rachelbarker): Use optimized downsample-by-2 function 338 339 // SIMD support has been added specifically for cases where the downsample 340 // factor is exactly 2. In such instances, horizontal and vertical resizing 341 // is performed utilizing the down2_symeven() function, which considers the 342 // even dimensions of the input layer. 343 if (should_resize_by_half(input_layer_height, input_layer_width, 344 this_height, this_width)) { 345 assert(input_layer_height % 2 == 0 && input_layer_width % 2 == 0 && 346 "Input width or height cannot be odd."); 347 mem_status = av1_resize_plane_to_half( 348 prev_buffer, input_layer_height, input_layer_width, prev_stride, 349 this_buffer, this_height, this_width, this_stride); 350 } else { 351 mem_status = av1_resize_plane(prev_buffer, input_layer_height, 352 input_layer_width, prev_stride, this_buffer, 353 this_height, this_width, this_stride); 354 } 355 356 // Terminate early in cases of memory allocation failure. 357 if (!mem_status) { 358 frame_pyr->filled_levels = n_levels; 359 return -1; 360 } 361 362 fill_border(this_buffer, this_width, this_height, this_stride); 363 } 364 365 frame_pyr->filled_levels = n_levels; 366 return n_levels; 367 } 368 369 // Fill out a downsampling pyramid for a given frame. 370 // 371 // The top level (index 0) will always be an 8-bit copy of the input frame, 372 // regardless of the input bit depth. Additional levels are then downscaled 373 // by powers of 2. 374 // 375 // This function will ensure that the first `n_levels` levels of the pyramid 376 // are filled, unless the frame is too small to have this many levels. 377 // In that case, we will fill all available levels and then stop. 378 // No matter how small the frame is, at least one level is guaranteed 379 // to be filled. 380 // 381 // Returns the actual number of levels filled, capped at n_levels, 382 // or -1 on error. 383 int aom_compute_pyramid(const YV12_BUFFER_CONFIG *frame, int bit_depth, 384 int n_levels, ImagePyramid *pyr) { 385 assert(pyr); 386 387 // Per the comments in the ImagePyramid struct, we must take this mutex 388 // before reading or writing the filled_levels field, and hold it while 389 // computing any additional pyramid levels, to ensure proper behaviour 390 // when multithreading is used 391 #if CONFIG_MULTITHREAD 392 pthread_mutex_lock(&pyr->mutex); 393 #endif // CONFIG_MULTITHREAD 394 395 n_levels = AOMMIN(n_levels, pyr->max_levels); 396 int result = n_levels; 397 if (pyr->filled_levels < n_levels) { 398 // Compute any missing levels that we need 399 result = fill_pyramid(frame, bit_depth, n_levels, pyr); 400 } 401 402 // At this point, as long as result >= 0, the requested number of pyramid 403 // levels are guaranteed to be valid, and can be safely read from without 404 // holding the mutex any further 405 assert(IMPLIES(result >= 0, pyr->filled_levels >= n_levels)); 406 #if CONFIG_MULTITHREAD 407 pthread_mutex_unlock(&pyr->mutex); 408 #endif // CONFIG_MULTITHREAD 409 return result; 410 } 411 412 #ifndef NDEBUG 413 // Check if a pyramid has already been computed to at least n levels 414 // This is mostly a debug helper - as it is necessary to hold pyr->mutex 415 // while reading the number of already-computed levels, we cannot just write: 416 // assert(pyr->filled_levels >= n_levels); 417 // This function allows the check to be correctly written as: 418 // assert(aom_is_pyramid_valid(pyr, n_levels)); 419 // 420 // Note: This deliberately does not restrict n_levels based on the maximum 421 // number of permitted levels for the frame size. This allows the check to 422 // catch cases where the caller forgets to handle the case where 423 // max_levels is less than the requested number of levels 424 bool aom_is_pyramid_valid(ImagePyramid *pyr, int n_levels) { 425 assert(pyr); 426 427 // Per the comments in the ImagePyramid struct, we must take this mutex 428 // before reading or writing the filled_levels field, to ensure proper 429 // behaviour when multithreading is used 430 #if CONFIG_MULTITHREAD 431 pthread_mutex_lock(&pyr->mutex); 432 #endif // CONFIG_MULTITHREAD 433 434 bool result = (pyr->filled_levels >= n_levels); 435 436 #if CONFIG_MULTITHREAD 437 pthread_mutex_unlock(&pyr->mutex); 438 #endif // CONFIG_MULTITHREAD 439 440 return result; 441 } 442 #endif 443 444 // Mark a pyramid as no longer containing valid data. 445 // This must be done whenever the corresponding frame buffer is reused 446 void aom_invalidate_pyramid(ImagePyramid *pyr) { 447 if (pyr) { 448 #if CONFIG_MULTITHREAD 449 pthread_mutex_lock(&pyr->mutex); 450 #endif // CONFIG_MULTITHREAD 451 pyr->filled_levels = 0; 452 #if CONFIG_MULTITHREAD 453 pthread_mutex_unlock(&pyr->mutex); 454 #endif // CONFIG_MULTITHREAD 455 } 456 } 457 458 // Release the memory associated with a pyramid 459 void aom_free_pyramid(ImagePyramid *pyr) { 460 if (pyr) { 461 #if CONFIG_MULTITHREAD 462 pthread_mutex_destroy(&pyr->mutex); 463 #endif // CONFIG_MULTITHREAD 464 aom_free(pyr->buffer_alloc); 465 aom_free(pyr->layers); 466 aom_free(pyr); 467 } 468 }