[ tor-browser ].git.dasho

hash_motion.c (18662B)
      1 /*
      2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <stdbool.h>
     14 
     15 #include "config/av1_rtcd.h"
     16 
     17 #include "av1/encoder/block.h"
     18 #include "av1/encoder/hash.h"
     19 #include "av1/encoder/hash_motion.h"
     20 
     21 #define kSrcBits 16
     22 // kMaxAddr is the number of hash table buckets in p_hash_table->p_lookup_table.
     23 // p_hash_table->p_lookup_table consists of 6 hash tables of 1 << kSrcBits
     24 // buckets each. Each of the 6 supported block sizes (4, 8, 16, 32, 64, 128) has
     25 // its own hash table, indexed by the return value of
     26 // hash_block_size_to_index().
     27 #define kMaxAddr (6 << kSrcBits)
     28 #define kMaxCandidatesPerHashBucket 256
     29 
     30 static void get_pixels_in_1D_char_array_by_block_2x2(const uint8_t *y_src,
     31                                                     int stride,
     32                                                     uint8_t *p_pixels_in1D) {
     33  const uint8_t *p_pel = y_src;
     34  int index = 0;
     35  for (int i = 0; i < 2; i++) {
     36    for (int j = 0; j < 2; j++) {
     37      p_pixels_in1D[index++] = p_pel[j];
     38    }
     39    p_pel += stride;
     40  }
     41 }
     42 
     43 static void get_pixels_in_1D_short_array_by_block_2x2(const uint16_t *y_src,
     44                                                      int stride,
     45                                                      uint16_t *p_pixels_in1D) {
     46  const uint16_t *p_pel = y_src;
     47  int index = 0;
     48  for (int i = 0; i < 2; i++) {
     49    for (int j = 0; j < 2; j++) {
     50      p_pixels_in1D[index++] = p_pel[j];
     51    }
     52    p_pel += stride;
     53  }
     54 }
     55 
     56 // the hash value (hash_value1) consists of two parts, the first 3 bits relate
     57 // to the block size and the remaining 16 bits are the crc values. This
     58 // function is used to get the first 3 bits.
     59 static int hash_block_size_to_index(int block_size) {
     60  switch (block_size) {
     61    case 4: return 0;
     62    case 8: return 1;
     63    case 16: return 2;
     64    case 32: return 3;
     65    case 64: return 4;
     66    case 128: return 5;
     67    default: return -1;
     68  }
     69 }
     70 
     71 static uint32_t get_identity_hash_value(const uint8_t a, const uint8_t b,
     72                                        const uint8_t c, const uint8_t d) {
     73  // The four input values add up to 32 bits, which is the size of the output.
     74  // Just pack those values as is.
     75  return ((uint32_t)a << 24) + ((uint32_t)b << 16) + ((uint32_t)c << 8) +
     76         ((uint32_t)d);
     77 }
     78 
     79 static uint32_t get_xor_hash_value_hbd(const uint16_t a, const uint16_t b,
     80                                       const uint16_t c, const uint16_t d) {
     81  uint32_t result;
     82  // Pack the lower 8 bits of each input value to the 32 bit output, then xor
     83  // with the upper 8 bits of each input value.
     84  result = ((uint32_t)(a & 0x00ff) << 24) + ((uint32_t)(b & 0x00ff) << 16) +
     85           ((uint32_t)(c & 0x00ff) << 8) + ((uint32_t)(d & 0x00ff));
     86  result ^= ((uint32_t)(a & 0xff00) << 16) + ((uint32_t)(b & 0xff00) << 8) +
     87            ((uint32_t)(c & 0xff00)) + ((uint32_t)(d & 0xff00) >> 8);
     88  return result;
     89 }
     90 
     91 void av1_hash_table_init(IntraBCHashInfo *intrabc_hash_info) {
     92  if (!intrabc_hash_info->crc_initialized) {
     93    av1_crc32c_calculator_init(&intrabc_hash_info->crc_calculator);
     94    intrabc_hash_info->crc_initialized = 1;
     95  }
     96  intrabc_hash_info->intrabc_hash_table.p_lookup_table = NULL;
     97 }
     98 
     99 static void clear_all(hash_table *p_hash_table) {
    100  if (p_hash_table->p_lookup_table == NULL) {
    101    return;
    102  }
    103  for (int i = 0; i < kMaxAddr; i++) {
    104    if (p_hash_table->p_lookup_table[i] != NULL) {
    105      aom_vector_destroy(p_hash_table->p_lookup_table[i]);
    106      aom_free(p_hash_table->p_lookup_table[i]);
    107      p_hash_table->p_lookup_table[i] = NULL;
    108    }
    109  }
    110 }
    111 
    112 void av1_hash_table_destroy(hash_table *p_hash_table) {
    113  clear_all(p_hash_table);
    114  aom_free(p_hash_table->p_lookup_table);
    115  p_hash_table->p_lookup_table = NULL;
    116 }
    117 
    118 bool av1_hash_table_create(hash_table *p_hash_table) {
    119  if (p_hash_table->p_lookup_table != NULL) {
    120    clear_all(p_hash_table);
    121    return true;
    122  }
    123  p_hash_table->p_lookup_table =
    124      (Vector **)aom_calloc(kMaxAddr, sizeof(p_hash_table->p_lookup_table[0]));
    125  if (!p_hash_table->p_lookup_table) return false;
    126  return true;
    127 }
    128 
    129 static bool hash_table_add_to_table(hash_table *p_hash_table,
    130                                    uint32_t hash_value,
    131                                    const block_hash *curr_block_hash) {
    132  if (p_hash_table->p_lookup_table[hash_value] == NULL) {
    133    p_hash_table->p_lookup_table[hash_value] =
    134        aom_malloc(sizeof(*p_hash_table->p_lookup_table[hash_value]));
    135    if (p_hash_table->p_lookup_table[hash_value] == NULL) {
    136      return false;
    137    }
    138    if (aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
    139                         sizeof(*curr_block_hash)) == VECTOR_ERROR)
    140      return false;
    141  }
    142  // Place an upper bound each hash table bucket to up to 256 intrabc
    143  // block candidates, and ignore subsequent ones. Considering more can
    144  // unnecessarily slow down encoding for virtually no efficiency gain.
    145  if (aom_vector_byte_size(p_hash_table->p_lookup_table[hash_value]) <
    146      kMaxCandidatesPerHashBucket * sizeof(*curr_block_hash)) {
    147    if (aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
    148                             (void *)curr_block_hash) == VECTOR_ERROR)
    149      return false;
    150  }
    151  return true;
    152 }
    153 
    154 int32_t av1_hash_table_count(const hash_table *p_hash_table,
    155                             uint32_t hash_value) {
    156  if (p_hash_table->p_lookup_table[hash_value] == NULL) {
    157    return 0;
    158  } else {
    159    return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size);
    160  }
    161 }
    162 
    163 Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
    164                                     uint32_t hash_value) {
    165  assert(av1_hash_table_count(p_hash_table, hash_value) > 0);
    166  return aom_vector_begin(p_hash_table->p_lookup_table[hash_value]);
    167 }
    168 
    169 void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture,
    170                                       uint32_t *pic_block_hash) {
    171  const int width = 2;
    172  const int height = 2;
    173  const int x_end = picture->y_crop_width - width + 1;
    174  const int y_end = picture->y_crop_height - height + 1;
    175 
    176  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
    177    uint16_t p[4];
    178    int pos = 0;
    179    for (int y_pos = 0; y_pos < y_end; y_pos++) {
    180      for (int x_pos = 0; x_pos < x_end; x_pos++) {
    181        get_pixels_in_1D_short_array_by_block_2x2(
    182            CONVERT_TO_SHORTPTR(picture->y_buffer) + y_pos * picture->y_stride +
    183                x_pos,
    184            picture->y_stride, p);
    185        // For HBD, we either have 40 or 48 bits of input data that the xor hash
    186        // reduce to 32 bits. We intentionally don't want to "discard" bits to
    187        // avoid any kind of biasing.
    188        pic_block_hash[pos] = get_xor_hash_value_hbd(p[0], p[1], p[2], p[3]);
    189        pos++;
    190      }
    191      pos += width - 1;
    192    }
    193  } else {
    194    uint8_t p[4];
    195    int pos = 0;
    196    for (int y_pos = 0; y_pos < y_end; y_pos++) {
    197      for (int x_pos = 0; x_pos < x_end; x_pos++) {
    198        get_pixels_in_1D_char_array_by_block_2x2(
    199            picture->y_buffer + y_pos * picture->y_stride + x_pos,
    200            picture->y_stride, p);
    201        // This 2x2 hash isn't used directly as a "key" for the hash table, so
    202        // we can afford to just copy the 4 8-bit pixel values as a single
    203        // 32-bit value directly. (i.e. there are no concerns of a lack of
    204        // uniform distribution)
    205        pic_block_hash[pos] = get_identity_hash_value(p[0], p[1], p[2], p[3]);
    206        pos++;
    207      }
    208      pos += width - 1;
    209    }
    210  }
    211 }
    212 
    213 void av1_generate_block_hash_value(IntraBCHashInfo *intrabc_hash_info,
    214                                   const YV12_BUFFER_CONFIG *picture,
    215                                   int block_size,
    216                                   const uint32_t *src_pic_block_hash,
    217                                   uint32_t *dst_pic_block_hash) {
    218  CRC32C *calc = &intrabc_hash_info->crc_calculator;
    219 
    220  const int pic_width = picture->y_crop_width;
    221  const int x_end = picture->y_crop_width - block_size + 1;
    222  const int y_end = picture->y_crop_height - block_size + 1;
    223  const int src_size = block_size >> 1;
    224 
    225  uint32_t p[4];
    226  const int length = sizeof(p);
    227 
    228  int pos = 0;
    229  for (int y_pos = 0; y_pos < y_end; y_pos++) {
    230    for (int x_pos = 0; x_pos < x_end; x_pos++) {
    231      // Build up a bigger block from 4 smaller, non-overlapping source block
    232      // hashes, and compute its hash. Note: source blocks at the right and
    233      // bottom borders cannot be part of larger blocks, therefore they won't be
    234      // considered into the block hash value generation process.
    235      p[0] = src_pic_block_hash[pos];
    236      p[1] = src_pic_block_hash[pos + src_size];
    237      p[2] = src_pic_block_hash[pos + src_size * pic_width];
    238      p[3] = src_pic_block_hash[pos + src_size * pic_width + src_size];
    239      // TODO: bug aomedia:433531610 - serialize input values in a way that's
    240      // independent of the computer architecture's endianness
    241      dst_pic_block_hash[pos] =
    242          av1_get_crc32c_value(calc, (uint8_t *)p, length);
    243      pos++;
    244    }
    245    pos += block_size - 1;
    246  }
    247 }
    248 
    249 bool av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
    250                                                 const uint32_t *pic_hash,
    251                                                 int pic_width, int pic_height,
    252                                                 int block_size) {
    253  const int x_end = pic_width - block_size + 1;
    254  const int y_end = pic_height - block_size + 1;
    255 
    256  int add_value = hash_block_size_to_index(block_size);
    257  assert(add_value >= 0);
    258  add_value <<= kSrcBits;
    259  const int crc_mask = (1 << kSrcBits) - 1;
    260  int step = block_size;
    261  int x_offset = 0;
    262  int y_offset = 0;
    263 
    264  // Explore the entire frame hierarchically to add intrabc candidate blocks to
    265  // the hash table, by starting with coarser steps (the block size), towards
    266  // finer-grained steps until every candidate block has been considered.
    267  // The nested for loop goes through the pic_hash array column by column.
    268 
    269  // Doing a hierarchical block exploration helps maximize spatial dispersion
    270  // of the first and foremost candidate blocks while minimizing overlap between
    271  // them. This is helpful because we only keep up to 256 entries of the
    272  // same candidate block (located in different places), so we want those
    273  // entries to cover the biggest area of the image to encode to maximize coding
    274  // efficiency.
    275 
    276  // This is the coordinate exploration order example for an 8x8 region, with
    277  // block_size = 4. The top-left corner (x, y) coordinates of each candidate
    278  // block are shown below. There are 5 * 5 (25) candidate blocks.
    279  //    x  0  1  2  3  4  5  6  7
    280  //  y +------------------------
    281  //  0 |  1 10  5 13  3
    282  //  1 | 16 22 18 24 20
    283  //  2 |  7 11  9 14  8
    284  //  3 | 17 23 19 25 21
    285  //  4 |  2 12  6 15  4--------+
    286  //  5 |              | 4 x 4  |
    287  //  6 |              | block  |
    288  //  7 |              +--------+
    289 
    290  // Please note that due to the way block exploration works, the smallest step
    291  // used is 2 (i.e. no two adjacent blocks will be explored consecutively).
    292  // Also, the exploration is designed to visit each block candidate only once.
    293  while (step > 1) {
    294    for (int x_pos = x_offset; x_pos < x_end; x_pos += step) {
    295      for (int y_pos = y_offset; y_pos < y_end; y_pos += step) {
    296        const int pos = y_pos * pic_width + x_pos;
    297        block_hash curr_block_hash;
    298 
    299        curr_block_hash.x = x_pos;
    300        curr_block_hash.y = y_pos;
    301 
    302        const uint32_t hash_value1 = (pic_hash[pos] & crc_mask) + add_value;
    303        curr_block_hash.hash_value2 = pic_hash[pos];
    304 
    305        if (!hash_table_add_to_table(p_hash_table, hash_value1,
    306                                     &curr_block_hash)) {
    307          return false;
    308        }
    309      }
    310    }
    311 
    312    // Adjust offsets and step sizes with this state machine.
    313    // State 0 is needed because no blocks in pic_hash have been explored,
    314    // so exploration requires a way to account for blocks with both zero
    315    // x_offset and zero y_offset.
    316    // State 0 is always meant to be executed first, but the relative order of
    317    // states 1, 2 and 3 can be arbitrary, as long as no two adjacent blocks
    318    // are explored consecutively.
    319    if (x_offset == 0 && y_offset == 0) {
    320      // State 0 -> State 1: special case
    321      // This state transition will only execute when step == block_size
    322      x_offset = step / 2;
    323    } else if (x_offset == step / 2 && y_offset == 0) {
    324      // State 1 -> State 2
    325      x_offset = 0;
    326      y_offset = step / 2;
    327    } else if (x_offset == 0 && y_offset == step / 2) {
    328      // State 2 -> State 3
    329      x_offset = step / 2;
    330    } else {
    331      assert(x_offset == step / 2 && y_offset == step / 2);
    332      // State 3 -> State 1: We've fully explored all the coordinates for the
    333      // current step size, continue by halving the step size
    334      step /= 2;
    335      x_offset = step / 2;
    336      y_offset = 0;
    337    }
    338  }
    339 
    340  return true;
    341 }
    342 
    343 int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
    344                                   int block_size, int x_start, int y_start) {
    345  const int stride = picture->y_stride;
    346  const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
    347 
    348  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
    349    const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
    350    for (int i = 0; i < block_size; i++) {
    351      for (int j = 1; j < block_size; j++) {
    352        if (p16[j] != p16[0]) {
    353          return 0;
    354        }
    355      }
    356      p16 += stride;
    357    }
    358  } else {
    359    for (int i = 0; i < block_size; i++) {
    360      for (int j = 1; j < block_size; j++) {
    361        if (p[j] != p[0]) {
    362          return 0;
    363        }
    364      }
    365      p += stride;
    366    }
    367  }
    368 
    369  return 1;
    370 }
    371 
    372 int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture,
    373                                 int block_size, int x_start, int y_start) {
    374  const int stride = picture->y_stride;
    375  const uint8_t *p = picture->y_buffer + y_start * stride + x_start;
    376 
    377  if (picture->flags & YV12_FLAG_HIGHBITDEPTH) {
    378    const uint16_t *p16 = CONVERT_TO_SHORTPTR(p);
    379    for (int i = 0; i < block_size; i++) {
    380      for (int j = 1; j < block_size; j++) {
    381        if (p16[j * stride + i] != p16[i]) {
    382          return 0;
    383        }
    384      }
    385    }
    386  } else {
    387    for (int i = 0; i < block_size; i++) {
    388      for (int j = 1; j < block_size; j++) {
    389        if (p[j * stride + i] != p[i]) {
    390          return 0;
    391        }
    392      }
    393    }
    394  }
    395  return 1;
    396 }
    397 
    398 void av1_get_block_hash_value(IntraBCHashInfo *intra_bc_hash_info,
    399                              const uint8_t *y_src, int stride, int block_size,
    400                              uint32_t *hash_value1, uint32_t *hash_value2,
    401                              int use_highbitdepth) {
    402  int add_value = hash_block_size_to_index(block_size);
    403  assert(add_value >= 0);
    404  add_value <<= kSrcBits;
    405  const int crc_mask = (1 << kSrcBits) - 1;
    406 
    407  CRC32C *calc = &intra_bc_hash_info->crc_calculator;
    408  uint32_t **buf = intra_bc_hash_info->hash_value_buffer;
    409 
    410  // 2x2 subblock hash values in current CU
    411  int sub_block_in_width = (block_size >> 1);
    412  if (use_highbitdepth) {
    413    uint16_t pixel_to_hash[4];
    414    uint16_t *y16_src = CONVERT_TO_SHORTPTR(y_src);
    415    for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
    416      for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
    417        int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
    418        get_pixels_in_1D_short_array_by_block_2x2(
    419            y16_src + y_pos * stride + x_pos, stride, pixel_to_hash);
    420        assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
    421        // For HBD, we either have 40 or 48 bits of input data that the xor hash
    422        // reduce to 32 bits. We intentionally don't want to "discard" bits to
    423        // avoid any kind of biasing.
    424        buf[0][pos] =
    425            get_xor_hash_value_hbd(pixel_to_hash[0], pixel_to_hash[1],
    426                                   pixel_to_hash[2], pixel_to_hash[3]);
    427      }
    428    }
    429  } else {
    430    uint8_t pixel_to_hash[4];
    431    for (int y_pos = 0; y_pos < block_size; y_pos += 2) {
    432      for (int x_pos = 0; x_pos < block_size; x_pos += 2) {
    433        int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1);
    434        get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos,
    435                                                 stride, pixel_to_hash);
    436        assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
    437        // This 2x2 hash isn't used directly as a "key" for the hash table, so
    438        // we can afford to just copy the 4 8-bit pixel values as a single
    439        // 32-bit value directly. (i.e. there are no concerns of a lack of
    440        // uniform distribution)
    441        buf[0][pos] =
    442            get_identity_hash_value(pixel_to_hash[0], pixel_to_hash[1],
    443                                    pixel_to_hash[2], pixel_to_hash[3]);
    444      }
    445    }
    446  }
    447 
    448  int src_sub_block_in_width = sub_block_in_width;
    449  sub_block_in_width >>= 1;
    450 
    451  int src_idx = 0;
    452  int dst_idx = !src_idx;
    453 
    454  // 4x4 subblock hash values to current block hash values
    455  uint32_t to_hash[4];
    456  for (int sub_width = 4; sub_width <= block_size;
    457       sub_width *= 2, src_idx = !src_idx) {
    458    dst_idx = !src_idx;
    459 
    460    int dst_pos = 0;
    461    for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) {
    462      for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) {
    463        int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1);
    464 
    465        assert(srcPos + 1 < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
    466        assert(srcPos + src_sub_block_in_width + 1 <
    467               AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
    468        assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
    469 
    470        to_hash[0] = buf[src_idx][srcPos];
    471        to_hash[1] = buf[src_idx][srcPos + 1];
    472        to_hash[2] = buf[src_idx][srcPos + src_sub_block_in_width];
    473        to_hash[3] = buf[src_idx][srcPos + src_sub_block_in_width + 1];
    474 
    475        // TODO: bug aomedia:433531610 - serialize input values in a way that's
    476        // independent of the computer architecture's endianness
    477        buf[dst_idx][dst_pos] =
    478            av1_get_crc32c_value(calc, (uint8_t *)to_hash, sizeof(to_hash));
    479        dst_pos++;
    480      }
    481    }
    482 
    483    src_sub_block_in_width = sub_block_in_width;
    484    sub_block_in_width >>= 1;
    485  }
    486 
    487  *hash_value1 = (buf[dst_idx][0] & crc_mask) + add_value;
    488  *hash_value2 = buf[dst_idx][0];
    489 }
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE