tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

intrapred.c (30847B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <math.h>
     14 
     15 #include "config/aom_config.h"
     16 #include "config/aom_dsp_rtcd.h"
     17 
     18 #include "aom_dsp/aom_dsp_common.h"
     19 #include "aom_dsp/intrapred_common.h"
     20 #include "aom_mem/aom_mem.h"
     21 #include "aom_ports/bitops.h"
     22 
     23 static inline void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
     24                               const uint8_t *above, const uint8_t *left) {
     25  int r;
     26  (void)left;
     27 
     28  for (r = 0; r < bh; r++) {
     29    memcpy(dst, above, bw);
     30    dst += stride;
     31  }
     32 }
     33 
     34 static inline void h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
     35                               const uint8_t *above, const uint8_t *left) {
     36  int r;
     37  (void)above;
     38 
     39  for (r = 0; r < bh; r++) {
     40    memset(dst, left[r], bw);
     41    dst += stride;
     42  }
     43 }
     44 
     45 static inline int abs_diff(int a, int b) { return (a > b) ? a - b : b - a; }
     46 
     47 static inline uint16_t paeth_predictor_single(uint16_t left, uint16_t top,
     48                                              uint16_t top_left) {
     49  const int base = top + left - top_left;
     50  const int p_left = abs_diff(base, left);
     51  const int p_top = abs_diff(base, top);
     52  const int p_top_left = abs_diff(base, top_left);
     53 
     54  // Return nearest to base of left, top and top_left.
     55  return (p_left <= p_top && p_left <= p_top_left) ? left
     56         : (p_top <= p_top_left)                   ? top
     57                                                   : top_left;
     58 }
     59 
     60 static inline void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
     61                                   int bh, const uint8_t *above,
     62                                   const uint8_t *left) {
     63  int r, c;
     64  const uint8_t ytop_left = above[-1];
     65 
     66  for (r = 0; r < bh; r++) {
     67    for (c = 0; c < bw; c++)
     68      dst[c] = (uint8_t)paeth_predictor_single(left[r], above[c], ytop_left);
     69    dst += stride;
     70  }
     71 }
     72 
     73 // Some basic checks on weights for smooth predictor.
     74 #define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \
     75                                 pred_scale)                          \
     76  assert(weights_w[0] < weights_scale);                               \
     77  assert(weights_h[0] < weights_scale);                               \
     78  assert(weights_scale - weights_w[bw - 1] < weights_scale);          \
     79  assert(weights_scale - weights_h[bh - 1] < weights_scale);          \
     80  assert(pred_scale < 31)  // ensures no overflow when calculating predictor.
     81 
     82 #define divide_round(value, bits) (((value) + (1 << ((bits) - 1))) >> (bits))
     83 
     84 static inline void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
     85                                    int bh, const uint8_t *above,
     86                                    const uint8_t *left) {
     87  const uint8_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
     88  const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
     89  const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
     90  const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
     91  // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
     92  const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
     93  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
     94  sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
     95                           log2_scale + sizeof(*dst));
     96  int r;
     97  for (r = 0; r < bh; ++r) {
     98    int c;
     99    for (c = 0; c < bw; ++c) {
    100      const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
    101      const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r],
    102                                  sm_weights_w[c], scale - sm_weights_w[c] };
    103      uint32_t this_pred = 0;
    104      int i;
    105      assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]);
    106      for (i = 0; i < 4; ++i) {
    107        this_pred += weights[i] * pixels[i];
    108      }
    109      dst[c] = divide_round(this_pred, log2_scale);
    110    }
    111    dst += stride;
    112  }
    113 }
    114 
    115 static inline void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
    116                                      int bh, const uint8_t *above,
    117                                      const uint8_t *left) {
    118  const uint8_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
    119  const uint8_t *const sm_weights = smooth_weights + bh - 4;
    120  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
    121  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
    122  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
    123  sm_weights_sanity_checks(sm_weights, sm_weights, scale,
    124                           log2_scale + sizeof(*dst));
    125 
    126  int r;
    127  for (r = 0; r < bh; r++) {
    128    int c;
    129    for (c = 0; c < bw; ++c) {
    130      const uint8_t pixels[] = { above[c], below_pred };
    131      const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] };
    132      uint32_t this_pred = 0;
    133      assert(scale >= sm_weights[r]);
    134      int i;
    135      for (i = 0; i < 2; ++i) {
    136        this_pred += weights[i] * pixels[i];
    137      }
    138      dst[c] = divide_round(this_pred, log2_scale);
    139    }
    140    dst += stride;
    141  }
    142 }
    143 
    144 static inline void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
    145                                      int bh, const uint8_t *above,
    146                                      const uint8_t *left) {
    147  const uint8_t right_pred = above[bw - 1];  // estimated by top-right pixel
    148  const uint8_t *const sm_weights = smooth_weights + bw - 4;
    149  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
    150  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
    151  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
    152  sm_weights_sanity_checks(sm_weights, sm_weights, scale,
    153                           log2_scale + sizeof(*dst));
    154 
    155  int r;
    156  for (r = 0; r < bh; r++) {
    157    int c;
    158    for (c = 0; c < bw; ++c) {
    159      const uint8_t pixels[] = { left[r], right_pred };
    160      const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
    161      uint32_t this_pred = 0;
    162      assert(scale >= sm_weights[c]);
    163      int i;
    164      for (i = 0; i < 2; ++i) {
    165        this_pred += weights[i] * pixels[i];
    166      }
    167      dst[c] = divide_round(this_pred, log2_scale);
    168    }
    169    dst += stride;
    170  }
    171 }
    172 
    173 static inline void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
    174                                    int bh, const uint8_t *above,
    175                                    const uint8_t *left) {
    176  int r;
    177  (void)above;
    178  (void)left;
    179 
    180  for (r = 0; r < bh; r++) {
    181    memset(dst, 128, bw);
    182    dst += stride;
    183  }
    184 }
    185 
    186 static inline void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
    187                                     int bh, const uint8_t *above,
    188                                     const uint8_t *left) {
    189  int i, r, expected_dc, sum = 0;
    190  (void)above;
    191 
    192  for (i = 0; i < bh; i++) sum += left[i];
    193  expected_dc = (sum + (bh >> 1)) / bh;
    194 
    195  for (r = 0; r < bh; r++) {
    196    memset(dst, expected_dc, bw);
    197    dst += stride;
    198  }
    199 }
    200 
    201 static inline void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
    202                                    int bh, const uint8_t *above,
    203                                    const uint8_t *left) {
    204  int i, r, expected_dc, sum = 0;
    205  (void)left;
    206 
    207  for (i = 0; i < bw; i++) sum += above[i];
    208  expected_dc = (sum + (bw >> 1)) / bw;
    209 
    210  for (r = 0; r < bh; r++) {
    211    memset(dst, expected_dc, bw);
    212    dst += stride;
    213  }
    214 }
    215 
    216 static inline void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
    217                                const uint8_t *above, const uint8_t *left) {
    218  int i, r, expected_dc, sum = 0;
    219  const int count = bw + bh;
    220 
    221  for (i = 0; i < bw; i++) {
    222    sum += above[i];
    223  }
    224  for (i = 0; i < bh; i++) {
    225    sum += left[i];
    226  }
    227 
    228  expected_dc = (sum + (count >> 1)) / count;
    229 
    230  for (r = 0; r < bh; r++) {
    231    memset(dst, expected_dc, bw);
    232    dst += stride;
    233  }
    234 }
    235 
    236 static inline int divide_using_multiply_shift(int num, int shift1,
    237                                              int multiplier, int shift2) {
    238  const int interm = num >> shift1;
    239  return interm * multiplier >> shift2;
    240 }
    241 
    242 // The constants (multiplier and shifts) for a given block size are obtained
    243 // as follows:
    244 // - Let sum_w_h =  block width + block height.
    245 // - Shift 'sum_w_h' right until we reach an odd number. Let the number of
    246 // shifts for that block size be called 'shift1' (see the parameter in
    247 // dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2
    248 // possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect
    249 // block].
    250 // - Find multipliers for (i) dividing by 3, and (ii) dividing by 5,
    251 // using the "Algorithm 1" in:
    252 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
    253 // by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd
    254 // shift will be 16, regardless of the block size.
    255 
    256 // Note: For low bitdepth, assembly code may be optimized by using smaller
    257 // constants for smaller block sizes, where the range of the 'sum' is
    258 // restricted to fewer bits.
    259 
    260 #define DC_MULTIPLIER_1X2 0x5556
    261 #define DC_MULTIPLIER_1X4 0x3334
    262 
    263 #define DC_SHIFT2 16
    264 
    265 static inline void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw,
    266                                     int bh, const uint8_t *above,
    267                                     const uint8_t *left, int shift1,
    268                                     int multiplier) {
    269  int sum = 0;
    270 
    271  for (int i = 0; i < bw; i++) {
    272    sum += above[i];
    273  }
    274  for (int i = 0; i < bh; i++) {
    275    sum += left[i];
    276  }
    277 
    278  const int expected_dc = divide_using_multiply_shift(
    279      sum + ((bw + bh) >> 1), shift1, multiplier, DC_SHIFT2);
    280  assert(expected_dc < (1 << 8));
    281 
    282  for (int r = 0; r < bh; r++) {
    283    memset(dst, expected_dc, bw);
    284    dst += stride;
    285  }
    286 }
    287 
    288 #undef DC_SHIFT2
    289 
    290 void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride,
    291                            const uint8_t *above, const uint8_t *left) {
    292  dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2);
    293 }
    294 
    295 void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride,
    296                            const uint8_t *above, const uint8_t *left) {
    297  dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2);
    298 }
    299 
    300 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    301 void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride,
    302                             const uint8_t *above, const uint8_t *left) {
    303  dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4);
    304 }
    305 
    306 void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride,
    307                             const uint8_t *above, const uint8_t *left) {
    308  dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4);
    309 }
    310 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    311 
    312 void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride,
    313                             const uint8_t *above, const uint8_t *left) {
    314  dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2);
    315 }
    316 
    317 void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride,
    318                             const uint8_t *above, const uint8_t *left) {
    319  dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2);
    320 }
    321 
    322 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    323 void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride,
    324                             const uint8_t *above, const uint8_t *left) {
    325  dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4);
    326 }
    327 
    328 void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride,
    329                             const uint8_t *above, const uint8_t *left) {
    330  dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4);
    331 }
    332 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    333 
    334 void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride,
    335                              const uint8_t *above, const uint8_t *left) {
    336  dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2);
    337 }
    338 
    339 void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride,
    340                              const uint8_t *above, const uint8_t *left) {
    341  dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2);
    342 }
    343 
    344 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    345 void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride,
    346                              const uint8_t *above, const uint8_t *left) {
    347  dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4);
    348 }
    349 
    350 void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride,
    351                              const uint8_t *above, const uint8_t *left) {
    352  dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4);
    353 }
    354 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    355 
    356 void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride,
    357                              const uint8_t *above, const uint8_t *left) {
    358  dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2);
    359 }
    360 
    361 void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
    362                              const uint8_t *above, const uint8_t *left) {
    363  dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2);
    364 }
    365 
    366 #undef DC_MULTIPLIER_1X2
    367 #undef DC_MULTIPLIER_1X4
    368 
    369 #if CONFIG_AV1_HIGHBITDEPTH
    370 
    371 static inline void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
    372                                      int bh, const uint16_t *above,
    373                                      const uint16_t *left, int bd) {
    374  int r;
    375  (void)left;
    376  (void)bd;
    377  for (r = 0; r < bh; r++) {
    378    memcpy(dst, above, bw * sizeof(uint16_t));
    379    dst += stride;
    380  }
    381 }
    382 
    383 static inline void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
    384                                      int bh, const uint16_t *above,
    385                                      const uint16_t *left, int bd) {
    386  int r;
    387  (void)above;
    388  (void)bd;
    389  for (r = 0; r < bh; r++) {
    390    aom_memset16(dst, left[r], bw);
    391    dst += stride;
    392  }
    393 }
    394 
    395 static inline void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
    396                                          int bw, int bh, const uint16_t *above,
    397                                          const uint16_t *left, int bd) {
    398  int r, c;
    399  const uint16_t ytop_left = above[-1];
    400  (void)bd;
    401 
    402  for (r = 0; r < bh; r++) {
    403    for (c = 0; c < bw; c++)
    404      dst[c] = paeth_predictor_single(left[r], above[c], ytop_left);
    405    dst += stride;
    406  }
    407 }
    408 
    409 static inline void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
    410                                           int bw, int bh,
    411                                           const uint16_t *above,
    412                                           const uint16_t *left, int bd) {
    413  (void)bd;
    414  const uint16_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
    415  const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
    416  const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
    417  const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
    418  // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
    419  const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
    420  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
    421  sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
    422                           log2_scale + sizeof(*dst));
    423  int r;
    424  for (r = 0; r < bh; ++r) {
    425    int c;
    426    for (c = 0; c < bw; ++c) {
    427      const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
    428      const uint8_t weights[] = { sm_weights_h[r], scale - sm_weights_h[r],
    429                                  sm_weights_w[c], scale - sm_weights_w[c] };
    430      uint32_t this_pred = 0;
    431      int i;
    432      assert(scale >= sm_weights_h[r] && scale >= sm_weights_w[c]);
    433      for (i = 0; i < 4; ++i) {
    434        this_pred += weights[i] * pixels[i];
    435      }
    436      dst[c] = divide_round(this_pred, log2_scale);
    437    }
    438    dst += stride;
    439  }
    440 }
    441 
    442 static inline void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
    443                                             int bw, int bh,
    444                                             const uint16_t *above,
    445                                             const uint16_t *left, int bd) {
    446  (void)bd;
    447  const uint16_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
    448  const uint8_t *const sm_weights = smooth_weights + bh - 4;
    449  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
    450  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
    451  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
    452  sm_weights_sanity_checks(sm_weights, sm_weights, scale,
    453                           log2_scale + sizeof(*dst));
    454 
    455  int r;
    456  for (r = 0; r < bh; r++) {
    457    int c;
    458    for (c = 0; c < bw; ++c) {
    459      const uint16_t pixels[] = { above[c], below_pred };
    460      const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r] };
    461      uint32_t this_pred = 0;
    462      assert(scale >= sm_weights[r]);
    463      int i;
    464      for (i = 0; i < 2; ++i) {
    465        this_pred += weights[i] * pixels[i];
    466      }
    467      dst[c] = divide_round(this_pred, log2_scale);
    468    }
    469    dst += stride;
    470  }
    471 }
    472 
    473 static inline void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
    474                                             int bw, int bh,
    475                                             const uint16_t *above,
    476                                             const uint16_t *left, int bd) {
    477  (void)bd;
    478  const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
    479  const uint8_t *const sm_weights = smooth_weights + bw - 4;
    480  // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
    481  const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
    482  const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
    483  sm_weights_sanity_checks(sm_weights, sm_weights, scale,
    484                           log2_scale + sizeof(*dst));
    485 
    486  int r;
    487  for (r = 0; r < bh; r++) {
    488    int c;
    489    for (c = 0; c < bw; ++c) {
    490      const uint16_t pixels[] = { left[r], right_pred };
    491      const uint8_t weights[] = { sm_weights[c], scale - sm_weights[c] };
    492      uint32_t this_pred = 0;
    493      assert(scale >= sm_weights[c]);
    494      int i;
    495      for (i = 0; i < 2; ++i) {
    496        this_pred += weights[i] * pixels[i];
    497      }
    498      dst[c] = divide_round(this_pred, log2_scale);
    499    }
    500    dst += stride;
    501  }
    502 }
    503 
    504 static inline void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
    505                                           int bw, int bh,
    506                                           const uint16_t *above,
    507                                           const uint16_t *left, int bd) {
    508  int r;
    509  (void)above;
    510  (void)left;
    511 
    512  for (r = 0; r < bh; r++) {
    513    aom_memset16(dst, 128 << (bd - 8), bw);
    514    dst += stride;
    515  }
    516 }
    517 
    518 static inline void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride,
    519                                            int bw, int bh,
    520                                            const uint16_t *above,
    521                                            const uint16_t *left, int bd) {
    522  int i, r, expected_dc, sum = 0;
    523  (void)above;
    524  (void)bd;
    525 
    526  for (i = 0; i < bh; i++) sum += left[i];
    527  expected_dc = (sum + (bh >> 1)) / bh;
    528 
    529  for (r = 0; r < bh; r++) {
    530    aom_memset16(dst, expected_dc, bw);
    531    dst += stride;
    532  }
    533 }
    534 
    535 static inline void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride,
    536                                           int bw, int bh,
    537                                           const uint16_t *above,
    538                                           const uint16_t *left, int bd) {
    539  int i, r, expected_dc, sum = 0;
    540  (void)left;
    541  (void)bd;
    542 
    543  for (i = 0; i < bw; i++) sum += above[i];
    544  expected_dc = (sum + (bw >> 1)) / bw;
    545 
    546  for (r = 0; r < bh; r++) {
    547    aom_memset16(dst, expected_dc, bw);
    548    dst += stride;
    549  }
    550 }
    551 
    552 static inline void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
    553                                       int bh, const uint16_t *above,
    554                                       const uint16_t *left, int bd) {
    555  int i, r, expected_dc, sum = 0;
    556  const int count = bw + bh;
    557  (void)bd;
    558 
    559  for (i = 0; i < bw; i++) {
    560    sum += above[i];
    561  }
    562  for (i = 0; i < bh; i++) {
    563    sum += left[i];
    564  }
    565 
    566  expected_dc = (sum + (count >> 1)) / count;
    567 
    568  for (r = 0; r < bh; r++) {
    569    aom_memset16(dst, expected_dc, bw);
    570    dst += stride;
    571  }
    572 }
    573 
    574 // Obtained similarly as DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but
    575 // assume 2nd shift of 17 bits instead of 16.
    576 // Note: Strictly speaking, 2nd shift needs to be 17 only when:
    577 // - bit depth == 12, and
    578 // - bw + bh is divisible by 5 (as opposed to divisible by 3).
    579 // All other cases can use half the multipliers with a shift of 16 instead.
    580 // This special optimization can be used when writing assembly code.
    581 #define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB
    582 // Note: This constant is odd, but a smaller even constant (0x199a) with the
    583 // appropriate shift should work for neon in 8/10-bit.
    584 #define HIGHBD_DC_MULTIPLIER_1X4 0x6667
    585 
    586 #define HIGHBD_DC_SHIFT2 17
    587 
    588 static inline void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
    589                                            int bw, int bh,
    590                                            const uint16_t *above,
    591                                            const uint16_t *left, int bd,
    592                                            int shift1, uint32_t multiplier) {
    593  int sum = 0;
    594  (void)bd;
    595 
    596  for (int i = 0; i < bw; i++) {
    597    sum += above[i];
    598  }
    599  for (int i = 0; i < bh; i++) {
    600    sum += left[i];
    601  }
    602 
    603  const int expected_dc = divide_using_multiply_shift(
    604      sum + ((bw + bh) >> 1), shift1, multiplier, HIGHBD_DC_SHIFT2);
    605  assert(expected_dc < (1 << bd));
    606 
    607  for (int r = 0; r < bh; r++) {
    608    aom_memset16(dst, expected_dc, bw);
    609    dst += stride;
    610  }
    611 }
    612 
    613 #undef HIGHBD_DC_SHIFT2
    614 
    615 void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
    616                                   const uint16_t *above, const uint16_t *left,
    617                                   int bd) {
    618  highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2,
    619                           HIGHBD_DC_MULTIPLIER_1X2);
    620 }
    621 
    622 void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
    623                                   const uint16_t *above, const uint16_t *left,
    624                                   int bd) {
    625  highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2,
    626                           HIGHBD_DC_MULTIPLIER_1X2);
    627 }
    628 
    629 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    630 void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
    631                                    const uint16_t *above, const uint16_t *left,
    632                                    int bd) {
    633  highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2,
    634                           HIGHBD_DC_MULTIPLIER_1X4);
    635 }
    636 
    637 void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
    638                                    const uint16_t *above, const uint16_t *left,
    639                                    int bd) {
    640  highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2,
    641                           HIGHBD_DC_MULTIPLIER_1X4);
    642 }
    643 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    644 
    645 void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
    646                                    const uint16_t *above, const uint16_t *left,
    647                                    int bd) {
    648  highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3,
    649                           HIGHBD_DC_MULTIPLIER_1X2);
    650 }
    651 
    652 void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
    653                                    const uint16_t *above, const uint16_t *left,
    654                                    int bd) {
    655  highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3,
    656                           HIGHBD_DC_MULTIPLIER_1X2);
    657 }
    658 
    659 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    660 void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
    661                                    const uint16_t *above, const uint16_t *left,
    662                                    int bd) {
    663  highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3,
    664                           HIGHBD_DC_MULTIPLIER_1X4);
    665 }
    666 
    667 void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
    668                                    const uint16_t *above, const uint16_t *left,
    669                                    int bd) {
    670  highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3,
    671                           HIGHBD_DC_MULTIPLIER_1X4);
    672 }
    673 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    674 
    675 void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
    676                                     const uint16_t *above,
    677                                     const uint16_t *left, int bd) {
    678  highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4,
    679                           HIGHBD_DC_MULTIPLIER_1X2);
    680 }
    681 
    682 void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
    683                                     const uint16_t *above,
    684                                     const uint16_t *left, int bd) {
    685  highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4,
    686                           HIGHBD_DC_MULTIPLIER_1X2);
    687 }
    688 
    689 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    690 void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
    691                                     const uint16_t *above,
    692                                     const uint16_t *left, int bd) {
    693  highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4,
    694                           HIGHBD_DC_MULTIPLIER_1X4);
    695 }
    696 
    697 void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
    698                                     const uint16_t *above,
    699                                     const uint16_t *left, int bd) {
    700  highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4,
    701                           HIGHBD_DC_MULTIPLIER_1X4);
    702 }
    703 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    704 
    705 void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
    706                                     const uint16_t *above,
    707                                     const uint16_t *left, int bd) {
    708  highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
    709                           HIGHBD_DC_MULTIPLIER_1X2);
    710 }
    711 
    712 void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
    713                                     const uint16_t *above,
    714                                     const uint16_t *left, int bd) {
    715  highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
    716                           HIGHBD_DC_MULTIPLIER_1X2);
    717 }
    718 
    719 #undef HIGHBD_DC_MULTIPLIER_1X2
    720 #undef HIGHBD_DC_MULTIPLIER_1X4
    721 #endif  // CONFIG_AV1_HIGHBITDEPTH
    722 
    723 // This serves as a wrapper function, so that all the prediction functions
    724 // can be unified and accessed as a pointer array. Note that the boundary
    725 // above and left are not necessarily used all the time.
    726 #define intra_pred_sized(type, width, height)                  \
    727  void aom_##type##_predictor_##width##x##height##_c(          \
    728      uint8_t *dst, ptrdiff_t stride, const uint8_t *above,    \
    729      const uint8_t *left) {                                   \
    730    type##_predictor(dst, stride, width, height, above, left); \
    731  }
    732 
    733 #if CONFIG_AV1_HIGHBITDEPTH
    734 #define intra_pred_highbd_sized(type, width, height)                        \
    735  void aom_highbd_##type##_predictor_##width##x##height##_c(                \
    736      uint16_t *dst, ptrdiff_t stride, const uint16_t *above,               \
    737      const uint16_t *left, int bd) {                                       \
    738    highbd_##type##_predictor(dst, stride, width, height, above, left, bd); \
    739  }
    740 #else  // !CONFIG_AV1_HIGHBITDEPTH
    741 #define intra_pred_highbd_sized(type, width, height)
    742 #endif  // CONFIG_AV1_HIGHBITDEPTH
    743 
    744 /* clang-format off */
    745 #if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    746 #define intra_pred_rectangular(type) \
    747  intra_pred_sized(type, 4, 8) \
    748  intra_pred_sized(type, 8, 4) \
    749  intra_pred_sized(type, 8, 16) \
    750  intra_pred_sized(type, 16, 8) \
    751  intra_pred_sized(type, 16, 32) \
    752  intra_pred_sized(type, 32, 16) \
    753  intra_pred_sized(type, 32, 64) \
    754  intra_pred_sized(type, 64, 32) \
    755  intra_pred_highbd_sized(type, 4, 8) \
    756  intra_pred_highbd_sized(type, 8, 4) \
    757  intra_pred_highbd_sized(type, 8, 16) \
    758  intra_pred_highbd_sized(type, 16, 8) \
    759  intra_pred_highbd_sized(type, 16, 32) \
    760  intra_pred_highbd_sized(type, 32, 16) \
    761  intra_pred_highbd_sized(type, 32, 64) \
    762  intra_pred_highbd_sized(type, 64, 32)
    763 #else
    764 #define intra_pred_rectangular(type) \
    765  intra_pred_sized(type, 4, 8) \
    766  intra_pred_sized(type, 8, 4) \
    767  intra_pred_sized(type, 8, 16) \
    768  intra_pred_sized(type, 16, 8) \
    769  intra_pred_sized(type, 16, 32) \
    770  intra_pred_sized(type, 32, 16) \
    771  intra_pred_sized(type, 32, 64) \
    772  intra_pred_sized(type, 64, 32) \
    773  intra_pred_sized(type, 4, 16) \
    774  intra_pred_sized(type, 16, 4) \
    775  intra_pred_sized(type, 8, 32) \
    776  intra_pred_sized(type, 32, 8) \
    777  intra_pred_sized(type, 16, 64) \
    778  intra_pred_sized(type, 64, 16) \
    779  intra_pred_highbd_sized(type, 4, 8) \
    780  intra_pred_highbd_sized(type, 8, 4) \
    781  intra_pred_highbd_sized(type, 8, 16) \
    782  intra_pred_highbd_sized(type, 16, 8) \
    783  intra_pred_highbd_sized(type, 16, 32) \
    784  intra_pred_highbd_sized(type, 32, 16) \
    785  intra_pred_highbd_sized(type, 32, 64) \
    786  intra_pred_highbd_sized(type, 64, 32) \
    787  intra_pred_highbd_sized(type, 4, 16) \
    788  intra_pred_highbd_sized(type, 16, 4) \
    789  intra_pred_highbd_sized(type, 8, 32) \
    790  intra_pred_highbd_sized(type, 32, 8) \
    791  intra_pred_highbd_sized(type, 16, 64) \
    792  intra_pred_highbd_sized(type, 64, 16)
    793 #endif // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    794 
    795 #define intra_pred_above_4x4(type) \
    796  intra_pred_sized(type, 8, 8) \
    797  intra_pred_sized(type, 16, 16) \
    798  intra_pred_sized(type, 32, 32) \
    799  intra_pred_sized(type, 64, 64) \
    800  intra_pred_highbd_sized(type, 4, 4) \
    801  intra_pred_highbd_sized(type, 8, 8) \
    802  intra_pred_highbd_sized(type, 16, 16) \
    803  intra_pred_highbd_sized(type, 32, 32) \
    804  intra_pred_highbd_sized(type, 64, 64) \
    805  intra_pred_rectangular(type)
    806 #define intra_pred_allsizes(type) \
    807  intra_pred_sized(type, 4, 4) \
    808  intra_pred_above_4x4(type)
    809 #define intra_pred_square(type) \
    810  intra_pred_sized(type, 4, 4) \
    811  intra_pred_sized(type, 8, 8) \
    812  intra_pred_sized(type, 16, 16) \
    813  intra_pred_sized(type, 32, 32) \
    814  intra_pred_sized(type, 64, 64) \
    815  intra_pred_highbd_sized(type, 4, 4) \
    816  intra_pred_highbd_sized(type, 8, 8) \
    817  intra_pred_highbd_sized(type, 16, 16) \
    818  intra_pred_highbd_sized(type, 32, 32) \
    819  intra_pred_highbd_sized(type, 64, 64)
    820 
    821 intra_pred_allsizes(v)
    822 intra_pred_allsizes(h)
    823 intra_pred_allsizes(smooth)
    824 intra_pred_allsizes(smooth_v)
    825 intra_pred_allsizes(smooth_h)
    826 intra_pred_allsizes(paeth)
    827 intra_pred_allsizes(dc_128)
    828 intra_pred_allsizes(dc_left)
    829 intra_pred_allsizes(dc_top)
    830 intra_pred_square(dc)
    831 /* clang-format on */
    832 #undef intra_pred_allsizes