tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

reconinter.c (48765B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <stdio.h>
     14 #include <limits.h>
     15 
     16 #include "config/aom_config.h"
     17 #include "config/aom_dsp_rtcd.h"
     18 #include "config/aom_scale_rtcd.h"
     19 
     20 #include "aom/aom_integer.h"
     21 #include "aom_dsp/blend.h"
     22 #include "aom_ports/aom_once.h"
     23 
     24 #include "av1/common/av1_common_int.h"
     25 #include "av1/common/blockd.h"
     26 #include "av1/common/mvref_common.h"
     27 #include "av1/common/obmc.h"
     28 #include "av1/common/reconinter.h"
     29 #include "av1/common/reconintra.h"
     30 
     31 // This function will determine whether or not to create a warped
     32 // prediction.
     33 static int allow_warp(const MB_MODE_INFO *const mbmi,
     34                      const WarpTypesAllowed *const warp_types,
     35                      const WarpedMotionParams *const gm_params,
     36                      int build_for_obmc, const struct scale_factors *const sf,
     37                      WarpedMotionParams *final_warp_params) {
     38  // Note: As per the spec, we must test the fixed point scales here, which are
     39  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
     40  // have 1 << 10 precision).
     41  if (av1_is_scaled(sf)) return 0;
     42 
     43  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
     44 
     45  if (build_for_obmc) return 0;
     46 
     47  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
     48    if (final_warp_params != NULL) *final_warp_params = mbmi->wm_params;
     49    return 1;
     50  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
     51    if (final_warp_params != NULL) *final_warp_params = *gm_params;
     52    return 1;
     53  }
     54 
     55  return 0;
     56 }
     57 
     58 void av1_init_warp_params(InterPredParams *inter_pred_params,
     59                          const WarpTypesAllowed *warp_types, int ref,
     60                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
     61  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
     62    return;
     63 
     64  if (xd->cur_frame_force_integer_mv) return;
     65 
     66  if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
     67                 inter_pred_params->scale_factors,
     68                 &inter_pred_params->warp_params)) {
     69 #if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
     70    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
     71                       "Warped motion is disabled in realtime only build.");
     72 #endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
     73    inter_pred_params->mode = WARP_PRED;
     74  }
     75 }
     76 
// Produce the inter prediction for one plane of one block into dst.
//
// Dispatches on inter_pred_params->mode: TRANSLATION_PRED runs the regular
// subpel interpolation (high bitdepth variant when use_hbd_buf is set and
// the build supports it); WARP_PRED applies the affine model in
// inter_pred_params->warp_params via av1_warp_plane(). Any other mode is a
// programming error (assert).
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  // A compound prediction must have an intermediate accumulation buffer.
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                             inter_pred_params->block_width,
                             inter_pred_params->block_height,
                             &inter_pred_params->conv_params,
                             inter_pred_params->interp_filter_params,
                             inter_pred_params->bit_depth);
    } else {
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                    inter_pred_params->block_width,
                    inter_pred_params->block_height,
                    &inter_pred_params->conv_params,
                    inter_pred_params->interp_filter_params);
#endif
  }
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  else if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  }
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  else {
    assert(0 && "Unsupported inter_pred_params->mode");
  }
}
    127 
// Seed rows for the wedge master masks. Each row ramps from 0 up to 64;
// init_wedge_master_masks() interleaves shifted copies of the even/odd
// oblique rows to build the oblique master mask, and replicates the
// vertical row unshifted on every line of the vertical master.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
    146 
    147 static inline void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
    148                              int width) {
    149  if (shift >= 0) {
    150    memcpy(dst + shift, src, width - shift);
    151    memset(dst, src[0], shift);
    152  } else {
    153    shift = -shift;
    154    memcpy(dst, src + shift, width - shift);
    155    memset(dst + width - shift, src[width - 1], shift);
    156  }
    157 }
    158 
/* clang-format off */
// Per-block-size, per-wedge-index polarity flip. get_wedge_mask_inplace()
// XORs this with the requested sign before indexing wedge_mask_obl, so a
// flipped entry serves its mask from the complement table. Rows are
// indexed by BLOCK_SIZE; "not used" rows belong to sizes without wedges
// (see av1_wedge_params_lookup).
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
    186 
// Master wedge masks, indexed [negative][direction]; filled once by
// init_wedge_master_masks(). Index [0] holds the masters, [1] their
// complements.
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// Backing storage for the per-block-size contiguous wedge masks cut out of
// the masters by init_wedge_masks().
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Precomputed interintra blend masks, one per (mode, block size) pair;
// filled once by init_smooth_interintra_masks().
DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

// Pointers into wedge_mask_buf, per block size and polarity; set up by
// init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
    202 
// Wedge codebooks: each entry is { edge direction, x_offset, y_offset }
// with offsets in eighths of the block width/height (see
// get_wedge_mask_inplace). Three variants cover blocks whose height is
// greater than (hgtw), less than (hltw), or equal to (heqw) their width.
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
    235 
// Wedge parameters per block size: number of wedge types, codebook,
// signflip row, and mask pointer table. Sizes without wedge support get
// all-zero/NULL entries. Row order follows the BLOCK_SIZE enum.
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },  // BLOCK_4X4
  { 0, NULL, NULL, NULL },  // BLOCK_4X8
  { 0, NULL, NULL, NULL },  // BLOCK_8X4
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },  // sizes above 32x32: no wedge support
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },  // BLOCK_4X16
  { 0, NULL, NULL, NULL },  // BLOCK_16X4
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },  // BLOCK_16X64
  { 0, NULL, NULL, NULL },  // BLOCK_64X16
};
    269 
    270 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
    271                                             BLOCK_SIZE sb_type) {
    272  const uint8_t *master;
    273  const int bh = block_size_high[sb_type];
    274  const int bw = block_size_wide[sb_type];
    275  const wedge_code_type *a =
    276      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
    277  int woff, hoff;
    278  const uint8_t wsignflip =
    279      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
    280 
    281  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
    282  woff = (a->x_offset * bw) >> 3;
    283  hoff = (a->y_offset * bh) >> 3;
    284  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
    285           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
    286           MASK_MASTER_SIZE / 2 - woff;
    287  return master;
    288 }
    289 
    290 const uint8_t *av1_get_compound_type_mask(
    291    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
    292  (void)sb_type;
    293  switch (comp_data->type) {
    294    case COMPOUND_WEDGE:
    295      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
    296                                          comp_data->wedge_sign, sb_type);
    297    default: return comp_data->seg_mask;
    298  }
    299 }
    300 
    301 static inline void diffwtd_mask_d16(uint8_t *mask, int which_inverse,
    302                                    int mask_base, const CONV_BUF_TYPE *src0,
    303                                    int src0_stride, const CONV_BUF_TYPE *src1,
    304                                    int src1_stride, int h, int w,
    305                                    ConvolveParams *conv_params, int bd) {
    306  int round =
    307      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
    308  int i, j, m, diff;
    309  for (i = 0; i < h; ++i) {
    310    for (j = 0; j < w; ++j) {
    311      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
    312      diff = ROUND_POWER_OF_TWO(diff, round);
    313      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
    314      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    315    }
    316  }
    317 }
    318 
    319 void av1_build_compound_diffwtd_mask_d16_c(
    320    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    321    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    322    ConvolveParams *conv_params, int bd) {
    323  switch (mask_type) {
    324    case DIFFWTD_38:
    325      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
    326                       conv_params, bd);
    327      break;
    328    case DIFFWTD_38_INV:
    329      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
    330                       conv_params, bd);
    331      break;
    332    default: assert(0);
    333  }
    334 }
    335 
    336 static inline void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
    337                                const uint8_t *src0, int src0_stride,
    338                                const uint8_t *src1, int src1_stride, int h,
    339                                int w) {
    340  int i, j, m, diff;
    341  for (i = 0; i < h; ++i) {
    342    for (j = 0; j < w; ++j) {
    343      diff =
    344          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
    345      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
    346      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    347    }
    348  }
    349 }
    350 
    351 void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
    352                                       DIFFWTD_MASK_TYPE mask_type,
    353                                       const uint8_t *src0, int src0_stride,
    354                                       const uint8_t *src1, int src1_stride,
    355                                       int h, int w) {
    356  switch (mask_type) {
    357    case DIFFWTD_38:
    358      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
    359      break;
    360    case DIFFWTD_38_INV:
    361      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
    362      break;
    363    default: assert(0);
    364  }
    365 }
    366 
    367 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth variant of diffwtd_mask: builds a DIFFWTD blend mask from
// two uint16_t pixel-domain predictions. Differences are scaled down to
// 8-bit range (>> (bd - 8)) before the DIFF_FACTOR division; the result
// grows from mask_base and is capped at AOM_BLEND_A64_MAX_ALPHA, with
// which_inverse selecting the complement. The four copies of the loop are
// deliberate: hoisting the bd == 8 and which_inverse tests out of the
// per-pixel path keeps the hot loops branch-free (hence AOM_FORCE_INLINE).
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    // 8-bit content: no downshift needed before DIFF_FACTOR scaling.
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    // 10/12-bit content: bring the difference back to 8-bit scale first.
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}
    430 
    431 void av1_build_compound_diffwtd_mask_highbd_c(
    432    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    433    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    434    int bd) {
    435  switch (mask_type) {
    436    case DIFFWTD_38:
    437      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
    438                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
    439      break;
    440    case DIFFWTD_38_INV:
    441      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
    442                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
    443      break;
    444    default: assert(0);
    445  }
    446 }
    447 #endif  // CONFIG_AV1_HIGHBITDEPTH
    448 
// Builds the 64x64 master wedge masks for every direction and polarity.
// The WEDGE_OBLIQUE63 master is assembled two rows at a time from the
// even/odd oblique ramps, with the horizontal shift decreasing by one per
// row pair to trace the oblique edge; WEDGE_VERTICAL repeats the vertical
// ramp on every row. All other directions and the complement set ([1])
// are derived by transposing, mirroring, and subtracting from the maximum
// weight (1 << WEDGE_WEIGHT_BITS).
static inline void init_wedge_master_masks(void) {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the masters, and [1] its complement.
  // Generate prototype by shifting the masters
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      // OBLIQUE27 is the transpose of OBLIQUE63; OBLIQUE117/153 are their
      // mirror images with complemented weights. The [1] set complements
      // the [0] set element-wise.
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      // HORIZONTAL is the transpose of VERTICAL.
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}
    493 
// Cuts the per-block-size wedge masks out of the 64x64 masters into
// contiguous (stride == bw) buffers, packing them back to back into
// wedge_mask_buf, and records the resulting pointers in wedge_masks via
// av1_wedge_params_lookup. Both polarities of every wedge type are stored.
static inline void init_wedge_masks(void) {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
    const int wtypes = wedge_params->wedge_types;
    if (wtypes == 0) continue;  // this block size has no wedge support
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    int w;
    for (w = 0; w < wtypes; ++w) {
      // Positive polarity, then its complement.
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    // Guard against overflowing the shared backing buffer.
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}
    522 
    523 /* clang-format off */
    524 static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
    525  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
    526  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
    527  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
    528  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
    529  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
    530  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
    531  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
    532 };
    533 static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    534    32, 16, 16, 16, 8, 8, 8, 4,
    535    4,  4,  2,  2,  2, 1, 1, 1,
    536    8,  8,  4,  4,  2, 2
    537 };
    538 /* clang-format on */
    539 
    540 static inline void build_smooth_interintra_mask(uint8_t *mask, int stride,
    541                                                BLOCK_SIZE plane_bsize,
    542                                                INTERINTRA_MODE mode) {
    543  int i, j;
    544  const int bw = block_size_wide[plane_bsize];
    545  const int bh = block_size_high[plane_bsize];
    546  const int size_scale = ii_size_scales[plane_bsize];
    547 
    548  switch (mode) {
    549    case II_V_PRED:
    550      for (i = 0; i < bh; ++i) {
    551        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
    552        mask += stride;
    553      }
    554      break;
    555 
    556    case II_H_PRED:
    557      for (i = 0; i < bh; ++i) {
    558        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
    559        mask += stride;
    560      }
    561      break;
    562 
    563    case II_SMOOTH_PRED:
    564      for (i = 0; i < bh; ++i) {
    565        for (j = 0; j < bw; ++j)
    566          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
    567        mask += stride;
    568      }
    569      break;
    570 
    571    case II_DC_PRED:
    572    default:
    573      for (i = 0; i < bh; ++i) {
    574        memset(mask, 32, bw * sizeof(mask[0]));
    575        mask += stride;
    576      }
    577      break;
    578  }
    579 }
    580 
    581 static inline void init_smooth_interintra_masks(void) {
    582  for (int m = 0; m < INTERINTRA_MODES; ++m) {
    583    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
    584      const int bw = block_size_wide[bs];
    585      const int bh = block_size_high[bs];
    586      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
    587      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
    588                                   m);
    589    }
    590  }
    591 }
    592 
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// One-shot initializer for every mask table in this file; registered with
// aom_once() by av1_init_wedge_masks(). Order matters: the per-size wedge
// masks are cut from the masters built by init_wedge_master_masks().
static void init_all_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}
    599 
// Run init_all_wedge_masks() exactly once per process (via aom_once).
void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); }
    601 
// Blend two d16-domain predictions (src0, src1) into dst using the block's
// compound-type mask (wedge or DIFFWTD). The mask is defined at luma
// resolution; ssx/ssy tell the blend how to subsample it for chroma
// planes. Output conversion back to pixel domain is handled inside the
// blend via conv_params.
static inline void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, InterPredParams *inter_pred_params) {
  const int ssy = inter_pred_params->subsampling_y;
  const int ssx = inter_pred_params->subsampling_x;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  // Masks are stored contiguously at the (luma) block width.
  const int mask_stride = block_size_wide[sb_type];
#if CONFIG_AV1_HIGHBITDEPTH
  if (inter_pred_params->use_hbd_buf) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, mask_stride, w, h, ssx,
                                  ssy, &inter_pred_params->conv_params,
                                  inter_pred_params->bit_depth);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
                                 &inter_pred_params->conv_params);
  }
#else
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
                               &inter_pred_params->conv_params);
#endif
}
    628 
// Build the second prediction of a masked compound and blend it with the
// first. On entry, conv_params.dst holds the first reference's d16
// prediction; this function temporarily redirects conv_params.dst to a
// stack buffer, predicts the second reference into it, optionally derives
// the DIFFWTD mask from the two predictions (luma plane only), and blends
// both into dst.
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     InterPredParams *inter_pred_params,
                                     const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then will blend that temporary buffer with that from
  // the other reference.
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  const int tmp_buf_stride = MAX_SB_SIZE;
  // Save the caller's d16 destination (the first reference's prediction)
  // and point conv_params at the temporary buffer instead.
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  // DIFFWTD masks are derived from the two predictions on the luma plane
  // (plane 0) and reused for chroma.
  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}
    668 
    669 void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
    670                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
    671                                     int *bck_offset,
    672                                     int *use_dist_wtd_comp_avg,
    673                                     int is_compound) {
    674  assert(fwd_offset != NULL && bck_offset != NULL);
    675  if (!is_compound || mbmi->compound_idx) {
    676    *fwd_offset = 8;
    677    *bck_offset = 8;
    678    *use_dist_wtd_comp_avg = 0;
    679    return;
    680  }
    681 
    682  *use_dist_wtd_comp_avg = 1;
    683  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
    684  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
    685  const int cur_frame_index = cm->cur_frame->order_hint;
    686  int bck_frame_index = 0, fwd_frame_index = 0;
    687 
    688  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
    689  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
    690 
    691  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
    692                                       fwd_frame_index, cur_frame_index)),
    693                 0, MAX_FRAME_DISTANCE);
    694  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
    695                                       cur_frame_index, bck_frame_index)),
    696                 0, MAX_FRAME_DISTANCE);
    697 
    698  const int order = d0 <= d1;
    699 
    700  if (d0 == 0 || d1 == 0) {
    701    *fwd_offset = quant_dist_lookup_table[3][order];
    702    *bck_offset = quant_dist_lookup_table[3][1 - order];
    703    return;
    704  }
    705 
    706  int i;
    707  for (i = 0; i < 3; ++i) {
    708    int c0 = quant_dist_weight[i][order];
    709    int c1 = quant_dist_weight[i][!order];
    710    int d0_c0 = d0 * c0;
    711    int d1_c1 = d1 * c1;
    712    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
    713  }
    714 
    715  *fwd_offset = quant_dist_lookup_table[i][order];
    716  *bck_offset = quant_dist_lookup_table[i][1 - order];
    717 }
    718 
    719 void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
    720                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
    721                          const int plane_start, const int plane_end) {
    722  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    723  // the static analysis warnings.
    724  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    725    struct macroblockd_plane *const pd = &planes[i];
    726    const int is_uv = i > 0;
    727    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
    728                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
    729                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
    730  }
    731 }
    732 
    733 void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
    734                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
    735                          const struct scale_factors *sf,
    736                          const int num_planes) {
    737  if (src != NULL) {
    738    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    739    // the static analysis warnings.
    740    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
    741      struct macroblockd_plane *const pd = &xd->plane[i];
    742      const int is_uv = i > 0;
    743      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
    744                       src->crop_widths[is_uv], src->crop_heights[is_uv],
    745                       src->strides[is_uv], mi_row, mi_col, sf,
    746                       pd->subsampling_x, pd->subsampling_y);
    747    }
    748  }
    749 }
    750 
// obmc_mask_N[overlap_position]
// OBMC blending weights (out of 64) indexed by distance from the block edge.
// As used by the blend calls below, weight w goes to the current block's
// prediction and (64 - w) to the neighbor's; the ramp saturates at 64, where
// the neighbor's contribution vanishes.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64,
};
    773 
    774 const uint8_t *av1_get_obmc_mask(int length) {
    775  switch (length) {
    776    case 1: return obmc_mask_1;
    777    case 2: return obmc_mask_2;
    778    case 4: return obmc_mask_4;
    779    case 8: return obmc_mask_8;
    780    case 16: return obmc_mask_16;
    781    case 32: return obmc_mask_32;
    782    case 64: return obmc_mask_64;
    783    default: assert(0); return NULL;
    784  }
    785 }
    786 
    787 static inline void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
    788                                     int rel_mi_col, uint8_t op_mi_size,
    789                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
    790                                     const int num_planes) {
    791  (void)xd;
    792  (void)rel_mi_row;
    793  (void)rel_mi_col;
    794  (void)op_mi_size;
    795  (void)dir;
    796  (void)mi;
    797  ++*(uint8_t *)fun_ctxt;
    798  (void)num_planes;
    799 }
    800 
    801 void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
    802  MB_MODE_INFO *mbmi = xd->mi[0];
    803 
    804  mbmi->overlappable_neighbors = 0;
    805 
    806  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;
    807 
    808  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
    809                                &mbmi->overlappable_neighbors);
    810  if (mbmi->overlappable_neighbors) return;
    811  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
    812                               &mbmi->overlappable_neighbors);
    813 }
    814 
    815 // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
    816 // block-size of current plane is smaller than 8x8, always only blend with the
    817 // left neighbor(s) (skip blending with the above side).
    818 #define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
    819 
    820 int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
    821                               const struct macroblockd_plane *pd, int dir) {
    822  assert(is_motion_variation_allowed_bsize(bsize));
    823 
    824  const BLOCK_SIZE bsize_plane =
    825      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
    826  switch (bsize_plane) {
    827 #if DISABLE_CHROMA_U8X8_OBMC
    828    case BLOCK_4X4:
    829    case BLOCK_8X4:
    830    case BLOCK_4X8: return 1;
    831 #else
    832    case BLOCK_4X4:
    833    case BLOCK_8X4:
    834    case BLOCK_4X8: return dir == 0;
    835 #endif
    836    default: return 0;
    837  }
    838 }
    839 
#if CONFIG_AV1_DECODER
// A neighbor re-predicted for OBMC is treated as single-reference with plain
// average compound, regardless of how it was originally coded.
static void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}
#endif  // CONFIG_AV1_DECODER
    846 
// Per-plane predictor buffers (and their strides) holding an already-built
// neighbor prediction, handed to the OBMC blend callbacks via fun_ctxt.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};
    851 
    852 static inline void build_obmc_inter_pred_above(
    853    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    854    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
    855  (void)above_mi;
    856  (void)rel_mi_row;
    857  (void)dir;
    858  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
    859  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
    860  const int overlap =
    861      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    862 
    863  for (int plane = 0; plane < num_planes; ++plane) {
    864    const struct macroblockd_plane *pd = &xd->plane[plane];
    865    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
    866    const int bh = overlap >> pd->subsampling_y;
    867    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
    868 
    869    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
    870 
    871    const int dst_stride = pd->dst.stride;
    872    uint8_t *const dst = &pd->dst.buf[plane_col];
    873    const int tmp_stride = ctxt->adjacent_stride[plane];
    874    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    875    const uint8_t *const mask = av1_get_obmc_mask(bh);
    876 #if CONFIG_AV1_HIGHBITDEPTH
    877    const int is_hbd = is_cur_buf_hbd(xd);
    878    if (is_hbd)
    879      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
    880                                 tmp_stride, mask, bw, bh, xd->bd);
    881    else
    882      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
    883                          mask, bw, bh);
    884 #else
    885    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
    886                        bw, bh);
    887 #endif
    888  }
    889 }
    890 
    891 static inline void build_obmc_inter_pred_left(
    892    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    893    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
    894  (void)left_mi;
    895  (void)rel_mi_col;
    896  (void)dir;
    897  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
    898  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
    899  const int overlap =
    900      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    901 
    902  for (int plane = 0; plane < num_planes; ++plane) {
    903    const struct macroblockd_plane *pd = &xd->plane[plane];
    904    const int bw = overlap >> pd->subsampling_x;
    905    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
    906    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
    907 
    908    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
    909 
    910    const int dst_stride = pd->dst.stride;
    911    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    912    const int tmp_stride = ctxt->adjacent_stride[plane];
    913    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    914    const uint8_t *const mask = av1_get_obmc_mask(bw);
    915 
    916 #if CONFIG_AV1_HIGHBITDEPTH
    917    const int is_hbd = is_cur_buf_hbd(xd);
    918    if (is_hbd)
    919      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
    920                                 tmp_stride, mask, bw, bh, xd->bd);
    921    else
    922      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
    923                          mask, bw, bh);
    924 #else
    925    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
    926                        bw, bh);
    927 #endif
    928  }
    929 }
    930 
    931 // This function combines motion compensated predictions that are generated by
    932 // top/left neighboring blocks' inter predictors with the regular inter
    933 // prediction. We assume the original prediction (bmc) is stored in
    934 // xd->plane[].dst.buf
    935 void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
    936                                     uint8_t *above[MAX_MB_PLANE],
    937                                     int above_stride[MAX_MB_PLANE],
    938                                     uint8_t *left[MAX_MB_PLANE],
    939                                     int left_stride[MAX_MB_PLANE]) {
    940  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
    941 
    942  // handle above row
    943  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
    944  foreach_overlappable_nb_above(cm, xd,
    945                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
    946                                build_obmc_inter_pred_above, &ctxt_above);
    947 
    948  // handle left column
    949  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
    950  foreach_overlappable_nb_left(cm, xd,
    951                               max_neighbor_obmc[mi_size_high_log2[bsize]],
    952                               build_obmc_inter_pred_left, &ctxt_left);
    953 }
    954 
    955 void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
    956                             uint8_t **dst_buf2) {
    957  if (is_cur_buf_hbd(xd)) {
    958    int len = sizeof(uint16_t);
    959    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
    960    dst_buf1[1] =
    961        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
    962    dst_buf1[2] =
    963        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
    964    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
    965    dst_buf2[1] =
    966        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
    967    dst_buf2[2] =
    968        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
    969  } else {
    970    dst_buf1[0] = xd->tmp_obmc_bufs[0];
    971    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
    972    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
    973    dst_buf2[0] = xd->tmp_obmc_bufs[1];
    974    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
    975    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
    976  }
    977 }
    978 
    979 #if CONFIG_AV1_DECODER
// Prepares xd so the above neighbor's inter predictor can be re-run into the
// temporary buffers in ctxt: redirects dst planes to the tmp buffers, sets
// up the neighbor's reference planes and scale factors, and adjusts the
// left/right MV clamping edges for the neighbor's column position.
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  // Neighbor predictions are built at a minimum granularity of 8x8.
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  // Force the neighbor to single-reference, plain-average compound.
  modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if ((!av1_is_valid_scale(sf)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  // MV clamping edges in 1/8-pel units at the neighbor's column.
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}
   1017 
// Mirror of av1_setup_build_prediction_by_above_pred() for a left neighbor:
// redirects dst planes to the tmp buffers, sets up the neighbor's reference
// planes and scale factors, and adjusts the top/bottom MV clamping edges for
// the neighbor's row position.
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  // Neighbor predictions are built at a minimum granularity of 8x8.
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  // Force the neighbor to single-reference, plain-average compound.
  modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if ((!av1_is_valid_scale(ref_scale_factors)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  // MV clamping edges in 1/8-pel units at the neighbor's row.
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}
   1057 #endif  // CONFIG_AV1_DECODER
   1058 
   1059 static inline void combine_interintra(
   1060    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
   1061    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
   1062    uint8_t *comppred, int compstride, const uint8_t *interpred,
   1063    int interstride, const uint8_t *intrapred, int intrastride) {
   1064  const int bw = block_size_wide[plane_bsize];
   1065  const int bh = block_size_high[plane_bsize];
   1066 
   1067  if (use_wedge_interintra) {
   1068    if (av1_is_wedge_used(bsize)) {
   1069      const uint8_t *mask =
   1070          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
   1071      const int subw = 2 * mi_size_wide[bsize] == bw;
   1072      const int subh = 2 * mi_size_high[bsize] == bh;
   1073      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
   1074                         interpred, interstride, mask, block_size_wide[bsize],
   1075                         bw, bh, subw, subh);
   1076    }
   1077    return;
   1078  }
   1079 
   1080  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
   1081  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
   1082                     interstride, mask, bw, bw, bh, 0, 0);
   1083 }
   1084 
   1085 #if CONFIG_AV1_HIGHBITDEPTH
   1086 static inline void combine_interintra_highbd(
   1087    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
   1088    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
   1089    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
   1090    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
   1091  const int bw = block_size_wide[plane_bsize];
   1092  const int bh = block_size_high[plane_bsize];
   1093 
   1094  if (use_wedge_interintra) {
   1095    if (av1_is_wedge_used(bsize)) {
   1096      const uint8_t *mask =
   1097          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
   1098      const int subh = 2 * mi_size_high[bsize] == bh;
   1099      const int subw = 2 * mi_size_wide[bsize] == bw;
   1100      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
   1101                                interpred8, interstride, mask,
   1102                                block_size_wide[bsize], bw, bh, subw, subh, bd);
   1103    }
   1104    return;
   1105  }
   1106 
   1107  uint8_t mask[MAX_SB_SQUARE];
   1108  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
   1109  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
   1110                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
   1111                            bd);
   1112 }
   1113 #endif
   1114 
   1115 void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
   1116                                               MACROBLOCKD *xd,
   1117                                               BLOCK_SIZE bsize, int plane,
   1118                                               const BUFFER_SET *ctx,
   1119                                               uint8_t *dst, int dst_stride) {
   1120  struct macroblockd_plane *const pd = &xd->plane[plane];
   1121  const int ssx = xd->plane[plane].subsampling_x;
   1122  const int ssy = xd->plane[plane].subsampling_y;
   1123  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
   1124  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
   1125  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
   1126  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
   1127  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
   1128  assert(xd->mi[0]->use_intrabc == 0);
   1129  const SequenceHeader *seq_params = cm->seq_params;
   1130 
   1131  av1_predict_intra_block(xd, seq_params->sb_size,
   1132                          seq_params->enable_intra_edge_filter, pd->width,
   1133                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
   1134                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
   1135                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
   1136 }
   1137 
   1138 void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
   1139                            const uint8_t *inter_pred, int inter_stride,
   1140                            const uint8_t *intra_pred, int intra_stride) {
   1141  const int ssx = xd->plane[plane].subsampling_x;
   1142  const int ssy = xd->plane[plane].subsampling_y;
   1143  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
   1144 #if CONFIG_AV1_HIGHBITDEPTH
   1145  if (is_cur_buf_hbd(xd)) {
   1146    combine_interintra_highbd(
   1147        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
   1148        xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
   1149        plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
   1150        inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
   1151    return;
   1152  }
   1153 #endif
   1154  combine_interintra(
   1155      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
   1156      xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
   1157      plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
   1158      inter_pred, inter_stride, intra_pred, intra_stride);
   1159 }
   1160 
   1161 // build interintra_predictors for one plane
   1162 void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
   1163                                    uint8_t *pred, int stride,
   1164                                    const BUFFER_SET *ctx, int plane,
   1165                                    BLOCK_SIZE bsize) {
   1166  assert(bsize < BLOCK_SIZES_ALL);
   1167  if (is_cur_buf_hbd(xd)) {
   1168    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
   1169    av1_build_intra_predictors_for_interintra(
   1170        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
   1171        MAX_SB_SIZE);
   1172    av1_combine_interintra(xd, bsize, plane, pred, stride,
   1173                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
   1174  } else {
   1175    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
   1176    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
   1177                                              intrapredictor, MAX_SB_SIZE);
   1178    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
   1179                           MAX_SB_SIZE);
   1180  }
   1181 }