reconinter.c (48765B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 #include <stdio.h> 14 #include <limits.h> 15 16 #include "config/aom_config.h" 17 #include "config/aom_dsp_rtcd.h" 18 #include "config/aom_scale_rtcd.h" 19 20 #include "aom/aom_integer.h" 21 #include "aom_dsp/blend.h" 22 #include "aom_ports/aom_once.h" 23 24 #include "av1/common/av1_common_int.h" 25 #include "av1/common/blockd.h" 26 #include "av1/common/mvref_common.h" 27 #include "av1/common/obmc.h" 28 #include "av1/common/reconinter.h" 29 #include "av1/common/reconintra.h" 30 31 // This function will determine whether or not to create a warped 32 // prediction. 33 static int allow_warp(const MB_MODE_INFO *const mbmi, 34 const WarpTypesAllowed *const warp_types, 35 const WarpedMotionParams *const gm_params, 36 int build_for_obmc, const struct scale_factors *const sf, 37 WarpedMotionParams *final_warp_params) { 38 // Note: As per the spec, we must test the fixed point scales here, which are 39 // at a higher precision (1 << 14) than the xs and ys in subpel_params (that 40 // have 1 << 10 precision). 41 if (av1_is_scaled(sf)) return 0; 42 43 if (final_warp_params != NULL) *final_warp_params = default_warp_params; 44 45 if (build_for_obmc) return 0; 46 47 if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) { 48 if (final_warp_params != NULL) *final_warp_params = mbmi->wm_params; 49 return 1; 50 } else if (warp_types->global_warp_allowed && !gm_params->invalid) { 51 if (final_warp_params != NULL) *final_warp_params = *gm_params; 52 return 1; 53 } 54 55 return 0; 56 } 57 58 void av1_init_warp_params(InterPredParams *inter_pred_params, 59 const WarpTypesAllowed *warp_types, int ref, 60 const MACROBLOCKD *xd, const MB_MODE_INFO *mi) { 61 if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8) 62 return; 63 64 if (xd->cur_frame_force_integer_mv) return; 65 66 if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0, 67 inter_pred_params->scale_factors, 68 &inter_pred_params->warp_params)) { 69 #if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER 70 aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE, 71 "Warped motion is disabled in realtime only build."); 72 #endif // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER 73 inter_pred_params->mode = WARP_PRED; 74 } 75 } 76 77 void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, 78 int dst_stride, 79 InterPredParams *inter_pred_params, 80 const SubpelParams *subpel_params) { 81 assert(IMPLIES(inter_pred_params->conv_params.is_compound, 82 inter_pred_params->conv_params.dst != NULL)); 83 84 if (inter_pred_params->mode == TRANSLATION_PRED) { 85 #if CONFIG_AV1_HIGHBITDEPTH 86 if (inter_pred_params->use_hbd_buf) { 87 highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, 88 inter_pred_params->block_width, 89 inter_pred_params->block_height, 90 &inter_pred_params->conv_params, 91 inter_pred_params->interp_filter_params, 92 inter_pred_params->bit_depth); 93 } else { 94 inter_predictor(src, src_stride, dst, dst_stride, subpel_params, 95 inter_pred_params->block_width, 96 inter_pred_params->block_height, 97 &inter_pred_params->conv_params, 98 inter_pred_params->interp_filter_params); 99 } 100 #else 101 inter_predictor(src, src_stride, dst, dst_stride, subpel_params, 102 inter_pred_params->block_width, 103 inter_pred_params->block_height, 104 &inter_pred_params->conv_params, 105 inter_pred_params->interp_filter_params); 106 #endif 107 } 108 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 109 // TODO(jingning): av1_warp_plane() can be further cleaned up. 110 else if (inter_pred_params->mode == WARP_PRED) { 111 av1_warp_plane( 112 &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf, 113 inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0, 114 inter_pred_params->ref_frame_buf.width, 115 inter_pred_params->ref_frame_buf.height, 116 inter_pred_params->ref_frame_buf.stride, dst, 117 inter_pred_params->pix_col, inter_pred_params->pix_row, 118 inter_pred_params->block_width, inter_pred_params->block_height, 119 dst_stride, inter_pred_params->subsampling_x, 120 inter_pred_params->subsampling_y, &inter_pred_params->conv_params); 121 } 122 #endif // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER 123 else { 124 assert(0 && "Unsupported inter_pred_params->mode"); 125 } 126 } 127 128 static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = { 129 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18, 131 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 132 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 133 }; 134 static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = { 135 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27, 137 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 138 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 139 }; 140 static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = { 141 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 142 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21, 143 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 144 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 145 }; 146 147 static inline void shift_copy(const uint8_t *src, uint8_t *dst, int shift, 148 int width) { 149 if (shift >= 0) { 150 memcpy(dst + shift, src, width - shift); 151 memset(dst, src[0], shift); 152 } else { 153 shift = -shift; 154 memcpy(dst, src + shift, width - shift); 155 memset(dst + width - shift, src[width - 1], shift); 156 } 157 } 158 159 /* clang-format off */ 160 DECLARE_ALIGNED(16, static uint8_t, 161 wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = { 162 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 163 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 164 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 165 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, 166 { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, 167 { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, 168 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, 169 { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, 170 { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, 171 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, }, 172 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 173 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 174 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 175 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 176 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 177 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 178 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 179 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 180 { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, }, 181 { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, }, 182 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 183 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, // not used 184 }; 185 /* clang-format on */ 186 187 // [negative][direction] 188 DECLARE_ALIGNED( 189 16, static uint8_t, 190 wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]); 191 192 // 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound 193 // on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE. 194 DECLARE_ALIGNED(16, static uint8_t, 195 wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]); 196 197 DECLARE_ALIGNED(16, static uint8_t, 198 smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL] 199 [MAX_WEDGE_SQUARE]); 200 201 static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2]; 202 203 static const wedge_code_type wedge_codebook_16_hgtw[16] = { 204 { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, 205 { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, 206 { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 }, 207 { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 }, 208 { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, 209 { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, 210 { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, 211 { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, 212 }; 213 214 static const wedge_code_type wedge_codebook_16_hltw[16] = { 215 { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, 216 { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, 217 { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 }, 218 { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 }, 219 { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, 220 { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, 221 { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, 222 { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, 223 }; 224 225 static const wedge_code_type wedge_codebook_16_heqw[16] = { 226 { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 }, 227 { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 }, 228 { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 }, 229 { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 }, 230 { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 }, 231 { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 }, 232 { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 }, 233 { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 }, 234 }; 235 236 const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = { 237 { 0, NULL, NULL, NULL }, 238 { 0, NULL, NULL, NULL }, 239 { 0, NULL, NULL, NULL }, 240 { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 241 wedge_masks[BLOCK_8X8] }, 242 { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 243 wedge_masks[BLOCK_8X16] }, 244 { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 245 wedge_masks[BLOCK_16X8] }, 246 { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 247 wedge_masks[BLOCK_16X16] }, 248 { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 249 wedge_masks[BLOCK_16X32] }, 250 { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 251 wedge_masks[BLOCK_32X16] }, 252 { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 253 wedge_masks[BLOCK_32X32] }, 254 { 0, NULL, NULL, NULL }, 255 { 0, NULL, NULL, NULL }, 256 { 0, NULL, NULL, NULL }, 257 { 0, NULL, NULL, NULL }, 258 { 0, NULL, NULL, NULL }, 259 { 0, NULL, NULL, NULL }, 260 { 0, NULL, NULL, NULL }, 261 { 0, NULL, NULL, NULL }, 262 { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 263 wedge_masks[BLOCK_8X32] }, 264 { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 265 wedge_masks[BLOCK_32X8] }, 266 { 0, NULL, NULL, NULL }, 267 { 0, NULL, NULL, NULL }, 268 }; 269 270 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg, 271 BLOCK_SIZE sb_type) { 272 const uint8_t *master; 273 const int bh = block_size_high[sb_type]; 274 const int bw = block_size_wide[sb_type]; 275 const wedge_code_type *a = 276 av1_wedge_params_lookup[sb_type].codebook + wedge_index; 277 int woff, hoff; 278 const uint8_t wsignflip = 279 av1_wedge_params_lookup[sb_type].signflip[wedge_index]; 280 281 assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type)); 282 woff = (a->x_offset * bw) >> 3; 283 hoff = (a->y_offset * bh) >> 3; 284 master = wedge_mask_obl[neg ^ wsignflip][a->direction] + 285 MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) + 286 MASK_MASTER_SIZE / 2 - woff; 287 return master; 288 } 289 290 const uint8_t *av1_get_compound_type_mask( 291 const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) { 292 (void)sb_type; 293 switch (comp_data->type) { 294 case COMPOUND_WEDGE: 295 return av1_get_contiguous_soft_mask(comp_data->wedge_index, 296 comp_data->wedge_sign, sb_type); 297 default: return comp_data->seg_mask; 298 } 299 } 300 301 static inline void diffwtd_mask_d16(uint8_t *mask, int which_inverse, 302 int mask_base, const CONV_BUF_TYPE *src0, 303 int src0_stride, const CONV_BUF_TYPE *src1, 304 int src1_stride, int h, int w, 305 ConvolveParams *conv_params, int bd) { 306 int round = 307 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8); 308 int i, j, m, diff; 309 for (i = 0; i < h; ++i) { 310 for (j = 0; j < w; ++j) { 311 diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]); 312 diff = ROUND_POWER_OF_TWO(diff, round); 313 m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA); 314 mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m; 315 } 316 } 317 } 318 319 void av1_build_compound_diffwtd_mask_d16_c( 320 uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, 321 int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, 322 ConvolveParams *conv_params, int bd) { 323 switch (mask_type) { 324 case DIFFWTD_38: 325 diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w, 326 conv_params, bd); 327 break; 328 case DIFFWTD_38_INV: 329 diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w, 330 conv_params, bd); 331 break; 332 default: assert(0); 333 } 334 } 335 336 static inline void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base, 337 const uint8_t *src0, int src0_stride, 338 const uint8_t *src1, int src1_stride, int h, 339 int w) { 340 int i, j, m, diff; 341 for (i = 0; i < h; ++i) { 342 for (j = 0; j < w; ++j) { 343 diff = 344 abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]); 345 m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA); 346 mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m; 347 } 348 } 349 } 350 351 void av1_build_compound_diffwtd_mask_c(uint8_t *mask, 352 DIFFWTD_MASK_TYPE mask_type, 353 const uint8_t *src0, int src0_stride, 354 const uint8_t *src1, int src1_stride, 355 int h, int w) { 356 switch (mask_type) { 357 case DIFFWTD_38: 358 diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w); 359 break; 360 case DIFFWTD_38_INV: 361 diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w); 362 break; 363 default: assert(0); 364 } 365 } 366 367 #if CONFIG_AV1_HIGHBITDEPTH 368 static AOM_FORCE_INLINE void diffwtd_mask_highbd( 369 uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0, 370 int src0_stride, const uint16_t *src1, int src1_stride, int h, int w, 371 const unsigned int bd) { 372 assert(bd >= 8); 373 if (bd == 8) { 374 if (which_inverse) { 375 for (int i = 0; i < h; ++i) { 376 for (int j = 0; j < w; ++j) { 377 int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR; 378 unsigned int m = negative_to_zero(mask_base + diff); 379 m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); 380 mask[j] = AOM_BLEND_A64_MAX_ALPHA - m; 381 } 382 src0 += src0_stride; 383 src1 += src1_stride; 384 mask += w; 385 } 386 } else { 387 for (int i = 0; i < h; ++i) { 388 for (int j = 0; j < w; ++j) { 389 int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR; 390 unsigned int m = negative_to_zero(mask_base + diff); 391 m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); 392 mask[j] = m; 393 } 394 src0 += src0_stride; 395 src1 += src1_stride; 396 mask += w; 397 } 398 } 399 } else { 400 const unsigned int bd_shift = bd - 8; 401 if (which_inverse) { 402 for (int i = 0; i < h; ++i) { 403 for (int j = 0; j < w; ++j) { 404 int diff = 405 (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR; 406 unsigned int m = negative_to_zero(mask_base + diff); 407 m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); 408 mask[j] = AOM_BLEND_A64_MAX_ALPHA - m; 409 } 410 src0 += src0_stride; 411 src1 += src1_stride; 412 mask += w; 413 } 414 } else { 415 for (int i = 0; i < h; ++i) { 416 for (int j = 0; j < w; ++j) { 417 int diff = 418 (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR; 419 unsigned int m = negative_to_zero(mask_base + diff); 420 m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA); 421 mask[j] = m; 422 } 423 src0 += src0_stride; 424 src1 += src1_stride; 425 mask += w; 426 } 427 } 428 } 429 } 430 431 void av1_build_compound_diffwtd_mask_highbd_c( 432 uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, 433 int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, 434 int bd) { 435 switch (mask_type) { 436 case DIFFWTD_38: 437 diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride, 438 CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd); 439 break; 440 case DIFFWTD_38_INV: 441 diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride, 442 CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd); 443 break; 444 default: assert(0); 445 } 446 } 447 #endif // CONFIG_AV1_HIGHBITDEPTH 448 449 static inline void init_wedge_master_masks(void) { 450 int i, j; 451 const int w = MASK_MASTER_SIZE; 452 const int h = MASK_MASTER_SIZE; 453 const int stride = MASK_MASTER_STRIDE; 454 // Note: index [0] stores the masters, and [1] its complement. 455 // Generate prototype by shifting the masters 456 int shift = h / 4; 457 for (i = 0; i < h; i += 2) { 458 shift_copy(wedge_master_oblique_even, 459 &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift, 460 MASK_MASTER_SIZE); 461 shift--; 462 shift_copy(wedge_master_oblique_odd, 463 &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift, 464 MASK_MASTER_SIZE); 465 memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride], 466 wedge_master_vertical, 467 MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0])); 468 memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride], 469 wedge_master_vertical, 470 MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0])); 471 } 472 473 for (i = 0; i < h; ++i) { 474 for (j = 0; j < w; ++j) { 475 const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j]; 476 wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk; 477 wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = 478 wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = 479 (1 << WEDGE_WEIGHT_BITS) - msk; 480 wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] = 481 wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] = 482 (1 << WEDGE_WEIGHT_BITS) - msk; 483 wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] = 484 wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk; 485 const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j]; 486 wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx; 487 wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] = 488 wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] = 489 (1 << WEDGE_WEIGHT_BITS) - mskx; 490 } 491 } 492 } 493 494 static inline void init_wedge_masks(void) { 495 uint8_t *dst = wedge_mask_buf; 496 BLOCK_SIZE bsize; 497 memset(wedge_masks, 0, sizeof(wedge_masks)); 498 for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) { 499 const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize]; 500 const int wtypes = wedge_params->wedge_types; 501 if (wtypes == 0) continue; 502 const uint8_t *mask; 503 const int bw = block_size_wide[bsize]; 504 const int bh = block_size_high[bsize]; 505 int w; 506 for (w = 0; w < wtypes; ++w) { 507 mask = get_wedge_mask_inplace(w, 0, bsize); 508 aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw, 509 bh); 510 wedge_params->masks[0][w] = dst; 511 dst += bw * bh; 512 513 mask = get_wedge_mask_inplace(w, 1, bsize); 514 aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw, 515 bh); 516 wedge_params->masks[1][w] = dst; 517 dst += bw * bh; 518 } 519 assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf)); 520 } 521 } 522 523 /* clang-format off */ 524 static const uint8_t ii_weights1d[MAX_SB_SIZE] = { 525 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32, 526 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16, 527 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8, 528 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 529 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 530 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 532 }; 533 static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = { 534 32, 16, 16, 16, 8, 8, 8, 4, 535 4, 4, 2, 2, 2, 1, 1, 1, 536 8, 8, 4, 4, 2, 2 537 }; 538 /* clang-format on */ 539 540 static inline void build_smooth_interintra_mask(uint8_t *mask, int stride, 541 BLOCK_SIZE plane_bsize, 542 INTERINTRA_MODE mode) { 543 int i, j; 544 const int bw = block_size_wide[plane_bsize]; 545 const int bh = block_size_high[plane_bsize]; 546 const int size_scale = ii_size_scales[plane_bsize]; 547 548 switch (mode) { 549 case II_V_PRED: 550 for (i = 0; i < bh; ++i) { 551 memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0])); 552 mask += stride; 553 } 554 break; 555 556 case II_H_PRED: 557 for (i = 0; i < bh; ++i) { 558 for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale]; 559 mask += stride; 560 } 561 break; 562 563 case II_SMOOTH_PRED: 564 for (i = 0; i < bh; ++i) { 565 for (j = 0; j < bw; ++j) 566 mask[j] = ii_weights1d[(i < j ? i : j) * size_scale]; 567 mask += stride; 568 } 569 break; 570 571 case II_DC_PRED: 572 default: 573 for (i = 0; i < bh; ++i) { 574 memset(mask, 32, bw * sizeof(mask[0])); 575 mask += stride; 576 } 577 break; 578 } 579 } 580 581 static inline void init_smooth_interintra_masks(void) { 582 for (int m = 0; m < INTERINTRA_MODES; ++m) { 583 for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) { 584 const int bw = block_size_wide[bs]; 585 const int bh = block_size_high[bs]; 586 if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue; 587 build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs, 588 m); 589 } 590 } 591 } 592 593 // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0 594 static void init_all_wedge_masks(void) { 595 init_wedge_master_masks(); 596 init_wedge_masks(); 597 init_smooth_interintra_masks(); 598 } 599 600 void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); } 601 602 static inline void build_masked_compound_no_round( 603 uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride, 604 const CONV_BUF_TYPE *src1, int src1_stride, 605 const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h, 606 int w, InterPredParams *inter_pred_params) { 607 const int ssy = inter_pred_params->subsampling_y; 608 const int ssx = inter_pred_params->subsampling_x; 609 const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type); 610 const int mask_stride = block_size_wide[sb_type]; 611 #if CONFIG_AV1_HIGHBITDEPTH 612 if (inter_pred_params->use_hbd_buf) { 613 aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1, 614 src1_stride, mask, mask_stride, w, h, ssx, 615 ssy, &inter_pred_params->conv_params, 616 inter_pred_params->bit_depth); 617 } else { 618 aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1, 619 src1_stride, mask, mask_stride, w, h, ssx, ssy, 620 &inter_pred_params->conv_params); 621 } 622 #else 623 aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1, 624 src1_stride, mask, mask_stride, w, h, ssx, ssy, 625 &inter_pred_params->conv_params); 626 #endif 627 } 628 629 void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride, 630 uint8_t *dst, int dst_stride, 631 InterPredParams *inter_pred_params, 632 const SubpelParams *subpel_params) { 633 const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp; 634 BLOCK_SIZE sb_type = inter_pred_params->sb_type; 635 636 // We're going to call av1_make_inter_predictor to generate a prediction into 637 // a temporary buffer, then will blend that temporary buffer with that from 638 // the other reference. 639 DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]); 640 uint8_t *tmp_dst = 641 inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf; 642 643 const int tmp_buf_stride = MAX_SB_SIZE; 644 CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst; 645 int org_dst_stride = inter_pred_params->conv_params.dst_stride; 646 CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf; 647 inter_pred_params->conv_params.dst = tmp_buf16; 648 inter_pred_params->conv_params.dst_stride = tmp_buf_stride; 649 assert(inter_pred_params->conv_params.do_average == 0); 650 651 // This will generate a prediction in tmp_buf for the second reference 652 av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, 653 inter_pred_params, subpel_params); 654 655 if (!inter_pred_params->conv_params.plane && 656 comp_data->type == COMPOUND_DIFFWTD) { 657 av1_build_compound_diffwtd_mask_d16( 658 comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride, 659 tmp_buf16, tmp_buf_stride, inter_pred_params->block_height, 660 inter_pred_params->block_width, &inter_pred_params->conv_params, 661 inter_pred_params->bit_depth); 662 } 663 build_masked_compound_no_round( 664 dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride, 665 comp_data, sb_type, inter_pred_params->block_height, 666 inter_pred_params->block_width, inter_pred_params); 667 } 668 669 void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm, 670 const MB_MODE_INFO *mbmi, int *fwd_offset, 671 int *bck_offset, 672 int *use_dist_wtd_comp_avg, 673 int is_compound) { 674 assert(fwd_offset != NULL && bck_offset != NULL); 675 if (!is_compound || mbmi->compound_idx) { 676 *fwd_offset = 8; 677 *bck_offset = 8; 678 *use_dist_wtd_comp_avg = 0; 679 return; 680 } 681 682 *use_dist_wtd_comp_avg = 1; 683 const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]); 684 const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]); 685 const int cur_frame_index = cm->cur_frame->order_hint; 686 int bck_frame_index = 0, fwd_frame_index = 0; 687 688 if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint; 689 if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint; 690 691 int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info, 692 fwd_frame_index, cur_frame_index)), 693 0, MAX_FRAME_DISTANCE); 694 int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info, 695 cur_frame_index, bck_frame_index)), 696 0, MAX_FRAME_DISTANCE); 697 698 const int order = d0 <= d1; 699 700 if (d0 == 0 || d1 == 0) { 701 *fwd_offset = quant_dist_lookup_table[3][order]; 702 *bck_offset = quant_dist_lookup_table[3][1 - order]; 703 return; 704 } 705 706 int i; 707 for (i = 0; i < 3; ++i) { 708 int c0 = quant_dist_weight[i][order]; 709 int c1 = quant_dist_weight[i][!order]; 710 int d0_c0 = d0 * c0; 711 int d1_c1 = d1 * c1; 712 if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break; 713 } 714 715 *fwd_offset = quant_dist_lookup_table[i][order]; 716 *bck_offset = quant_dist_lookup_table[i][1 - order]; 717 } 718 719 void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize, 720 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, 721 const int plane_start, const int plane_end) { 722 // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet 723 // the static analysis warnings. 724 for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) { 725 struct macroblockd_plane *const pd = &planes[i]; 726 const int is_uv = i > 0; 727 setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv], 728 src->crop_heights[is_uv], src->strides[is_uv], mi_row, 729 mi_col, NULL, pd->subsampling_x, pd->subsampling_y); 730 } 731 } 732 733 void av1_setup_pre_planes(MACROBLOCKD *xd, int idx, 734 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, 735 const struct scale_factors *sf, 736 const int num_planes) { 737 if (src != NULL) { 738 // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet 739 // the static analysis warnings. 740 for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) { 741 struct macroblockd_plane *const pd = &xd->plane[i]; 742 const int is_uv = i > 0; 743 setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i], 744 src->crop_widths[is_uv], src->crop_heights[is_uv], 745 src->strides[is_uv], mi_row, mi_col, sf, 746 pd->subsampling_x, pd->subsampling_y); 747 } 748 } 749 } 750 751 // obmc_mask_N[overlap_position] 752 static const uint8_t obmc_mask_1[1] = { 64 }; 753 DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 }; 754 755 DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 }; 756 757 static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 }; 758 759 static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54, 760 56, 58, 60, 61, 64, 64, 64, 64 }; 761 762 static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44, 763 45, 47, 48, 50, 51, 52, 53, 55, 764 56, 57, 58, 59, 60, 60, 61, 62, 765 64, 64, 64, 64, 64, 64, 64, 64 }; 766 767 static const uint8_t obmc_mask_64[64] = { 768 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44, 769 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56, 770 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62, 771 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 772 }; 773 774 const uint8_t *av1_get_obmc_mask(int length) { 775 switch (length) { 776 case 1: return obmc_mask_1; 777 case 2: return obmc_mask_2; 778 case 4: return obmc_mask_4; 779 case 8: return obmc_mask_8; 780 case 16: return obmc_mask_16; 781 case 32: return obmc_mask_32; 782 case 64: return obmc_mask_64; 783 default: assert(0); return NULL; 784 } 785 } 786 787 static inline void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row, 788 int rel_mi_col, uint8_t op_mi_size, 789 int dir, MB_MODE_INFO *mi, void *fun_ctxt, 790 const int num_planes) { 791 (void)xd; 792 (void)rel_mi_row; 793 (void)rel_mi_col; 794 (void)op_mi_size; 795 (void)dir; 796 (void)mi; 797 ++*(uint8_t *)fun_ctxt; 798 (void)num_planes; 799 } 800 801 void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) { 802 MB_MODE_INFO *mbmi = xd->mi[0]; 803 804 mbmi->overlappable_neighbors = 0; 805 806 if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return; 807 808 foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr, 809 &mbmi->overlappable_neighbors); 810 if (mbmi->overlappable_neighbors) return; 811 foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr, 812 &mbmi->overlappable_neighbors); 813 } 814 815 // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if 816 // block-size of current plane is smaller than 8x8, always only blend with the 817 // left neighbor(s) (skip blending with the above side). 818 #define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable 819 820 int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, 821 const struct macroblockd_plane *pd, int dir) { 822 assert(is_motion_variation_allowed_bsize(bsize)); 823 824 const BLOCK_SIZE bsize_plane = 825 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 826 switch (bsize_plane) { 827 #if DISABLE_CHROMA_U8X8_OBMC 828 case BLOCK_4X4: 829 case BLOCK_8X4: 830 case BLOCK_4X8: return 1; 831 #else 832 case BLOCK_4X4: 833 case BLOCK_8X4: 834 case BLOCK_4X8: return dir == 0; 835 #endif 836 default: return 0; 837 } 838 } 839 840 #if CONFIG_AV1_DECODER 841 static void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) { 842 mbmi->ref_frame[1] = NONE_FRAME; 843 mbmi->interinter_comp.type = COMPOUND_AVERAGE; 844 } 845 #endif // CONFIG_AV1_DECODER 846 847 struct obmc_inter_pred_ctxt { 848 uint8_t **adjacent; 849 int *adjacent_stride; 850 }; 851 852 static inline void build_obmc_inter_pred_above( 853 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size, 854 int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) { 855 (void)above_mi; 856 (void)rel_mi_row; 857 (void)dir; 858 struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt; 859 const BLOCK_SIZE bsize = xd->mi[0]->bsize; 860 const int overlap = 861 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1; 862 863 for (int plane = 0; plane < num_planes; ++plane) { 864 const struct macroblockd_plane *pd = &xd->plane[plane]; 865 const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x; 866 const int bh = overlap >> pd->subsampling_y; 867 const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x; 868 869 if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue; 870 871 const int dst_stride = pd->dst.stride; 872 uint8_t *const dst = &pd->dst.buf[plane_col]; 873 const int tmp_stride = ctxt->adjacent_stride[plane]; 874 const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col]; 875 const uint8_t *const mask = av1_get_obmc_mask(bh); 876 #if CONFIG_AV1_HIGHBITDEPTH 877 const int is_hbd = is_cur_buf_hbd(xd); 878 if (is_hbd) 879 aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, 880 tmp_stride, mask, bw, bh, xd->bd); 881 else 882 aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, 883 mask, bw, bh); 884 #else 885 aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, 886 bw, bh); 887 #endif 888 } 889 } 890 891 static inline void build_obmc_inter_pred_left( 892 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size, 893 int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) { 894 (void)left_mi; 895 (void)rel_mi_col; 896 (void)dir; 897 struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt; 898 const BLOCK_SIZE bsize = xd->mi[0]->bsize; 899 const int overlap = 900 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1; 901 902 for (int plane = 0; plane < num_planes; ++plane) { 903 const struct macroblockd_plane *pd = &xd->plane[plane]; 904 const int bw = overlap >> pd->subsampling_x; 905 const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y; 906 const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y; 907 908 if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue; 909 910 const int dst_stride = pd->dst.stride; 911 uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride]; 912 const int tmp_stride = ctxt->adjacent_stride[plane]; 913 const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride]; 914 const uint8_t *const mask = av1_get_obmc_mask(bw); 915 916 #if CONFIG_AV1_HIGHBITDEPTH 917 const int is_hbd = is_cur_buf_hbd(xd); 918 if (is_hbd) 919 aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, 920 tmp_stride, mask, bw, bh, xd->bd); 921 else 922 aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, 923 mask, bw, bh); 924 #else 925 aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask, 926 bw, bh); 927 #endif 928 } 929 } 930 931 // This function combines motion compensated predictions that are generated by 932 // top/left neighboring blocks' inter predictors with the regular inter 933 // prediction. We assume the original prediction (bmc) is stored in 934 // xd->plane[].dst.buf 935 void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd, 936 uint8_t *above[MAX_MB_PLANE], 937 int above_stride[MAX_MB_PLANE], 938 uint8_t *left[MAX_MB_PLANE], 939 int left_stride[MAX_MB_PLANE]) { 940 const BLOCK_SIZE bsize = xd->mi[0]->bsize; 941 942 // handle above row 943 struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride }; 944 foreach_overlappable_nb_above(cm, xd, 945 max_neighbor_obmc[mi_size_wide_log2[bsize]], 946 build_obmc_inter_pred_above, &ctxt_above); 947 948 // handle left column 949 struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride }; 950 foreach_overlappable_nb_left(cm, xd, 951 max_neighbor_obmc[mi_size_high_log2[bsize]], 952 build_obmc_inter_pred_left, &ctxt_left); 953 } 954 955 void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1, 956 uint8_t **dst_buf2) { 957 if (is_cur_buf_hbd(xd)) { 958 int len = sizeof(uint16_t); 959 dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]); 960 dst_buf1[1] = 961 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len); 962 dst_buf1[2] = 963 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len); 964 dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]); 965 dst_buf2[1] = 966 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len); 967 dst_buf2[2] = 968 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len); 969 } else { 970 dst_buf1[0] = xd->tmp_obmc_bufs[0]; 971 dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE; 972 dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2; 973 dst_buf2[0] = xd->tmp_obmc_bufs[1]; 974 dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE; 975 dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2; 976 } 977 } 978 979 #if CONFIG_AV1_DECODER 980 void av1_setup_build_prediction_by_above_pred( 981 MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width, 982 MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt, 983 const int num_planes) { 984 const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize); 985 const int above_mi_col = xd->mi_col + rel_mi_col; 986 987 modify_neighbor_predictor_for_obmc(above_mbmi); 988 989 for (int j = 0; j < num_planes; ++j) { 990 struct macroblockd_plane *const pd = &xd->plane[j]; 991 setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j], 992 ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col, 993 NULL, pd->subsampling_x, pd->subsampling_y); 994 } 995 996 const int num_refs = 1 + has_second_ref(above_mbmi); 997 998 for (int ref = 0; ref < num_refs; ++ref) { 999 const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref]; 1000 1001 const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame); 1002 const struct scale_factors *const sf = 1003 get_ref_scale_factors_const(ctxt->cm, frame); 1004 xd->block_ref_scale_factors[ref] = sf; 1005 if ((!av1_is_valid_scale(sf))) 1006 aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, 1007 "Reference frame has invalid dimensions"); 1008 av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf, 1009 num_planes); 1010 } 1011 1012 xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col); 1013 xd->mb_to_right_edge = 1014 ctxt->mb_to_far_edge + 1015 (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8; 1016 } 1017 1018 void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row, 1019 uint8_t left_mi_height, 1020 MB_MODE_INFO *left_mbmi, 1021 struct build_prediction_ctxt *ctxt, 1022 const int num_planes) { 1023 const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize); 1024 const int left_mi_row = xd->mi_row + rel_mi_row; 1025 1026 modify_neighbor_predictor_for_obmc(left_mbmi); 1027 1028 for (int j = 0; j < num_planes; ++j) { 1029 struct macroblockd_plane *const pd = &xd->plane[j]; 1030 setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j], 1031 ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0, 1032 NULL, pd->subsampling_x, pd->subsampling_y); 1033 } 1034 1035 const int num_refs = 1 + has_second_ref(left_mbmi); 1036 1037 for (int ref = 0; ref < num_refs; ++ref) { 1038 const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref]; 1039 1040 const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame); 1041 const struct scale_factors *const ref_scale_factors = 1042 get_ref_scale_factors_const(ctxt->cm, frame); 1043 1044 xd->block_ref_scale_factors[ref] = ref_scale_factors; 1045 if ((!av1_is_valid_scale(ref_scale_factors))) 1046 aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM, 1047 "Reference frame has invalid dimensions"); 1048 av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col, 1049 ref_scale_factors, num_planes); 1050 } 1051 1052 xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row)); 1053 xd->mb_to_bottom_edge = 1054 ctxt->mb_to_far_edge + 1055 GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE); 1056 } 1057 #endif // CONFIG_AV1_DECODER 1058 1059 static inline void combine_interintra( 1060 INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index, 1061 int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, 1062 uint8_t *comppred, int compstride, const uint8_t *interpred, 1063 int interstride, const uint8_t *intrapred, int intrastride) { 1064 const int bw = block_size_wide[plane_bsize]; 1065 const int bh = block_size_high[plane_bsize]; 1066 1067 if (use_wedge_interintra) { 1068 if (av1_is_wedge_used(bsize)) { 1069 const uint8_t *mask = 1070 av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); 1071 const int subw = 2 * mi_size_wide[bsize] == bw; 1072 const int subh = 2 * mi_size_high[bsize] == bh; 1073 aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, 1074 interpred, interstride, mask, block_size_wide[bsize], 1075 bw, bh, subw, subh); 1076 } 1077 return; 1078 } 1079 1080 const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize]; 1081 aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred, 1082 interstride, mask, bw, bw, bh, 0, 0); 1083 } 1084 1085 #if CONFIG_AV1_HIGHBITDEPTH 1086 static inline void combine_interintra_highbd( 1087 INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index, 1088 int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize, 1089 uint8_t *comppred8, int compstride, const uint8_t *interpred8, 1090 int interstride, const uint8_t *intrapred8, int intrastride, int bd) { 1091 const int bw = block_size_wide[plane_bsize]; 1092 const int bh = block_size_high[plane_bsize]; 1093 1094 if (use_wedge_interintra) { 1095 if (av1_is_wedge_used(bsize)) { 1096 const uint8_t *mask = 1097 av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); 1098 const int subh = 2 * mi_size_high[bsize] == bh; 1099 const int subw = 2 * mi_size_wide[bsize] == bw; 1100 aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride, 1101 interpred8, interstride, mask, 1102 block_size_wide[bsize], bw, bh, subw, subh, bd); 1103 } 1104 return; 1105 } 1106 1107 uint8_t mask[MAX_SB_SQUARE]; 1108 build_smooth_interintra_mask(mask, bw, plane_bsize, mode); 1109 aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride, 1110 interpred8, interstride, mask, bw, bw, bh, 0, 0, 1111 bd); 1112 } 1113 #endif 1114 1115 void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm, 1116 MACROBLOCKD *xd, 1117 BLOCK_SIZE bsize, int plane, 1118 const BUFFER_SET *ctx, 1119 uint8_t *dst, int dst_stride) { 1120 struct macroblockd_plane *const pd = &xd->plane[plane]; 1121 const int ssx = xd->plane[plane].subsampling_x; 1122 const int ssy = xd->plane[plane].subsampling_y; 1123 BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy); 1124 PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode]; 1125 assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0); 1126 assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0); 1127 assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0); 1128 assert(xd->mi[0]->use_intrabc == 0); 1129 const SequenceHeader *seq_params = cm->seq_params; 1130 1131 av1_predict_intra_block(xd, seq_params->sb_size, 1132 seq_params->enable_intra_edge_filter, pd->width, 1133 pd->height, max_txsize_rect_lookup[plane_bsize], mode, 1134 0, 0, FILTER_INTRA_MODES, ctx->plane[plane], 1135 ctx->stride[plane], dst, dst_stride, 0, 0, plane); 1136 } 1137 1138 void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, 1139 const uint8_t *inter_pred, int inter_stride, 1140 const uint8_t *intra_pred, int intra_stride) { 1141 const int ssx = xd->plane[plane].subsampling_x; 1142 const int ssy = xd->plane[plane].subsampling_y; 1143 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy); 1144 #if CONFIG_AV1_HIGHBITDEPTH 1145 if (is_cur_buf_hbd(xd)) { 1146 combine_interintra_highbd( 1147 xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra, 1148 xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize, 1149 plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, 1150 inter_pred, inter_stride, intra_pred, intra_stride, xd->bd); 1151 return; 1152 } 1153 #endif 1154 combine_interintra( 1155 xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra, 1156 xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize, 1157 plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, 1158 inter_pred, inter_stride, intra_pred, intra_stride); 1159 } 1160 1161 // build interintra_predictors for one plane 1162 void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd, 1163 uint8_t *pred, int stride, 1164 const BUFFER_SET *ctx, int plane, 1165 BLOCK_SIZE bsize) { 1166 assert(bsize < BLOCK_SIZES_ALL); 1167 if (is_cur_buf_hbd(xd)) { 1168 DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]); 1169 av1_build_intra_predictors_for_interintra( 1170 cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor), 1171 MAX_SB_SIZE); 1172 av1_combine_interintra(xd, bsize, plane, pred, stride, 1173 CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE); 1174 } else { 1175 DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]); 1176 av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx, 1177 intrapredictor, MAX_SB_SIZE); 1178 av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor, 1179 MAX_SB_SIZE); 1180 } 1181 }