mcomp.c (162025B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 #include <limits.h> 14 #include <math.h> 15 #include <stdio.h> 16 17 #include "config/aom_config.h" 18 #include "config/aom_dsp_rtcd.h" 19 20 #include "aom_dsp/aom_dsp_common.h" 21 #include "aom_mem/aom_mem.h" 22 #include "aom_ports/mem.h" 23 24 #include "av1/common/av1_common_int.h" 25 #include "av1/common/common.h" 26 #include "av1/common/filter.h" 27 #include "av1/common/mvref_common.h" 28 #include "av1/common/reconinter.h" 29 30 #include "av1/encoder/encoder.h" 31 #include "av1/encoder/encodemv.h" 32 #include "av1/encoder/mcomp.h" 33 #include "av1/encoder/rdopt.h" 34 #include "av1/encoder/reconinter_enc.h" 35 36 static inline void init_mv_cost_params(MV_COST_PARAMS *mv_cost_params, 37 const MvCosts *mv_costs, 38 const MV *ref_mv, int errorperbit, 39 int sadperbit) { 40 mv_cost_params->ref_mv = ref_mv; 41 mv_cost_params->full_ref_mv = get_fullmv_from_mv(ref_mv); 42 mv_cost_params->mv_cost_type = MV_COST_ENTROPY; 43 mv_cost_params->error_per_bit = errorperbit; 44 mv_cost_params->sad_per_bit = sadperbit; 45 // For allintra encoding mode, 'mv_costs' is not allocated. Hence, the 46 // population of mvjcost and mvcost are avoided. In case of IntraBC, these 47 // values are populated from 'dv_costs' in av1_set_ms_to_intra_mode(). 48 if (mv_costs != NULL) { 49 mv_cost_params->mvjcost = mv_costs->nmv_joint_cost; 50 mv_cost_params->mvcost[0] = mv_costs->mv_cost_stack[0]; 51 mv_cost_params->mvcost[1] = mv_costs->mv_cost_stack[1]; 52 } 53 } 54 55 static inline void init_ms_buffers(MSBuffers *ms_buffers, const MACROBLOCK *x) { 56 ms_buffers->ref = &x->e_mbd.plane[0].pre[0]; 57 ms_buffers->src = &x->plane[0].src; 58 59 av1_set_ms_compound_refs(ms_buffers, NULL, NULL, 0, 0); 60 61 ms_buffers->wsrc = x->obmc_buffer.wsrc; 62 ms_buffers->obmc_mask = x->obmc_buffer.mask; 63 } 64 65 void av1_init_obmc_buffer(OBMCBuffer *obmc_buffer) { 66 obmc_buffer->wsrc = NULL; 67 obmc_buffer->mask = NULL; 68 obmc_buffer->above_pred = NULL; 69 obmc_buffer->left_pred = NULL; 70 } 71 72 void av1_make_default_fullpel_ms_params( 73 FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const struct AV1_COMP *cpi, 74 MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv, FULLPEL_MV start_mv, 75 const search_site_config search_sites[NUM_DISTINCT_SEARCH_METHODS], 76 SEARCH_METHODS search_method, int fine_search_interval) { 77 const MV_SPEED_FEATURES *mv_sf = &cpi->sf.mv_sf; 78 const int is_key_frame = 79 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == KF_UPDATE; 80 81 // High level params 82 ms_params->bsize = bsize; 83 ms_params->vfp = &cpi->ppi->fn_ptr[bsize]; 84 85 init_ms_buffers(&ms_params->ms_buffers, x); 86 87 av1_set_mv_search_method(ms_params, search_sites, search_method); 88 89 ms_params->mesh_patterns[0] = mv_sf->mesh_patterns; 90 ms_params->mesh_patterns[1] = mv_sf->intrabc_mesh_patterns; 91 ms_params->force_mesh_thresh = mv_sf->exhaustive_searches_thresh; 92 ms_params->prune_mesh_search = 93 (cpi->sf.mv_sf.prune_mesh_search == PRUNE_MESH_SEARCH_LVL_2) ? 1 : 0; 94 ms_params->mesh_search_mv_diff_threshold = 4; 95 ms_params->run_mesh_search = 0; 96 ms_params->fine_search_interval = fine_search_interval; 97 98 ms_params->is_intra_mode = 0; 99 100 ms_params->fast_obmc_search = mv_sf->obmc_full_pixel_search_level; 101 102 ms_params->mv_limits = x->mv_limits; 103 av1_set_mv_search_range(&ms_params->mv_limits, ref_mv); 104 105 if (cpi->oxcf.algo_cfg.sharpness == 3) { 106 int top_margin = x->e_mbd.mi_row * MI_SIZE + 8; 107 int left_margin = x->e_mbd.mi_col * MI_SIZE + 8; 108 int bottom_margin = 109 cpi->common.height - mi_size_high[bsize] * MI_SIZE - top_margin + 16; 110 int right_margin = 111 cpi->common.width - mi_size_wide[bsize] * MI_SIZE - left_margin + 16; 112 113 bottom_margin = AOMMAX(bottom_margin, -top_margin); 114 right_margin = AOMMAX(right_margin, -left_margin); 115 116 FullMvLimits *mv_limits = &ms_params->mv_limits; 117 mv_limits->row_min = AOMMAX(mv_limits->row_min, -top_margin); 118 mv_limits->row_max = AOMMIN(mv_limits->row_max, bottom_margin); 119 mv_limits->col_min = AOMMAX(mv_limits->col_min, -left_margin); 120 mv_limits->col_max = AOMMIN(mv_limits->col_max, right_margin); 121 } 122 123 // Mvcost params 124 init_mv_cost_params(&ms_params->mv_cost_params, x->mv_costs, ref_mv, 125 x->errorperbit, x->sadperbit); 126 127 ms_params->sdf = ms_params->vfp->sdf; 128 ms_params->sdx4df = ms_params->vfp->sdx4df; 129 ms_params->sdx3df = ms_params->vfp->sdx3df; 130 131 if (mv_sf->use_downsampled_sad == 2 && block_size_high[bsize] >= 16) { 132 assert(ms_params->vfp->sdsf != NULL); 133 ms_params->sdf = ms_params->vfp->sdsf; 134 assert(ms_params->vfp->sdsx4df != NULL); 135 ms_params->sdx4df = ms_params->vfp->sdsx4df; 136 // Skip version of sadx3 is not available yet 137 ms_params->sdx3df = ms_params->vfp->sdsx4df; 138 } else if (mv_sf->use_downsampled_sad == 1 && block_size_high[bsize] >= 16 && 139 !is_key_frame) { 140 FULLPEL_MV start_mv_clamped = start_mv; 141 // adjust start_mv to make sure it is within MV range 142 clamp_fullmv(&start_mv_clamped, &ms_params->mv_limits); 143 144 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 145 const int ref_stride = ref->stride; 146 const uint8_t *best_address = get_buf_from_fullmv(ref, &start_mv_clamped); 147 const struct buf_2d *const src = ms_params->ms_buffers.src; 148 const uint8_t *src_buf = src->buf; 149 const int src_stride = src->stride; 150 151 unsigned int start_mv_sad_even_rows, start_mv_sad_odd_rows; 152 assert(ms_params->vfp->sdsf != NULL); 153 start_mv_sad_even_rows = 154 ms_params->vfp->sdsf(src_buf, src_stride, best_address, ref_stride); 155 start_mv_sad_odd_rows = 156 ms_params->vfp->sdsf(src_buf + src_stride, src_stride, 157 best_address + ref_stride, ref_stride); 158 159 // If the absolute SAD difference computed between the pred-to-src of even 160 // and odd rows is small, skip every other row in sad computation. 161 const int odd_to_even_diff_sad = 162 abs((int)start_mv_sad_even_rows - (int)start_mv_sad_odd_rows); 163 const int mult_thresh = 4; 164 if (odd_to_even_diff_sad * mult_thresh < (int)start_mv_sad_even_rows) { 165 ms_params->sdf = ms_params->vfp->sdsf; 166 assert(ms_params->vfp->sdsx4df != NULL); 167 ms_params->sdx4df = ms_params->vfp->sdsx4df; 168 ms_params->sdx3df = ms_params->vfp->sdsx4df; 169 } 170 } 171 } 172 173 void av1_set_ms_to_intra_mode(FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 174 const IntraBCMVCosts *dv_costs) { 175 ms_params->is_intra_mode = 1; 176 177 MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 178 179 mv_cost_params->mvjcost = dv_costs->joint_mv; 180 mv_cost_params->mvcost[0] = dv_costs->dv_costs[0]; 181 mv_cost_params->mvcost[1] = dv_costs->dv_costs[1]; 182 } 183 184 void av1_make_default_subpel_ms_params(SUBPEL_MOTION_SEARCH_PARAMS *ms_params, 185 const struct AV1_COMP *cpi, 186 const MACROBLOCK *x, BLOCK_SIZE bsize, 187 const MV *ref_mv, const int *cost_list) { 188 const AV1_COMMON *cm = &cpi->common; 189 // High level params 190 ms_params->allow_hp = cm->features.allow_high_precision_mv; 191 ms_params->forced_stop = cpi->sf.mv_sf.subpel_force_stop; 192 ms_params->iters_per_step = cpi->sf.mv_sf.subpel_iters_per_step; 193 ms_params->cost_list = cond_cost_list_const(cpi, cost_list); 194 195 av1_set_subpel_mv_search_range(&ms_params->mv_limits, &x->mv_limits, ref_mv); 196 197 if (cpi->oxcf.algo_cfg.sharpness == 3) { 198 int top_margin = GET_MV_SUBPEL(x->e_mbd.mi_row * MI_SIZE + 8); 199 int left_margin = GET_MV_SUBPEL(x->e_mbd.mi_col * MI_SIZE + 8); 200 int bottom_margin = 201 GET_MV_SUBPEL(cpi->common.height - mi_size_high[bsize] * MI_SIZE - 202 x->e_mbd.mi_row * MI_SIZE + 8); 203 int right_margin = 204 GET_MV_SUBPEL(cpi->common.width - mi_size_wide[bsize] * MI_SIZE - 205 x->e_mbd.mi_col * MI_SIZE + 8); 206 207 bottom_margin = AOMMAX(bottom_margin, -top_margin); 208 right_margin = AOMMAX(right_margin, -left_margin); 209 210 SubpelMvLimits *mv_limits = &ms_params->mv_limits; 211 mv_limits->row_min = AOMMAX(mv_limits->row_min, -top_margin); 212 mv_limits->row_max = AOMMIN(mv_limits->row_max, bottom_margin); 213 mv_limits->col_min = AOMMAX(mv_limits->col_min, -left_margin); 214 mv_limits->col_max = AOMMIN(mv_limits->col_max, right_margin); 215 } 216 217 // Mvcost params 218 init_mv_cost_params(&ms_params->mv_cost_params, x->mv_costs, ref_mv, 219 x->errorperbit, x->sadperbit); 220 221 // Subpel variance params 222 ms_params->var_params.vfp = &cpi->ppi->fn_ptr[bsize]; 223 ms_params->var_params.subpel_search_type = 224 cpi->sf.mv_sf.use_accurate_subpel_search; 225 ms_params->var_params.w = block_size_wide[bsize]; 226 ms_params->var_params.h = block_size_high[bsize]; 227 228 // Ref and src buffers 229 MSBuffers *ms_buffers = &ms_params->var_params.ms_buffers; 230 init_ms_buffers(ms_buffers, x); 231 } 232 233 void av1_set_mv_search_range(FullMvLimits *mv_limits, const MV *mv) { 234 // Calculate the outermost full-pixel MVs which are inside the limits set by 235 // av1_set_subpel_mv_search_range(). 236 // 237 // The subpel limits are simply mv->col +/- 8*MAX_FULL_PEL_VAL, and similar 238 // for mv->row. We can then divide by 8 to find the fullpel MV limits. But 239 // we have to be careful about the rounding. We want these bounds to be 240 // at least as tight as the subpel limits, which means that we must round 241 // the minimum values up and the maximum values down when dividing. 242 int col_min = ((mv->col + 7) >> 3) - MAX_FULL_PEL_VAL; 243 int row_min = ((mv->row + 7) >> 3) - MAX_FULL_PEL_VAL; 244 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 245 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 246 247 col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1); 248 row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1); 249 col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1); 250 row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1); 251 252 // Get intersection of UMV window and valid MV window to reduce # of checks 253 // in diamond search. 254 mv_limits->col_min = AOMMAX(mv_limits->col_min, col_min); 255 mv_limits->col_max = AOMMIN(mv_limits->col_max, col_max); 256 mv_limits->row_min = AOMMAX(mv_limits->row_min, row_min); 257 mv_limits->row_max = AOMMIN(mv_limits->row_max, row_max); 258 259 mv_limits->col_max = AOMMAX(mv_limits->col_min, mv_limits->col_max); 260 mv_limits->row_max = AOMMAX(mv_limits->row_min, mv_limits->row_max); 261 } 262 263 int av1_init_search_range(int size) { 264 int sr = 0; 265 // Minimum search size no matter what the passed in value. 266 size = AOMMAX(16, size); 267 268 while ((size << sr) < MAX_FULL_PEL_VAL) sr++; 269 270 sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2); 271 return sr; 272 } 273 274 // ============================================================================ 275 // Cost of motion vectors 276 // ============================================================================ 277 // TODO(any): Adaptively adjust the regularization strength based on image size 278 // and motion activity instead of using hard-coded values. It seems like we 279 // roughly half the lambda for each increase in resolution 280 // These are multiplier used to perform regularization in motion compensation 281 // when x->mv_cost_type is set to MV_COST_L1. 282 // LOWRES 283 #define SSE_LAMBDA_LOWRES 2 // Used by mv_cost_err_fn 284 #define SAD_LAMBDA_LOWRES 32 // Used by mvsad_err_cost during full pixel search 285 // MIDRES 286 #define SSE_LAMBDA_MIDRES 0 // Used by mv_cost_err_fn 287 #define SAD_LAMBDA_MIDRES 15 // Used by mvsad_err_cost during full pixel search 288 // HDRES 289 #define SSE_LAMBDA_HDRES 1 // Used by mv_cost_err_fn 290 #define SAD_LAMBDA_HDRES 8 // Used by mvsad_err_cost during full pixel search 291 292 // Returns the rate of encoding the current motion vector based on the 293 // joint_cost and comp_cost. joint_costs covers the cost of transmitting 294 // JOINT_MV, and comp_cost covers the cost of transmitting the actual motion 295 // vector. 296 static inline int mv_cost(const MV *mv, const int *joint_cost, 297 const int *const comp_cost[2]) { 298 return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] + 299 comp_cost[1][mv->col]; 300 } 301 302 #define CONVERT_TO_CONST_MVCOST(ptr) ((const int *const *)(ptr)) 303 // Returns the cost of encoding the motion vector diff := *mv - *ref. The cost 304 // is defined as the rate required to encode diff * weight, rounded to the 305 // nearest 2 ** 7. 306 // This is NOT used during motion compensation. 307 int av1_mv_bit_cost(const MV *mv, const MV *ref_mv, const int *mvjcost, 308 int *const mvcost[2], int weight) { 309 const MV diff = { mv->row - ref_mv->row, mv->col - ref_mv->col }; 310 return ROUND_POWER_OF_TWO( 311 mv_cost(&diff, mvjcost, CONVERT_TO_CONST_MVCOST(mvcost)) * weight, 7); 312 } 313 314 // Returns the cost of using the current mv during the motion search. This is 315 // used when var is used as the error metric. 316 #define PIXEL_TRANSFORM_ERROR_SCALE 4 317 static inline int mv_err_cost(const MV *mv, const MV *ref_mv, 318 const int *mvjcost, const int *const mvcost[2], 319 int error_per_bit, MV_COST_TYPE mv_cost_type) { 320 const MV diff = { mv->row - ref_mv->row, mv->col - ref_mv->col }; 321 const MV abs_diff = { abs(diff.row), abs(diff.col) }; 322 323 switch (mv_cost_type) { 324 case MV_COST_ENTROPY: 325 if (mvcost) { 326 return (int)ROUND_POWER_OF_TWO_64( 327 (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, 328 RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + 329 PIXEL_TRANSFORM_ERROR_SCALE); 330 } 331 return 0; 332 case MV_COST_L1_LOWRES: 333 return (SSE_LAMBDA_LOWRES * (abs_diff.row + abs_diff.col)) >> 3; 334 case MV_COST_L1_MIDRES: 335 return (SSE_LAMBDA_MIDRES * (abs_diff.row + abs_diff.col)) >> 3; 336 case MV_COST_L1_HDRES: 337 return (SSE_LAMBDA_HDRES * (abs_diff.row + abs_diff.col)) >> 3; 338 case MV_COST_NONE: return 0; 339 default: assert(0 && "Invalid rd_cost_type"); return 0; 340 } 341 } 342 343 static inline int mv_err_cost_(const MV *mv, 344 const MV_COST_PARAMS *mv_cost_params) { 345 if (mv_cost_params->mv_cost_type == MV_COST_NONE) { 346 return 0; 347 } 348 return mv_err_cost(mv, mv_cost_params->ref_mv, mv_cost_params->mvjcost, 349 mv_cost_params->mvcost, mv_cost_params->error_per_bit, 350 mv_cost_params->mv_cost_type); 351 } 352 353 // Returns the cost of using the current mv during the motion search. This is 354 // only used during full pixel motion search when sad is used as the error 355 // metric 356 static inline int mvsad_err_cost(const FULLPEL_MV *mv, const FULLPEL_MV *ref_mv, 357 const int *mvjcost, const int *const mvcost[2], 358 int sad_per_bit, MV_COST_TYPE mv_cost_type) { 359 const MV diff = { GET_MV_SUBPEL(mv->row - ref_mv->row), 360 GET_MV_SUBPEL(mv->col - ref_mv->col) }; 361 362 switch (mv_cost_type) { 363 case MV_COST_ENTROPY: 364 return ROUND_POWER_OF_TWO( 365 (unsigned)mv_cost(&diff, mvjcost, CONVERT_TO_CONST_MVCOST(mvcost)) * 366 sad_per_bit, 367 AV1_PROB_COST_SHIFT); 368 case MV_COST_L1_LOWRES: 369 return (SAD_LAMBDA_LOWRES * (abs(diff.row) + abs(diff.col))) >> 3; 370 case MV_COST_L1_MIDRES: 371 return (SAD_LAMBDA_MIDRES * (abs(diff.row) + abs(diff.col))) >> 3; 372 case MV_COST_L1_HDRES: 373 return (SAD_LAMBDA_HDRES * (abs(diff.row) + abs(diff.col))) >> 3; 374 case MV_COST_NONE: return 0; 375 default: assert(0 && "Invalid rd_cost_type"); return 0; 376 } 377 } 378 379 static inline int mvsad_err_cost_(const FULLPEL_MV *mv, 380 const MV_COST_PARAMS *mv_cost_params) { 381 return mvsad_err_cost(mv, &mv_cost_params->full_ref_mv, 382 mv_cost_params->mvjcost, mv_cost_params->mvcost, 383 mv_cost_params->sad_per_bit, 384 mv_cost_params->mv_cost_type); 385 } 386 387 // ============================================================================= 388 // Fullpixel Motion Search: Translational 389 // ============================================================================= 390 #define MAX_PATTERN_SCALES 11 391 #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale 392 #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 393 394 // Search site initialization for DIAMOND / CLAMPED_DIAMOND search methods. 395 // level = 0: DIAMOND, level = 1: CLAMPED_DIAMOND. 396 static void init_dsmotion_compensation(search_site_config *cfg, int stride, 397 int level) { 398 int num_search_steps = 0; 399 int stage_index = MAX_MVSEARCH_STEPS - 1; 400 401 cfg->site[stage_index][0].mv.col = cfg->site[stage_index][0].mv.row = 0; 402 cfg->site[stage_index][0].offset = 0; 403 cfg->stride = stride; 404 405 // Choose the initial step size depending on level. 406 const int first_step = (level > 0) ? (MAX_FIRST_STEP / 4) : MAX_FIRST_STEP; 407 408 for (int radius = first_step; radius > 0;) { 409 int num_search_pts = 8; 410 411 const FULLPEL_MV search_site_mvs[13] = { 412 { 0, 0 }, { -radius, 0 }, { radius, 0 }, 413 { 0, -radius }, { 0, radius }, { -radius, -radius }, 414 { radius, radius }, { -radius, radius }, { radius, -radius }, 415 }; 416 417 int i; 418 for (i = 0; i <= num_search_pts; ++i) { 419 search_site *const site = &cfg->site[stage_index][i]; 420 site->mv = search_site_mvs[i]; 421 site->offset = get_offset_from_fullmv(&site->mv, stride); 422 } 423 cfg->searches_per_step[stage_index] = num_search_pts; 424 cfg->radius[stage_index] = radius; 425 // Update the search radius based on level. 426 if (!level || ((stage_index < 9) && level)) radius /= 2; 427 --stage_index; 428 ++num_search_steps; 429 } 430 cfg->num_search_steps = num_search_steps; 431 } 432 433 void av1_init_motion_fpf(search_site_config *cfg, int stride) { 434 int num_search_steps = 0; 435 int stage_index = MAX_MVSEARCH_STEPS - 1; 436 437 cfg->site[stage_index][0].mv.col = cfg->site[stage_index][0].mv.row = 0; 438 cfg->site[stage_index][0].offset = 0; 439 cfg->stride = stride; 440 441 for (int radius = MAX_FIRST_STEP; radius > 0; radius /= 2) { 442 // Generate offsets for 8 search sites per step. 443 int tan_radius = AOMMAX((int)(0.41 * radius), 1); 444 int num_search_pts = 12; 445 if (radius == 1) num_search_pts = 8; 446 447 const FULLPEL_MV search_site_mvs[13] = { 448 { 0, 0 }, 449 { -radius, 0 }, 450 { radius, 0 }, 451 { 0, -radius }, 452 { 0, radius }, 453 { -radius, -tan_radius }, 454 { radius, tan_radius }, 455 { -tan_radius, radius }, 456 { tan_radius, -radius }, 457 { -radius, tan_radius }, 458 { radius, -tan_radius }, 459 { tan_radius, radius }, 460 { -tan_radius, -radius }, 461 }; 462 463 int i; 464 for (i = 0; i <= num_search_pts; ++i) { 465 search_site *const site = &cfg->site[stage_index][i]; 466 site->mv = search_site_mvs[i]; 467 site->offset = get_offset_from_fullmv(&site->mv, stride); 468 } 469 cfg->searches_per_step[stage_index] = num_search_pts; 470 cfg->radius[stage_index] = radius; 471 --stage_index; 472 ++num_search_steps; 473 } 474 cfg->num_search_steps = num_search_steps; 475 } 476 477 // Search site initialization for NSTEP / NSTEP_8PT search methods. 478 // level = 0: NSTEP, level = 1: NSTEP_8PT. 479 static void init_motion_compensation_nstep(search_site_config *cfg, int stride, 480 int level) { 481 int num_search_steps = 0; 482 int stage_index = 0; 483 cfg->stride = stride; 484 int radius = 1; 485 const int num_stages = (level > 0) ? 16 : 15; 486 for (stage_index = 0; stage_index < num_stages; ++stage_index) { 487 int tan_radius = AOMMAX((int)(0.41 * radius), 1); 488 int num_search_pts = 12; 489 if ((radius <= 5) || (level > 0)) { 490 tan_radius = radius; 491 num_search_pts = 8; 492 } 493 const FULLPEL_MV search_site_mvs[13] = { 494 { 0, 0 }, 495 { -radius, 0 }, 496 { radius, 0 }, 497 { 0, -radius }, 498 { 0, radius }, 499 { -radius, -tan_radius }, 500 { radius, tan_radius }, 501 { -tan_radius, radius }, 502 { tan_radius, -radius }, 503 { -radius, tan_radius }, 504 { radius, -tan_radius }, 505 { tan_radius, radius }, 506 { -tan_radius, -radius }, 507 }; 508 509 for (int i = 0; i <= num_search_pts; ++i) { 510 search_site *const site = &cfg->site[stage_index][i]; 511 site->mv = search_site_mvs[i]; 512 site->offset = get_offset_from_fullmv(&site->mv, stride); 513 } 514 cfg->searches_per_step[stage_index] = num_search_pts; 515 cfg->radius[stage_index] = radius; 516 ++num_search_steps; 517 if (stage_index < 12) 518 radius = (int)AOMMAX((radius * 1.5 + 0.5), radius + 1); 519 } 520 cfg->num_search_steps = num_search_steps; 521 } 522 523 // Search site initialization for BIGDIA / FAST_BIGDIA / FAST_DIAMOND 524 // search methods. 525 static void init_motion_compensation_bigdia(search_site_config *cfg, int stride, 526 int level) { 527 (void)level; 528 cfg->stride = stride; 529 // First scale has 4-closest points, the rest have 8 points in diamond 530 // shape at increasing scales 531 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 532 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 533 }; 534 535 // BIGDIA search method candidates. 536 // Note that the largest candidate step at each scale is 2^scale 537 /* clang-format off */ 538 static const FULLPEL_MV 539 site_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 540 { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }, { 0, 0 }, { 0, 0 }, 541 { 0, 0 }, { 0, 0 } }, 542 { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 }, 543 { -1, 1 }, { -2, 0 } }, 544 { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 }, 545 { -2, 2 }, { -4, 0 } }, 546 { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 }, 547 { -4, 4 }, { -8, 0 } }, 548 { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 }, 549 { -8, 8 }, { -16, 0 } }, 550 { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 }, 551 { 0, 32 }, { -16, 16 }, { -32, 0 } }, 552 { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 }, 553 { 0, 64 }, { -32, 32 }, { -64, 0 } }, 554 { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 }, 555 { 0, 128 }, { -64, 64 }, { -128, 0 } }, 556 { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, 557 { 128, 128 }, { 0, 256 }, { -128, 128 }, { -256, 0 } }, 558 { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, 559 { 256, 256 }, { 0, 512 }, { -256, 256 }, { -512, 0 } }, 560 { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 }, 561 { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }, 562 }; 563 564 /* clang-format on */ 565 int radius = 1; 566 for (int i = 0; i < MAX_PATTERN_SCALES; ++i) { 567 cfg->searches_per_step[i] = bigdia_num_candidates[i]; 568 cfg->radius[i] = radius; 569 for (int j = 0; j < MAX_PATTERN_CANDIDATES; ++j) { 570 search_site *const site = &cfg->site[i][j]; 571 site->mv = site_candidates[i][j]; 572 site->offset = get_offset_from_fullmv(&site->mv, stride); 573 } 574 radius *= 2; 575 } 576 cfg->num_search_steps = MAX_PATTERN_SCALES; 577 } 578 579 // Search site initialization for SQUARE search method. 580 static void init_motion_compensation_square(search_site_config *cfg, int stride, 581 int level) { 582 (void)level; 583 cfg->stride = stride; 584 // All scales have 8 closest points in square shape. 585 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 586 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 587 }; 588 589 // Square search method candidates. 590 // Note that the largest candidate step at each scale is 2^scale. 591 /* clang-format off */ 592 static const FULLPEL_MV 593 square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 594 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, 595 { -1, 1 }, { -1, 0 } }, 596 { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 }, 597 { -2, 2 }, { -2, 0 } }, 598 { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 }, 599 { -4, 4 }, { -4, 0 } }, 600 { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 }, 601 { -8, 8 }, { -8, 0 } }, 602 { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 }, 603 { 0, 16 }, { -16, 16 }, { -16, 0 } }, 604 { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 }, 605 { 0, 32 }, { -32, 32 }, { -32, 0 } }, 606 { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 }, 607 { 0, 64 }, { -64, 64 }, { -64, 0 } }, 608 { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, 609 { 128, 128 }, { 0, 128 }, { -128, 128 }, { -128, 0 } }, 610 { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, 611 { 256, 256 }, { 0, 256 }, { -256, 256 }, { -256, 0 } }, 612 { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, 613 { 512, 512 }, { 0, 512 }, { -512, 512 }, { -512, 0 } }, 614 { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 }, 615 { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }, 616 }; 617 618 /* clang-format on */ 619 int radius = 1; 620 for (int i = 0; i < MAX_PATTERN_SCALES; ++i) { 621 cfg->searches_per_step[i] = square_num_candidates[i]; 622 cfg->radius[i] = radius; 623 for (int j = 0; j < MAX_PATTERN_CANDIDATES; ++j) { 624 search_site *const site = &cfg->site[i][j]; 625 site->mv = square_candidates[i][j]; 626 site->offset = get_offset_from_fullmv(&site->mv, stride); 627 } 628 radius *= 2; 629 } 630 cfg->num_search_steps = MAX_PATTERN_SCALES; 631 } 632 633 // Search site initialization for HEX / FAST_HEX search methods. 634 static void init_motion_compensation_hex(search_site_config *cfg, int stride, 635 int level) { 636 (void)level; 637 cfg->stride = stride; 638 // First scale has 8-closest points, the rest have 6 points in hex shape 639 // at increasing scales. 640 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6, 641 6, 6, 6, 6, 6 }; 642 // Note that the largest candidate step at each scale is 2^scale. 643 /* clang-format off */ 644 static const FULLPEL_MV 645 hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 646 { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, 647 { -1, 1 }, { -1, 0 } }, 648 { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }, 649 { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } }, 650 { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } }, 651 { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, 652 { -8, 16 }, { -16, 0 } }, 653 { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 }, 654 { -32, 0 } }, 655 { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 }, 656 { -64, 0 } }, 657 { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, 658 { -64, 128 }, { -128, 0 } }, 659 { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, 660 { -128, 256 }, { -256, 0 } }, 661 { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, 662 { -256, 512 }, { -512, 0 } }, 663 { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 }, 664 { -512, 1024 }, { -1024, 0 } }, 665 }; 666 667 /* clang-format on */ 668 int radius = 1; 669 for (int i = 0; i < MAX_PATTERN_SCALES; ++i) { 670 cfg->searches_per_step[i] = hex_num_candidates[i]; 671 cfg->radius[i] = radius; 672 for (int j = 0; j < hex_num_candidates[i]; ++j) { 673 search_site *const site = &cfg->site[i][j]; 674 site->mv = hex_candidates[i][j]; 675 site->offset = get_offset_from_fullmv(&site->mv, stride); 676 } 677 radius *= 2; 678 } 679 cfg->num_search_steps = MAX_PATTERN_SCALES; 680 } 681 682 const av1_init_search_site_config 683 av1_init_motion_compensation[NUM_DISTINCT_SEARCH_METHODS] = { 684 init_dsmotion_compensation, init_motion_compensation_nstep, 685 init_motion_compensation_nstep, init_dsmotion_compensation, 686 init_motion_compensation_hex, init_motion_compensation_bigdia, 687 init_motion_compensation_square 688 }; 689 690 // Checks whether the mv is within range of the mv_limits 691 static inline int check_bounds(const FullMvLimits *mv_limits, int row, int col, 692 int range) { 693 return ((row - range) >= mv_limits->row_min) & 694 ((row + range) <= mv_limits->row_max) & 695 ((col - range) >= mv_limits->col_min) & 696 ((col + range) <= mv_limits->col_max); 697 } 698 699 static inline int get_mvpred_var_cost( 700 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV *this_mv, 701 FULLPEL_MV_STATS *mv_stats) { 702 const aom_variance_fn_ptr_t *vfp = ms_params->vfp; 703 const MV sub_this_mv = get_mv_from_fullmv(this_mv); 704 const struct buf_2d *const src = ms_params->ms_buffers.src; 705 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 706 const uint8_t *src_buf = src->buf; 707 const int src_stride = src->stride; 708 const int ref_stride = ref->stride; 709 710 int bestsme; 711 712 bestsme = vfp->vf(src_buf, src_stride, get_buf_from_fullmv(ref, this_mv), 713 ref_stride, &mv_stats->sse); 714 mv_stats->distortion = bestsme; 715 716 mv_stats->err_cost = mv_err_cost_(&sub_this_mv, &ms_params->mv_cost_params); 717 bestsme += mv_stats->err_cost; 718 719 return bestsme; 720 } 721 722 static inline int get_mvpred_sad(const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 723 const struct buf_2d *const src, 724 const uint8_t *const ref_address, 725 const int ref_stride) { 726 const uint8_t *src_buf = src->buf; 727 const int src_stride = src->stride; 728 729 return ms_params->sdf(src_buf, src_stride, ref_address, ref_stride); 730 } 731 732 static inline int get_mvpred_compound_var_cost( 733 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV *this_mv, 734 FULLPEL_MV_STATS *mv_stats) { 735 const aom_variance_fn_ptr_t *vfp = ms_params->vfp; 736 const struct buf_2d *const src = ms_params->ms_buffers.src; 737 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 738 const uint8_t *src_buf = src->buf; 739 const int src_stride = src->stride; 740 const int ref_stride = ref->stride; 741 742 const uint8_t *mask = ms_params->ms_buffers.mask; 743 const uint8_t *second_pred = ms_params->ms_buffers.second_pred; 744 const int mask_stride = ms_params->ms_buffers.mask_stride; 745 const int invert_mask = ms_params->ms_buffers.inv_mask; 746 int bestsme; 747 748 if (mask) { 749 bestsme = vfp->msvf(get_buf_from_fullmv(ref, this_mv), ref_stride, 0, 0, 750 src_buf, src_stride, second_pred, mask, mask_stride, 751 invert_mask, &mv_stats->sse); 752 } else if (second_pred) { 753 bestsme = vfp->svaf(get_buf_from_fullmv(ref, this_mv), ref_stride, 0, 0, 754 src_buf, src_stride, &mv_stats->sse, second_pred); 755 } else { 756 bestsme = vfp->vf(src_buf, src_stride, get_buf_from_fullmv(ref, this_mv), 757 ref_stride, &mv_stats->sse); 758 } 759 mv_stats->distortion = bestsme; 760 761 const MV sub_this_mv = get_mv_from_fullmv(this_mv); 762 mv_stats->err_cost = mv_err_cost_(&sub_this_mv, &ms_params->mv_cost_params); 763 bestsme += mv_stats->err_cost; 764 765 return bestsme; 766 } 767 768 static inline int get_mvpred_compound_sad( 769 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 770 const struct buf_2d *const src, const uint8_t *const ref_address, 771 const int ref_stride) { 772 const aom_variance_fn_ptr_t *vfp = ms_params->vfp; 773 const uint8_t *src_buf = src->buf; 774 const int src_stride = src->stride; 775 776 const uint8_t *mask = ms_params->ms_buffers.mask; 777 const uint8_t *second_pred = ms_params->ms_buffers.second_pred; 778 const int mask_stride = ms_params->ms_buffers.mask_stride; 779 const int invert_mask = ms_params->ms_buffers.inv_mask; 780 781 if (mask) { 782 return vfp->msdf(src_buf, src_stride, ref_address, ref_stride, second_pred, 783 mask, mask_stride, invert_mask); 784 } else if (second_pred) { 785 assert(vfp->sdaf != NULL); 786 return vfp->sdaf(src_buf, src_stride, ref_address, ref_stride, second_pred); 787 } else { 788 return ms_params->sdf(src_buf, src_stride, ref_address, ref_stride); 789 } 790 } 791 792 // Calculates and returns a sad+mvcost list around an integer best pel during 793 // fullpixel motion search. The resulting list can be used to speed up subpel 794 // motion search later. 795 #define USE_SAD_COSTLIST 1 796 797 // calc_int_cost_list uses var to populate the costlist, which is more accurate 798 // than sad but slightly slower. 799 static AOM_FORCE_INLINE void calc_int_cost_list( 800 const FULLPEL_MV best_mv, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 801 int *cost_list) { 802 static const FULLPEL_MV neighbors[4] = { 803 { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } 804 }; 805 const int br = best_mv.row; 806 const int bc = best_mv.col; 807 808 FULLPEL_MV_STATS mv_stats; 809 cost_list[0] = get_mvpred_var_cost(ms_params, &best_mv, &mv_stats); 810 811 if (check_bounds(&ms_params->mv_limits, br, bc, 1)) { 812 for (int i = 0; i < 4; i++) { 813 const FULLPEL_MV neighbor_mv = { br + neighbors[i].row, 814 bc + neighbors[i].col }; 815 cost_list[i + 1] = 816 get_mvpred_var_cost(ms_params, &neighbor_mv, &mv_stats); 817 } 818 } else { 819 for (int i = 0; i < 4; i++) { 820 const FULLPEL_MV neighbor_mv = { br + neighbors[i].row, 821 bc + neighbors[i].col }; 822 if (!av1_is_fullmv_in_range(&ms_params->mv_limits, neighbor_mv)) { 823 cost_list[i + 1] = INT_MAX; 824 } else { 825 cost_list[i + 1] = 826 get_mvpred_var_cost(ms_params, &neighbor_mv, &mv_stats); 827 } 828 } 829 } 830 } 831 832 // calc_int_sad_list uses sad to populate the costlist, which is less accurate 833 // than var but faster. 834 static AOM_FORCE_INLINE void calc_int_sad_list( 835 const FULLPEL_MV best_mv, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 836 int *cost_list, int costlist_has_sad) { 837 static const FULLPEL_MV neighbors[4] = { 838 { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } 839 }; 840 const struct buf_2d *const src = ms_params->ms_buffers.src; 841 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 842 const int ref_stride = ref->stride; 843 const int br = best_mv.row; 844 const int bc = best_mv.col; 845 846 assert(av1_is_fullmv_in_range(&ms_params->mv_limits, best_mv)); 847 848 // Refresh the costlist it does not contain valid sad 849 if (!costlist_has_sad) { 850 cost_list[0] = get_mvpred_sad( 851 ms_params, src, get_buf_from_fullmv(ref, &best_mv), ref_stride); 852 853 if (check_bounds(&ms_params->mv_limits, br, bc, 1)) { 854 for (int i = 0; i < 4; i++) { 855 const FULLPEL_MV this_mv = { br + neighbors[i].row, 856 bc + neighbors[i].col }; 857 cost_list[i + 1] = get_mvpred_sad( 858 ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride); 859 } 860 } else { 861 for (int i = 0; i < 4; i++) { 862 const FULLPEL_MV this_mv = { br + neighbors[i].row, 863 bc + neighbors[i].col }; 864 if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) { 865 cost_list[i + 1] = INT_MAX; 866 } else { 867 cost_list[i + 1] = get_mvpred_sad( 868 ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride); 869 } 870 } 871 } 872 } 873 874 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 875 cost_list[0] += mvsad_err_cost_(&best_mv, mv_cost_params); 876 877 for (int idx = 0; idx < 4; idx++) { 878 if (cost_list[idx + 1] != INT_MAX) { 879 const FULLPEL_MV this_mv = { br + neighbors[idx].row, 880 bc + neighbors[idx].col }; 881 cost_list[idx + 1] += mvsad_err_cost_(&this_mv, mv_cost_params); 882 } 883 } 884 } 885 886 // Computes motion vector cost and adds to the sad cost. 887 // Then updates the best sad and motion vectors. 888 // Inputs: 889 // this_sad: the sad to be evaluated. 890 // mv: the current motion vector. 891 // mv_cost_params: a structure containing information to compute mv cost. 892 // best_sad: the current best sad. 893 // raw_best_sad (optional): the current best sad without calculating mv cost. 894 // best_mv: the current best motion vector. 895 // second_best_mv (optional): the second best motion vector up to now. 896 // Modifies: 897 // best_sad, raw_best_sad, best_mv, second_best_mv 898 // If the current sad is lower than the current best sad. 899 // Returns: 900 // Whether the input sad (mv) is better than the current best. 901 static inline int update_mvs_and_sad(const unsigned int this_sad, 902 const FULLPEL_MV *mv, 903 const MV_COST_PARAMS *mv_cost_params, 904 unsigned int *best_sad, 905 unsigned int *raw_best_sad, 906 FULLPEL_MV *best_mv, 907 FULLPEL_MV *second_best_mv) { 908 if (this_sad >= *best_sad) return 0; 909 910 // Add the motion vector cost. 911 const unsigned int sad = this_sad + mvsad_err_cost_(mv, mv_cost_params); 912 if (sad < *best_sad) { 913 if (raw_best_sad) *raw_best_sad = this_sad; 914 *best_sad = sad; 915 if (second_best_mv) *second_best_mv = *best_mv; 916 *best_mv = *mv; 917 return 1; 918 } 919 return 0; 920 } 921 922 // Calculate sad4 and update the bestmv information 923 // in FAST_DIAMOND search method. 924 static inline void calc_sad4_update_bestmv( 925 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 926 const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv, 927 const FULLPEL_MV center_mv, const uint8_t *center_address, 928 unsigned int *bestsad, unsigned int *raw_bestsad, int search_step, 929 int *best_site, int cand_start, int *cost_list) { 930 const struct buf_2d *const src = ms_params->ms_buffers.src; 931 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 932 const search_site *site = ms_params->search_sites->site[search_step]; 933 934 unsigned char const *block_offset[4]; 935 unsigned int sads_buf[4]; 936 unsigned int *sads; 937 const uint8_t *src_buf = src->buf; 938 const int src_stride = src->stride; 939 if (cost_list) { 940 sads = (unsigned int *)(cost_list + 1); 941 } else { 942 sads = sads_buf; 943 } 944 // Loop over number of candidates. 945 for (int j = 0; j < 4; j++) 946 block_offset[j] = site[cand_start + j].offset + center_address; 947 948 // 4-point sad calculation. 949 ms_params->sdx4df(src_buf, src_stride, block_offset, ref->stride, sads); 950 951 for (int j = 0; j < 4; j++) { 952 const FULLPEL_MV this_mv = { center_mv.row + site[cand_start + j].mv.row, 953 center_mv.col + site[cand_start + j].mv.col }; 954 const int found_better_mv = update_mvs_and_sad( 955 sads[j], &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv, 956 /*second_best_mv=*/NULL); 957 if (found_better_mv) *best_site = cand_start + j; 958 } 959 } 960 961 static inline void calc_sad3_update_bestmv( 962 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 963 const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv, 964 FULLPEL_MV center_mv, const uint8_t *center_address, unsigned int *bestsad, 965 unsigned int *raw_bestsad, int search_step, int *best_site, 966 const int *chkpts_indices, int *cost_list) { 967 const struct buf_2d *const src = ms_params->ms_buffers.src; 968 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 969 const search_site *site = ms_params->search_sites->site[search_step]; 970 unsigned char const *block_offset[4] = { 971 center_address + site[chkpts_indices[0]].offset, 972 center_address + site[chkpts_indices[1]].offset, 973 center_address + site[chkpts_indices[2]].offset, 974 center_address, 975 }; 976 unsigned int sads[4]; 977 ms_params->sdx3df(src->buf, src->stride, block_offset, ref->stride, sads); 978 for (int j = 0; j < 3; j++) { 979 const int index = chkpts_indices[j]; 980 const FULLPEL_MV this_mv = { center_mv.row + site[index].mv.row, 981 center_mv.col + site[index].mv.col }; 982 const int found_better_mv = update_mvs_and_sad( 983 sads[j], &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv, 984 /*second_best_mv=*/NULL); 985 if (found_better_mv) *best_site = j; 986 } 987 if (cost_list) { 988 for (int j = 0; j < 3; j++) { 989 int index = chkpts_indices[j]; 990 cost_list[index + 1] = sads[j]; 991 } 992 } 993 } 994 995 // Calculate sad and update the bestmv information 996 // in FAST_DIAMOND search method. 997 static inline void calc_sad_update_bestmv( 998 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 999 const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv, 1000 const FULLPEL_MV center_mv, const uint8_t *center_address, 1001 unsigned int *bestsad, unsigned int *raw_bestsad, int search_step, 1002 int *best_site, const int num_candidates, int cand_start, int *cost_list) { 1003 const struct buf_2d *const src = ms_params->ms_buffers.src; 1004 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 1005 const search_site *site = ms_params->search_sites->site[search_step]; 1006 // Loop over number of candidates. 1007 for (int i = cand_start; i < num_candidates; i++) { 1008 const FULLPEL_MV this_mv = { center_mv.row + site[i].mv.row, 1009 center_mv.col + site[i].mv.col }; 1010 if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) continue; 1011 int thissad = get_mvpred_sad(ms_params, src, 1012 center_address + site[i].offset, ref->stride); 1013 if (cost_list) { 1014 cost_list[i + 1] = thissad; 1015 } 1016 const int found_better_mv = update_mvs_and_sad( 1017 thissad, &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv, 1018 /*second_best_mv=*/NULL); 1019 if (found_better_mv) *best_site = i; 1020 } 1021 } 1022 1023 static inline void calc_sad_update_bestmv_with_indices( 1024 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1025 const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv, 1026 const FULLPEL_MV center_mv, const uint8_t *center_address, 1027 unsigned int *bestsad, unsigned int *raw_bestsad, int search_step, 1028 int *best_site, const int num_candidates, const int *chkpts_indices, 1029 int *cost_list) { 1030 const struct buf_2d *const src = ms_params->ms_buffers.src; 1031 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 1032 const search_site *site = ms_params->search_sites->site[search_step]; 1033 // Loop over number of candidates. 1034 for (int i = 0; i < num_candidates; i++) { 1035 int index = chkpts_indices[i]; 1036 const FULLPEL_MV this_mv = { center_mv.row + site[index].mv.row, 1037 center_mv.col + site[index].mv.col }; 1038 if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) { 1039 if (cost_list) { 1040 cost_list[index + 1] = INT_MAX; 1041 } 1042 continue; 1043 } 1044 const int thissad = get_mvpred_sad( 1045 ms_params, src, center_address + site[index].offset, ref->stride); 1046 if (cost_list) { 1047 cost_list[index + 1] = thissad; 1048 } 1049 const int found_better_mv = update_mvs_and_sad( 1050 thissad, &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv, 1051 /*second_best_mv=*/NULL); 1052 if (found_better_mv) *best_site = i; 1053 } 1054 } 1055 1056 // Generic pattern search function that searches over multiple scales. 1057 // Each scale can have a different number of candidates and shape of 1058 // candidates as indicated in the num_candidates and candidates arrays 1059 // passed into this function 1060 static int pattern_search(FULLPEL_MV start_mv, 1061 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1062 int search_step, const int do_init_search, 1063 int *cost_list, FULLPEL_MV *best_mv, 1064 FULLPEL_MV_STATS *best_mv_stats) { 1065 static const int search_steps[MAX_MVSEARCH_STEPS] = { 1066 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 1067 }; 1068 int i, s, t; 1069 1070 const struct buf_2d *const src = ms_params->ms_buffers.src; 1071 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 1072 const search_site_config *search_sites = ms_params->search_sites; 1073 const int *num_candidates = search_sites->searches_per_step; 1074 const int ref_stride = ref->stride; 1075 const int last_is_4 = num_candidates[0] == 4; 1076 int br, bc; 1077 unsigned int bestsad = UINT_MAX, raw_bestsad = UINT_MAX; 1078 int k = -1; 1079 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 1080 search_step = AOMMIN(search_step, MAX_MVSEARCH_STEPS - 1); 1081 assert(search_step >= 0); 1082 int best_init_s = search_steps[search_step]; 1083 // adjust ref_mv to make sure it is within MV range 1084 clamp_fullmv(&start_mv, &ms_params->mv_limits); 1085 br = start_mv.row; 1086 bc = start_mv.col; 1087 if (cost_list != NULL) { 1088 cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = 1089 INT_MAX; 1090 } 1091 int costlist_has_sad = 0; 1092 1093 // Work out the start point for the search 1094 raw_bestsad = get_mvpred_sad(ms_params, src, 1095 get_buf_from_fullmv(ref, &start_mv), ref_stride); 1096 bestsad = raw_bestsad + mvsad_err_cost_(&start_mv, mv_cost_params); 1097 1098 // Search all possible scales up to the search param around the center point 1099 // pick the scale of the point that is best as the starting scale of 1100 // further steps around it. 1101 const uint8_t *center_address = get_buf_from_fullmv(ref, &start_mv); 1102 if (do_init_search) { 1103 s = best_init_s; 1104 best_init_s = -1; 1105 for (t = 0; t <= s; ++t) { 1106 int best_site = -1; 1107 FULLPEL_MV center_mv = { br, bc }; 1108 if (check_bounds(&ms_params->mv_limits, br, bc, 1 << t)) { 1109 // Call 4-point sad for multiples of 4 candidates. 1110 const int no_of_4_cand_loops = num_candidates[t] >> 2; 1111 for (i = 0; i < no_of_4_cand_loops; i++) { 1112 calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1113 center_address, &bestsad, &raw_bestsad, t, 1114 &best_site, i * 4, /*cost_list=*/NULL); 1115 } 1116 // Rest of the candidates 1117 const int remaining_cand = num_candidates[t] % 4; 1118 calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1119 center_address, &bestsad, &raw_bestsad, t, 1120 &best_site, remaining_cand, 1121 no_of_4_cand_loops * 4, NULL); 1122 } else { 1123 calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1124 center_address, &bestsad, &raw_bestsad, t, 1125 &best_site, num_candidates[t], 0, NULL); 1126 } 1127 if (best_site == -1) { 1128 continue; 1129 } else { 1130 best_init_s = t; 1131 k = best_site; 1132 } 1133 } 1134 if (best_init_s != -1) { 1135 br += search_sites->site[best_init_s][k].mv.row; 1136 bc += search_sites->site[best_init_s][k].mv.col; 1137 center_address += search_sites->site[best_init_s][k].offset; 1138 } 1139 } 1140 1141 // If the center point is still the best, just skip this and move to 1142 // the refinement step. 1143 if (best_init_s != -1) { 1144 const int last_s = (last_is_4 && cost_list != NULL); 1145 int best_site = -1; 1146 s = best_init_s; 1147 1148 for (; s >= last_s; s--) { 1149 // No need to search all points the 1st time if initial search was used 1150 if (!do_init_search || s != best_init_s) { 1151 FULLPEL_MV center_mv = { br, bc }; 1152 if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) { 1153 // Call 4-point sad for multiples of 4 candidates. 1154 const int no_of_4_cand_loops = num_candidates[s] >> 2; 1155 for (i = 0; i < no_of_4_cand_loops; i++) { 1156 calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv, 1157 center_mv, center_address, &bestsad, 1158 &raw_bestsad, s, &best_site, i * 4, 1159 /*cost_list=*/NULL); 1160 } 1161 // Rest of the candidates 1162 const int remaining_cand = num_candidates[s] % 4; 1163 calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1164 center_address, &bestsad, &raw_bestsad, s, 1165 &best_site, remaining_cand, 1166 no_of_4_cand_loops * 4, NULL); 1167 } else { 1168 calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1169 center_address, &bestsad, &raw_bestsad, s, 1170 &best_site, num_candidates[s], 0, NULL); 1171 } 1172 1173 if (best_site == -1) { 1174 continue; 1175 } else { 1176 br += search_sites->site[s][best_site].mv.row; 1177 bc += search_sites->site[s][best_site].mv.col; 1178 center_address += search_sites->site[s][best_site].offset; 1179 k = best_site; 1180 } 1181 } 1182 1183 do { 1184 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1185 best_site = -1; 1186 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1187 next_chkpts_indices[1] = k; 1188 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1189 1190 FULLPEL_MV center_mv = { br, bc }; 1191 if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) { 1192 calc_sad3_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1193 center_address, &bestsad, &raw_bestsad, s, 1194 &best_site, next_chkpts_indices, NULL); 1195 } else { 1196 calc_sad_update_bestmv_with_indices( 1197 ms_params, mv_cost_params, best_mv, center_mv, center_address, 1198 &bestsad, &raw_bestsad, s, &best_site, PATTERN_CANDIDATES_REF, 1199 next_chkpts_indices, NULL); 1200 } 1201 1202 if (best_site != -1) { 1203 k = next_chkpts_indices[best_site]; 1204 br += search_sites->site[s][k].mv.row; 1205 bc += search_sites->site[s][k].mv.col; 1206 center_address += search_sites->site[s][k].offset; 1207 } 1208 } while (best_site != -1); 1209 } 1210 // Note: If we enter the if below, then cost_list must be non-NULL. 1211 if (s == 0) { 1212 cost_list[0] = raw_bestsad; 1213 costlist_has_sad = 1; 1214 assert(num_candidates[s] == 4); 1215 if (!do_init_search || s != best_init_s) { 1216 FULLPEL_MV center_mv = { br, bc }; 1217 if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) { 1218 calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1219 center_address, &bestsad, &raw_bestsad, s, 1220 &best_site, 0, cost_list); 1221 } else { 1222 calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1223 center_address, &bestsad, &raw_bestsad, s, 1224 &best_site, /*num_candidates=*/4, 1225 /*cand_start=*/0, cost_list); 1226 } 1227 1228 if (best_site != -1) { 1229 br += search_sites->site[s][best_site].mv.row; 1230 bc += search_sites->site[s][best_site].mv.col; 1231 center_address += search_sites->site[s][best_site].offset; 1232 k = best_site; 1233 } 1234 } 1235 while (best_site != -1) { 1236 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 1237 best_site = -1; 1238 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 1239 next_chkpts_indices[1] = k; 1240 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 1241 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; 1242 cost_list[((k + 2) % 4) + 1] = cost_list[0]; 1243 cost_list[0] = raw_bestsad; 1244 1245 FULLPEL_MV center_mv = { br, bc }; 1246 if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) { 1247 assert(PATTERN_CANDIDATES_REF == 3); 1248 calc_sad3_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv, 1249 center_address, &bestsad, &raw_bestsad, s, 1250 &best_site, next_chkpts_indices, cost_list); 1251 } else { 1252 calc_sad_update_bestmv_with_indices( 1253 ms_params, mv_cost_params, best_mv, center_mv, center_address, 1254 &bestsad, &raw_bestsad, s, &best_site, PATTERN_CANDIDATES_REF, 1255 next_chkpts_indices, cost_list); 1256 } 1257 1258 if (best_site != -1) { 1259 k = next_chkpts_indices[best_site]; 1260 br += search_sites->site[s][k].mv.row; 1261 bc += search_sites->site[s][k].mv.col; 1262 center_address += search_sites->site[s][k].offset; 1263 } 1264 } 1265 } 1266 } 1267 best_mv->row = br; 1268 best_mv->col = bc; 1269 1270 assert(center_address == get_buf_from_fullmv(ref, best_mv) && 1271 "center address is out of sync with best_mv!\n"); 1272 1273 // Returns the one-away integer pel cost/sad around the best as follows: 1274 // cost_list[0]: cost/sad at the best integer pel 1275 // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel 1276 // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel 1277 // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel 1278 // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel 1279 if (cost_list) { 1280 if (USE_SAD_COSTLIST) { 1281 calc_int_sad_list(*best_mv, ms_params, cost_list, costlist_has_sad); 1282 } else { 1283 calc_int_cost_list(*best_mv, ms_params, cost_list); 1284 } 1285 } 1286 1287 const int var_cost = get_mvpred_var_cost(ms_params, best_mv, best_mv_stats); 1288 return var_cost; 1289 } 1290 1291 // For the following foo_search, the input arguments are: 1292 // start_mv: where we are starting our motion search 1293 // ms_params: a collection of motion search parameters 1294 // search_step: how many steps to skip in our motion search. For example, 1295 // a value 3 suggests that 3 search steps have already taken place prior to 1296 // this function call, so we jump directly to step 4 of the search process 1297 // do_init_search: if on, do an initial search of all possible scales around the 1298 // start_mv, and then pick the best scale. 1299 // cond_list: used to hold the cost around the best full mv so we can use it to 1300 // speed up subpel search later. 1301 // best_mv: the best mv found in the motion search 1302 static int hex_search(const FULLPEL_MV start_mv, 1303 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1304 const int search_step, const int do_init_search, 1305 int *cost_list, FULLPEL_MV *best_mv, 1306 FULLPEL_MV_STATS *best_mv_stats) { 1307 return pattern_search(start_mv, ms_params, search_step, do_init_search, 1308 cost_list, best_mv, best_mv_stats); 1309 } 1310 1311 static int bigdia_search(const FULLPEL_MV start_mv, 1312 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1313 const int search_step, const int do_init_search, 1314 int *cost_list, FULLPEL_MV *best_mv, 1315 FULLPEL_MV_STATS *best_mv_stats) { 1316 return pattern_search(start_mv, ms_params, search_step, do_init_search, 1317 cost_list, best_mv, best_mv_stats); 1318 } 1319 1320 static int square_search(const FULLPEL_MV start_mv, 1321 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1322 const int search_step, const int do_init_search, 1323 int *cost_list, FULLPEL_MV *best_mv, 1324 FULLPEL_MV_STATS *best_mv_stats) { 1325 return pattern_search(start_mv, ms_params, search_step, do_init_search, 1326 cost_list, best_mv, best_mv_stats); 1327 } 1328 1329 static int fast_hex_search(const FULLPEL_MV start_mv, 1330 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1331 const int search_step, const int do_init_search, 1332 int *cost_list, FULLPEL_MV *best_mv, 1333 FULLPEL_MV_STATS *best_mv_stats) { 1334 return hex_search(start_mv, ms_params, 1335 AOMMAX(MAX_MVSEARCH_STEPS - 2, search_step), do_init_search, 1336 cost_list, best_mv, best_mv_stats); 1337 } 1338 1339 static int vfast_dia_search(const FULLPEL_MV start_mv, 1340 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1341 const int search_step, const int do_init_search, 1342 int *cost_list, FULLPEL_MV *best_mv, 1343 FULLPEL_MV_STATS *best_mv_stats) { 1344 return bigdia_search(start_mv, ms_params, 1345 AOMMAX(MAX_MVSEARCH_STEPS - 1, search_step), 1346 do_init_search, cost_list, best_mv, best_mv_stats); 1347 } 1348 1349 static int fast_dia_search(const FULLPEL_MV start_mv, 1350 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1351 const int search_step, const int do_init_search, 1352 int *cost_list, FULLPEL_MV *best_mv, 1353 FULLPEL_MV_STATS *best_mv_stats) { 1354 return bigdia_search(start_mv, ms_params, 1355 AOMMAX(MAX_MVSEARCH_STEPS - 2, search_step), 1356 do_init_search, cost_list, best_mv, best_mv_stats); 1357 } 1358 1359 static int fast_bigdia_search(const FULLPEL_MV start_mv, 1360 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1361 const int search_step, const int do_init_search, 1362 int *cost_list, FULLPEL_MV *best_mv, 1363 FULLPEL_MV_STATS *best_mv_stats) { 1364 return bigdia_search(start_mv, ms_params, 1365 AOMMAX(MAX_MVSEARCH_STEPS - 3, search_step), 1366 do_init_search, cost_list, best_mv, best_mv_stats); 1367 } 1368 1369 static int diamond_search_sad(FULLPEL_MV start_mv, unsigned int start_mv_sad, 1370 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1371 const int search_step, int *num00, 1372 FULLPEL_MV *best_mv, FULLPEL_MV *second_best_mv) { 1373 #define UPDATE_SEARCH_STEP \ 1374 do { \ 1375 if (best_site != 0) { \ 1376 tmp_second_best_mv = *best_mv; \ 1377 best_mv->row += site[best_site].mv.row; \ 1378 best_mv->col += site[best_site].mv.col; \ 1379 best_address += site[best_site].offset; \ 1380 is_off_center = 1; \ 1381 } \ 1382 \ 1383 if (is_off_center == 0) num_center_steps++; \ 1384 \ 1385 if (best_site == 0 && step > 2) { \ 1386 int next_step_size = cfg->radius[step - 1]; \ 1387 while (next_step_size == cfg->radius[step] && step > 2) { \ 1388 num_center_steps++; \ 1389 --step; \ 1390 next_step_size = cfg->radius[step - 1]; \ 1391 } \ 1392 } \ 1393 } while (0) 1394 1395 const struct buf_2d *const src = ms_params->ms_buffers.src; 1396 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 1397 1398 const uint8_t *src_buf = src->buf; 1399 const int src_stride = src->stride; 1400 const int ref_stride = ref->stride; 1401 1402 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 1403 1404 const search_site_config *cfg = ms_params->search_sites; 1405 1406 int is_off_center = 0; 1407 // Number of times that we have stayed in the middle. This is used to skip 1408 // search steps in the future if diamond_search_sad is called again. 1409 int num_center_steps = 0; 1410 1411 // search_step determines the length of the initial step and hence the number 1412 // of iterations. 1413 const int tot_steps = cfg->num_search_steps - search_step; 1414 FULLPEL_MV tmp_second_best_mv; 1415 if (second_best_mv) { 1416 tmp_second_best_mv = *second_best_mv; 1417 } 1418 1419 *best_mv = start_mv; 1420 1421 // Check the starting position 1422 const uint8_t *best_address = get_buf_from_fullmv(ref, &start_mv); 1423 unsigned int bestsad = start_mv_sad; 1424 1425 // TODO(chiyotsai@google.com): Implement 4 points search for msdf&sdaf 1426 if (ms_params->ms_buffers.second_pred) { 1427 for (int step = tot_steps - 1; step >= 0; --step) { 1428 const search_site *site = cfg->site[step]; 1429 const int num_searches = cfg->searches_per_step[step]; 1430 int best_site = 0; 1431 1432 for (int idx = 1; idx <= num_searches; idx++) { 1433 const FULLPEL_MV this_mv = { best_mv->row + site[idx].mv.row, 1434 best_mv->col + site[idx].mv.col }; 1435 1436 if (av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) { 1437 const uint8_t *const check_here = site[idx].offset + best_address; 1438 unsigned int thissad = 1439 get_mvpred_compound_sad(ms_params, src, check_here, ref_stride); 1440 1441 if (thissad < bestsad) { 1442 thissad += mvsad_err_cost_(&this_mv, mv_cost_params); 1443 if (thissad < bestsad) { 1444 bestsad = thissad; 1445 best_site = idx; 1446 } 1447 } 1448 } 1449 } 1450 UPDATE_SEARCH_STEP; 1451 } 1452 } else { 1453 for (int step = tot_steps - 1; step >= 0; --step) { 1454 const search_site *site = cfg->site[step]; 1455 const int num_searches = cfg->searches_per_step[step]; 1456 int best_site = 0; 1457 1458 int all_in = 1; 1459 // Trap illegal vectors 1460 all_in &= best_mv->row + site[1].mv.row >= ms_params->mv_limits.row_min; 1461 all_in &= best_mv->row + site[2].mv.row <= ms_params->mv_limits.row_max; 1462 all_in &= best_mv->col + site[3].mv.col >= ms_params->mv_limits.col_min; 1463 all_in &= best_mv->col + site[4].mv.col <= ms_params->mv_limits.col_max; 1464 1465 if (all_in) { 1466 for (int idx = 1; idx <= num_searches; idx += 4) { 1467 unsigned char const *block_offset[4]; 1468 unsigned int sads[4]; 1469 1470 for (int j = 0; j < 4; j++) 1471 block_offset[j] = site[idx + j].offset + best_address; 1472 1473 ms_params->sdx4df(src_buf, src_stride, block_offset, ref_stride, 1474 sads); 1475 for (int j = 0; j < 4; j++) { 1476 if (sads[j] < bestsad) { 1477 const FULLPEL_MV this_mv = { best_mv->row + site[idx + j].mv.row, 1478 best_mv->col + 1479 site[idx + j].mv.col }; 1480 unsigned int thissad = 1481 sads[j] + mvsad_err_cost_(&this_mv, mv_cost_params); 1482 if (thissad < bestsad) { 1483 bestsad = thissad; 1484 best_site = idx + j; 1485 } 1486 } 1487 } 1488 } 1489 } else { 1490 for (int idx = 1; idx <= num_searches; idx++) { 1491 const FULLPEL_MV this_mv = { best_mv->row + site[idx].mv.row, 1492 best_mv->col + site[idx].mv.col }; 1493 1494 if (av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) { 1495 const uint8_t *const check_here = site[idx].offset + best_address; 1496 unsigned int thissad = 1497 get_mvpred_sad(ms_params, src, check_here, ref_stride); 1498 1499 if (thissad < bestsad) { 1500 thissad += mvsad_err_cost_(&this_mv, mv_cost_params); 1501 if (thissad < bestsad) { 1502 bestsad = thissad; 1503 best_site = idx; 1504 } 1505 } 1506 } 1507 } 1508 } 1509 UPDATE_SEARCH_STEP; 1510 } 1511 } 1512 1513 *num00 = num_center_steps; 1514 if (second_best_mv) { 1515 *second_best_mv = tmp_second_best_mv; 1516 } 1517 1518 return bestsad; 1519 1520 #undef UPDATE_SEARCH_STEP 1521 } 1522 1523 static inline unsigned int get_start_mvpred_sad_cost( 1524 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, FULLPEL_MV start_mv) { 1525 const struct buf_2d *const src = ms_params->ms_buffers.src; 1526 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 1527 const uint8_t *best_address = get_buf_from_fullmv(ref, &start_mv); 1528 1529 unsigned int start_mv_sad = 1530 mvsad_err_cost_(&start_mv, &ms_params->mv_cost_params); 1531 1532 if (ms_params->ms_buffers.second_pred) 1533 start_mv_sad += 1534 get_mvpred_compound_sad(ms_params, src, best_address, ref->stride); 1535 else 1536 start_mv_sad += get_mvpred_sad(ms_params, src, best_address, ref->stride); 1537 1538 return start_mv_sad; 1539 } 1540 1541 static int full_pixel_diamond(FULLPEL_MV start_mv, 1542 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1543 const int step_param, int *cost_list, 1544 FULLPEL_MV *best_mv, 1545 FULLPEL_MV_STATS *best_mv_stats, 1546 FULLPEL_MV *second_best_mv) { 1547 const search_site_config *cfg = ms_params->search_sites; 1548 int thissme, n, num00 = 0; 1549 1550 // Clamp start mv and calculate the cost 1551 clamp_fullmv(&start_mv, &ms_params->mv_limits); 1552 unsigned int start_mv_sad = get_start_mvpred_sad_cost(ms_params, start_mv); 1553 1554 diamond_search_sad(start_mv, start_mv_sad, ms_params, step_param, &n, best_mv, 1555 second_best_mv); 1556 1557 int bestsme = get_mvpred_compound_var_cost(ms_params, best_mv, best_mv_stats); 1558 1559 // If there won't be more n-step search, check to see if refining search is 1560 // needed. 1561 const int further_steps = cfg->num_search_steps - 1 - step_param; 1562 while (n < further_steps) { 1563 ++n; 1564 1565 // TODO(chiyotsai@google.com): There is another bug here where the second 1566 // best mv gets incorrectly overwritten. Fix it later. 1567 FULLPEL_MV tmp_best_mv; 1568 FULLPEL_MV_STATS tmp_best_mv_stats; 1569 diamond_search_sad(start_mv, start_mv_sad, ms_params, step_param + n, 1570 &num00, &tmp_best_mv, second_best_mv); 1571 1572 thissme = get_mvpred_compound_var_cost(ms_params, &tmp_best_mv, 1573 &tmp_best_mv_stats); 1574 1575 if (thissme < bestsme) { 1576 bestsme = thissme; 1577 *best_mv = tmp_best_mv; 1578 *best_mv_stats = tmp_best_mv_stats; 1579 } 1580 1581 if (num00) { 1582 // Advance the loop by num00 steps 1583 n += num00; 1584 num00 = 0; 1585 } 1586 } 1587 1588 // Return cost list. 1589 if (cost_list) { 1590 if (USE_SAD_COSTLIST) { 1591 const int costlist_has_sad = 0; 1592 calc_int_sad_list(*best_mv, ms_params, cost_list, costlist_has_sad); 1593 } else { 1594 calc_int_cost_list(*best_mv, ms_params, cost_list); 1595 } 1596 } 1597 return bestsme; 1598 } 1599 1600 // Exhaustive motion search around a given centre position with a given 1601 // step size. 1602 static int exhaustive_mesh_search(FULLPEL_MV start_mv, 1603 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1604 const int range, const int step, 1605 FULLPEL_MV *best_mv, 1606 FULLPEL_MV *second_best_mv) { 1607 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 1608 const struct buf_2d *const src = ms_params->ms_buffers.src; 1609 const struct buf_2d *const ref = ms_params->ms_buffers.ref; 1610 const int ref_stride = ref->stride; 1611 unsigned int best_sad = INT_MAX; 1612 int r, c, i; 1613 int start_col, end_col, start_row, end_row; 1614 const int col_step = (step > 1) ? step : 4; 1615 1616 assert(step >= 1); 1617 1618 clamp_fullmv(&start_mv, &ms_params->mv_limits); 1619 *best_mv = start_mv; 1620 best_sad = get_mvpred_sad(ms_params, src, get_buf_from_fullmv(ref, &start_mv), 1621 ref_stride); 1622 best_sad += mvsad_err_cost_(&start_mv, mv_cost_params); 1623 start_row = AOMMAX(-range, ms_params->mv_limits.row_min - start_mv.row); 1624 start_col = AOMMAX(-range, ms_params->mv_limits.col_min - start_mv.col); 1625 end_row = AOMMIN(range, ms_params->mv_limits.row_max - start_mv.row); 1626 end_col = AOMMIN(range, ms_params->mv_limits.col_max - start_mv.col); 1627 1628 for (r = start_row; r <= end_row; r += step) { 1629 for (c = start_col; c <= end_col; c += col_step) { 1630 // Step > 1 means we are not checking every location in this pass. 1631 if (step > 1) { 1632 const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c }; 1633 unsigned int sad = get_mvpred_sad( 1634 ms_params, src, get_buf_from_fullmv(ref, &mv), ref_stride); 1635 update_mvs_and_sad(sad, &mv, mv_cost_params, &best_sad, 1636 /*raw_best_sad=*/NULL, best_mv, second_best_mv); 1637 } else { 1638 // 4 sads in a single call if we are checking every location 1639 if (c + 3 <= end_col) { 1640 unsigned int sads[4]; 1641 const uint8_t *addrs[4]; 1642 for (i = 0; i < 4; ++i) { 1643 const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c + i }; 1644 addrs[i] = get_buf_from_fullmv(ref, &mv); 1645 } 1646 1647 ms_params->sdx4df(src->buf, src->stride, addrs, ref_stride, sads); 1648 1649 for (i = 0; i < 4; ++i) { 1650 if (sads[i] < best_sad) { 1651 const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c + i }; 1652 update_mvs_and_sad(sads[i], &mv, mv_cost_params, &best_sad, 1653 /*raw_best_sad=*/NULL, best_mv, 1654 second_best_mv); 1655 } 1656 } 1657 } else { 1658 for (i = 0; i < end_col - c; ++i) { 1659 const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c + i }; 1660 unsigned int sad = get_mvpred_sad( 1661 ms_params, src, get_buf_from_fullmv(ref, &mv), ref_stride); 1662 update_mvs_and_sad(sad, &mv, mv_cost_params, &best_sad, 1663 /*raw_best_sad=*/NULL, best_mv, second_best_mv); 1664 } 1665 } 1666 } 1667 } 1668 } 1669 1670 return best_sad; 1671 } 1672 1673 // Runs an limited range exhaustive mesh search using a pattern set 1674 // according to the encode speed profile. 1675 static int full_pixel_exhaustive(const FULLPEL_MV start_mv, 1676 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1677 const struct MESH_PATTERN *const mesh_patterns, 1678 int *cost_list, FULLPEL_MV *best_mv, 1679 FULLPEL_MV_STATS *mv_stats, 1680 FULLPEL_MV *second_best_mv) { 1681 const int kMinRange = 7; 1682 const int kMaxRange = 256; 1683 const int kMinInterval = 1; 1684 1685 int bestsme; 1686 int i; 1687 int interval = mesh_patterns[0].interval; 1688 int range = mesh_patterns[0].range; 1689 int baseline_interval_divisor; 1690 1691 // TODO(chiyotsai@google.com): Currently exhaustive search calls single ref 1692 // version of sad and variance function. We still need to check the 1693 // performance when compound ref exhaustive search is enabled. 1694 assert(!ms_params->ms_buffers.second_pred && 1695 "Mesh search does not support compound mode!"); 1696 1697 *best_mv = start_mv; 1698 1699 // Trap illegal values for interval and range for this function. 1700 if ((range < kMinRange) || (range > kMaxRange) || (interval < kMinInterval) || 1701 (interval > range)) 1702 return INT_MAX; 1703 1704 baseline_interval_divisor = range / interval; 1705 1706 // Check size of proposed first range against magnitude of the centre 1707 // value used as a starting point. 1708 range = AOMMAX(range, (5 * AOMMAX(abs(best_mv->row), abs(best_mv->col))) / 4); 1709 range = AOMMIN(range, kMaxRange); 1710 interval = AOMMAX(interval, range / baseline_interval_divisor); 1711 // Use a small search step/interval for certain kind of clips. 1712 // For example, screen content clips with a lot of texts. 1713 // Large interval could lead to a false matching position, and it can't find 1714 // the best global candidate in following iterations due to reduced search 1715 // range. The solution here is to use a small search iterval in the beginning 1716 // and thus reduces the chance of missing the best candidate. 1717 if (ms_params->fine_search_interval) { 1718 interval = AOMMIN(interval, 4); 1719 } 1720 1721 // initial search 1722 bestsme = exhaustive_mesh_search(*best_mv, ms_params, range, interval, 1723 best_mv, second_best_mv); 1724 1725 if ((interval > kMinInterval) && (range > kMinRange)) { 1726 // Progressive searches with range and step size decreasing each time 1727 // till we reach a step size of 1. Then break out. 1728 for (i = 1; i < MAX_MESH_STEP; ++i) { 1729 // First pass with coarser step and longer range 1730 bestsme = exhaustive_mesh_search( 1731 *best_mv, ms_params, mesh_patterns[i].range, 1732 mesh_patterns[i].interval, best_mv, second_best_mv); 1733 1734 if (mesh_patterns[i].interval == 1) break; 1735 } 1736 } 1737 1738 if (bestsme < INT_MAX) { 1739 bestsme = get_mvpred_var_cost(ms_params, best_mv, mv_stats); 1740 } 1741 1742 // Return cost list. 1743 if (cost_list) { 1744 if (USE_SAD_COSTLIST) { 1745 const int costlist_has_sad = 0; 1746 calc_int_sad_list(*best_mv, ms_params, cost_list, costlist_has_sad); 1747 } else { 1748 calc_int_cost_list(*best_mv, ms_params, cost_list); 1749 } 1750 } 1751 return bestsme; 1752 } 1753 1754 // This function is called when we do joint motion search in comp_inter_inter 1755 // mode, or when searching for one component of an ext-inter compound mode. 1756 int av1_refining_search_8p_c(const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1757 const FULLPEL_MV start_mv, FULLPEL_MV *best_mv) { 1758 static const search_neighbors neighbors[8] = { 1759 { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 }, 1760 { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 }, 1761 { { 0, 1 }, 0 * SEARCH_GRID_STRIDE_8P + 1 }, 1762 { { 1, 0 }, 1 * SEARCH_GRID_STRIDE_8P + 0 }, 1763 { { -1, -1 }, -1 * SEARCH_GRID_STRIDE_8P - 1 }, 1764 { { 1, -1 }, 1 * SEARCH_GRID_STRIDE_8P - 1 }, 1765 { { -1, 1 }, -1 * SEARCH_GRID_STRIDE_8P + 1 }, 1766 { { 1, 1 }, 1 * SEARCH_GRID_STRIDE_8P + 1 } 1767 }; 1768 1769 uint8_t do_refine_search_grid[SEARCH_GRID_STRIDE_8P * 1770 SEARCH_GRID_STRIDE_8P] = { 0 }; 1771 int grid_center = SEARCH_GRID_CENTER_8P; 1772 int grid_coord = grid_center; 1773 1774 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 1775 const FullMvLimits *mv_limits = &ms_params->mv_limits; 1776 const MSBuffers *ms_buffers = &ms_params->ms_buffers; 1777 const struct buf_2d *src = ms_buffers->src; 1778 const struct buf_2d *ref = ms_buffers->ref; 1779 const int ref_stride = ref->stride; 1780 1781 *best_mv = start_mv; 1782 clamp_fullmv(best_mv, mv_limits); 1783 1784 unsigned int best_sad = get_mvpred_compound_sad( 1785 ms_params, src, get_buf_from_fullmv(ref, best_mv), ref_stride); 1786 best_sad += mvsad_err_cost_(best_mv, mv_cost_params); 1787 1788 do_refine_search_grid[grid_coord] = 1; 1789 1790 for (int i = 0; i < SEARCH_RANGE_8P; ++i) { 1791 int best_site = -1; 1792 1793 for (int j = 0; j < 8; ++j) { 1794 grid_coord = grid_center + neighbors[j].coord_offset; 1795 if (do_refine_search_grid[grid_coord] == 1) { 1796 continue; 1797 } 1798 const FULLPEL_MV mv = { best_mv->row + neighbors[j].coord.row, 1799 best_mv->col + neighbors[j].coord.col }; 1800 1801 do_refine_search_grid[grid_coord] = 1; 1802 if (av1_is_fullmv_in_range(mv_limits, mv)) { 1803 unsigned int sad; 1804 sad = get_mvpred_compound_sad( 1805 ms_params, src, get_buf_from_fullmv(ref, &mv), ref_stride); 1806 if (sad < best_sad) { 1807 sad += mvsad_err_cost_(&mv, mv_cost_params); 1808 1809 if (sad < best_sad) { 1810 best_sad = sad; 1811 best_site = j; 1812 } 1813 } 1814 } 1815 } 1816 1817 if (best_site == -1) { 1818 break; 1819 } else { 1820 best_mv->row += neighbors[best_site].coord.row; 1821 best_mv->col += neighbors[best_site].coord.col; 1822 grid_center += neighbors[best_site].coord_offset; 1823 } 1824 } 1825 return best_sad; 1826 } 1827 1828 int av1_full_pixel_search(const FULLPEL_MV start_mv, 1829 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1830 const int step_param, int *cost_list, 1831 FULLPEL_MV *best_mv, FULLPEL_MV_STATS *best_mv_stats, 1832 FULLPEL_MV *second_best_mv) { 1833 const BLOCK_SIZE bsize = ms_params->bsize; 1834 const SEARCH_METHODS search_method = ms_params->search_method; 1835 1836 const int is_intra_mode = ms_params->is_intra_mode; 1837 int run_mesh_search = ms_params->run_mesh_search; 1838 1839 int var = 0; 1840 MARK_MV_INVALID(best_mv); 1841 if (second_best_mv) { 1842 MARK_MV_INVALID(second_best_mv); 1843 } 1844 1845 if (cost_list) { 1846 cost_list[0] = INT_MAX; 1847 cost_list[1] = INT_MAX; 1848 cost_list[2] = INT_MAX; 1849 cost_list[3] = INT_MAX; 1850 cost_list[4] = INT_MAX; 1851 } 1852 1853 assert(ms_params->ms_buffers.ref->stride == ms_params->search_sites->stride); 1854 1855 switch (search_method) { 1856 case FAST_BIGDIA: 1857 var = fast_bigdia_search(start_mv, ms_params, step_param, 0, cost_list, 1858 best_mv, best_mv_stats); 1859 break; 1860 case VFAST_DIAMOND: 1861 var = vfast_dia_search(start_mv, ms_params, step_param, 0, cost_list, 1862 best_mv, best_mv_stats); 1863 break; 1864 case FAST_DIAMOND: 1865 var = fast_dia_search(start_mv, ms_params, step_param, 0, cost_list, 1866 best_mv, best_mv_stats); 1867 break; 1868 case FAST_HEX: 1869 var = fast_hex_search(start_mv, ms_params, step_param, 0, cost_list, 1870 best_mv, best_mv_stats); 1871 break; 1872 case HEX: 1873 var = hex_search(start_mv, ms_params, step_param, 1, cost_list, best_mv, 1874 best_mv_stats); 1875 break; 1876 case SQUARE: 1877 var = square_search(start_mv, ms_params, step_param, 1, cost_list, 1878 best_mv, best_mv_stats); 1879 break; 1880 case BIGDIA: 1881 var = bigdia_search(start_mv, ms_params, step_param, 1, cost_list, 1882 best_mv, best_mv_stats); 1883 break; 1884 case NSTEP: 1885 case NSTEP_8PT: 1886 case DIAMOND: 1887 case CLAMPED_DIAMOND: 1888 var = full_pixel_diamond(start_mv, ms_params, step_param, cost_list, 1889 best_mv, best_mv_stats, second_best_mv); 1890 break; 1891 default: assert(0 && "Invalid search method."); 1892 } 1893 1894 // Should we allow a follow on exhaustive search? 1895 if (!run_mesh_search && 1896 ((search_method == NSTEP) || (search_method == NSTEP_8PT)) && 1897 !ms_params->ms_buffers.second_pred) { 1898 int exhaustive_thr = ms_params->force_mesh_thresh; 1899 exhaustive_thr >>= 1900 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]); 1901 // Threshold variance for an exhaustive full search. 1902 if (var > exhaustive_thr) run_mesh_search = 1; 1903 } 1904 1905 // TODO(yunqing): the following is used to reduce mesh search in temporal 1906 // filtering. Can extend it to intrabc. 1907 if (!is_intra_mode && ms_params->prune_mesh_search) { 1908 const int full_pel_mv_diff = AOMMAX(abs(start_mv.row - best_mv->row), 1909 abs(start_mv.col - best_mv->col)); 1910 if (full_pel_mv_diff <= ms_params->mesh_search_mv_diff_threshold) { 1911 run_mesh_search = 0; 1912 } 1913 } 1914 1915 if (ms_params->sdf != ms_params->vfp->sdf) { 1916 // If we are skipping rows when we perform the motion search, we need to 1917 // check the quality of skipping. If it's bad, then we run mesh search with 1918 // skip row features off. 1919 // TODO(chiyotsai@google.com): Handle the case where we have a vertical 1920 // offset of 1 before we hit this statement to avoid having to redo 1921 // motion search. 1922 const struct buf_2d *src = ms_params->ms_buffers.src; 1923 const struct buf_2d *ref = ms_params->ms_buffers.ref; 1924 const int src_stride = src->stride; 1925 const int ref_stride = ref->stride; 1926 1927 const uint8_t *src_address = src->buf; 1928 const uint8_t *best_address = get_buf_from_fullmv(ref, best_mv); 1929 const int sad = 1930 ms_params->vfp->sdf(src_address, src_stride, best_address, ref_stride); 1931 const int skip_sad = 1932 ms_params->vfp->sdsf(src_address, src_stride, best_address, ref_stride); 1933 // We will keep the result of skipping rows if it's good enough. Here, good 1934 // enough means the error is less than 1 per pixel. 1935 const int kSADThresh = 1936 1 << (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]); 1937 if (sad > kSADThresh && abs(skip_sad - sad) * 10 >= AOMMAX(sad, 1) * 9) { 1938 // There is a large discrepancy between skipping and not skipping, so we 1939 // need to redo the motion search. 1940 FULLPEL_MOTION_SEARCH_PARAMS new_ms_params = *ms_params; 1941 new_ms_params.sdf = new_ms_params.vfp->sdf; 1942 new_ms_params.sdx4df = new_ms_params.vfp->sdx4df; 1943 new_ms_params.sdx3df = new_ms_params.vfp->sdx3df; 1944 1945 return av1_full_pixel_search(start_mv, &new_ms_params, step_param, 1946 cost_list, best_mv, best_mv_stats, 1947 second_best_mv); 1948 } 1949 } 1950 1951 if (run_mesh_search) { 1952 int var_ex; 1953 FULLPEL_MV tmp_mv_ex; 1954 FULLPEL_MV_STATS tmp_mv_stats; 1955 // Pick the mesh pattern for exhaustive search based on the toolset (intraBC 1956 // or non-intraBC) 1957 // TODO(chiyotsai@google.com): There is a bug here where the second best mv 1958 // gets overwritten without actually comparing the rdcost. 1959 const MESH_PATTERN *const mesh_patterns = 1960 ms_params->mesh_patterns[is_intra_mode]; 1961 // TODO(chiyotsai@google.com): the second best mv is not set correctly by 1962 // full_pixel_exhaustive, which can incorrectly override it. 1963 var_ex = 1964 full_pixel_exhaustive(*best_mv, ms_params, mesh_patterns, cost_list, 1965 &tmp_mv_ex, &tmp_mv_stats, second_best_mv); 1966 if (var_ex < var) { 1967 var = var_ex; 1968 *best_mv_stats = tmp_mv_stats; 1969 *best_mv = tmp_mv_ex; 1970 } 1971 } 1972 1973 return var; 1974 } 1975 1976 int av1_intrabc_hash_search(const AV1_COMP *cpi, const MACROBLOCKD *xd, 1977 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 1978 IntraBCHashInfo *intrabc_hash_info, 1979 FULLPEL_MV *best_mv) { 1980 if (!av1_use_hash_me(cpi)) return INT_MAX; 1981 1982 const BLOCK_SIZE bsize = ms_params->bsize; 1983 const int block_width = block_size_wide[bsize]; 1984 const int block_height = block_size_high[bsize]; 1985 1986 if (block_width != block_height) return INT_MAX; 1987 1988 const FullMvLimits *mv_limits = &ms_params->mv_limits; 1989 const MSBuffers *ms_buffer = &ms_params->ms_buffers; 1990 1991 const uint8_t *src = ms_buffer->src->buf; 1992 const int src_stride = ms_buffer->src->stride; 1993 1994 const int mi_row = xd->mi_row; 1995 const int mi_col = xd->mi_col; 1996 const int x_pos = mi_col * MI_SIZE; 1997 const int y_pos = mi_row * MI_SIZE; 1998 1999 uint32_t hash_value1, hash_value2; 2000 int best_hash_cost = INT_MAX; 2001 2002 // for the hashMap 2003 hash_table *ref_frame_hash = &intrabc_hash_info->intrabc_hash_table; 2004 2005 av1_get_block_hash_value(intrabc_hash_info, src, src_stride, block_width, 2006 &hash_value1, &hash_value2, is_cur_buf_hbd(xd)); 2007 2008 int count = av1_hash_table_count(ref_frame_hash, hash_value1); 2009 if (count <= 1) { 2010 return INT_MAX; 2011 } 2012 if (cpi->sf.mv_sf.prune_intrabc_candidate_block_hash_search) { 2013 count = AOMMIN(64, count); 2014 } 2015 2016 Iterator iterator = av1_hash_get_first_iterator(ref_frame_hash, hash_value1); 2017 for (int i = 0; i < count; i++, aom_iterator_increment(&iterator)) { 2018 block_hash ref_block_hash = *(block_hash *)(aom_iterator_get(&iterator)); 2019 if (hash_value2 == ref_block_hash.hash_value2) { 2020 // Make sure the prediction is from valid area. 2021 const MV dv = { GET_MV_SUBPEL(ref_block_hash.y - y_pos), 2022 GET_MV_SUBPEL(ref_block_hash.x - x_pos) }; 2023 if (!av1_is_dv_valid(dv, &cpi->common, xd, mi_row, mi_col, bsize, 2024 cpi->common.seq_params->mib_size_log2)) 2025 continue; 2026 2027 FULLPEL_MV hash_mv; 2028 hash_mv.col = ref_block_hash.x - x_pos; 2029 hash_mv.row = ref_block_hash.y - y_pos; 2030 if (!av1_is_fullmv_in_range(mv_limits, hash_mv)) continue; 2031 FULLPEL_MV_STATS mv_stats; 2032 const int refCost = get_mvpred_var_cost(ms_params, &hash_mv, &mv_stats); 2033 if (refCost < best_hash_cost) { 2034 best_hash_cost = refCost; 2035 *best_mv = hash_mv; 2036 } 2037 } 2038 } 2039 2040 return best_hash_cost; 2041 } 2042 2043 int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl, 2044 int search_size, int full_search, int *sad) { 2045 int best_sad = INT_MAX; 2046 int this_sad; 2047 int d; 2048 int center, offset = 0; 2049 int bw = search_size << 1; 2050 2051 if (full_search) { 2052 for (d = 0; d <= bw; d++) { 2053 this_sad = aom_vector_var(&ref[d], src, bwl); 2054 if (this_sad < best_sad) { 2055 best_sad = this_sad; 2056 offset = d; 2057 } 2058 } 2059 center = offset; 2060 *sad = best_sad; 2061 return (center - (bw >> 1)); 2062 } 2063 2064 for (d = 0; d <= bw; d += 16) { 2065 this_sad = aom_vector_var(&ref[d], src, bwl); 2066 if (this_sad < best_sad) { 2067 best_sad = this_sad; 2068 offset = d; 2069 } 2070 } 2071 center = offset; 2072 2073 for (d = -8; d <= 8; d += 16) { 2074 int this_pos = offset + d; 2075 // check limit 2076 if (this_pos < 0 || this_pos > bw) continue; 2077 this_sad = aom_vector_var(&ref[this_pos], src, bwl); 2078 if (this_sad < best_sad) { 2079 best_sad = this_sad; 2080 center = this_pos; 2081 } 2082 } 2083 offset = center; 2084 2085 for (d = -4; d <= 4; d += 8) { 2086 int this_pos = offset + d; 2087 // check limit 2088 if (this_pos < 0 || this_pos > bw) continue; 2089 this_sad = aom_vector_var(&ref[this_pos], src, bwl); 2090 if (this_sad < best_sad) { 2091 best_sad = this_sad; 2092 center = this_pos; 2093 } 2094 } 2095 offset = center; 2096 2097 for (d = -2; d <= 2; d += 4) { 2098 int this_pos = offset + d; 2099 // check limit 2100 if (this_pos < 0 || this_pos > bw) continue; 2101 this_sad = aom_vector_var(&ref[this_pos], src, bwl); 2102 if (this_sad < best_sad) { 2103 best_sad = this_sad; 2104 center = this_pos; 2105 } 2106 } 2107 offset = center; 2108 2109 for (d = -1; d <= 1; d += 2) { 2110 int this_pos = offset + d; 2111 // check limit 2112 if (this_pos < 0 || this_pos > bw) continue; 2113 this_sad = aom_vector_var(&ref[this_pos], src, bwl); 2114 if (this_sad < best_sad) { 2115 best_sad = this_sad; 2116 center = this_pos; 2117 } 2118 } 2119 *sad = best_sad; 2120 return (center - (bw >> 1)); 2121 } 2122 2123 // A special fast version of motion search used in rt mode. 2124 // The search window along columns and row is given by: 2125 // +/- me_search_size_col/row. 2126 unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x, 2127 BLOCK_SIZE bsize, int mi_row, 2128 int mi_col, const MV *ref_mv, 2129 unsigned int *y_sad_zero, 2130 int me_search_size_col, 2131 int me_search_size_row) { 2132 const AV1_COMMON *const cm = &cpi->common; 2133 MACROBLOCKD *xd = &x->e_mbd; 2134 MB_MODE_INFO *mi = xd->mi[0]; 2135 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } }; 2136 int idx; 2137 const int bw = block_size_wide[bsize]; 2138 const int bh = block_size_high[bsize]; 2139 const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN; 2140 const int full_search = is_screen; 2141 const bool screen_scroll_superblock = 2142 is_screen && bsize == cm->seq_params->sb_size; 2143 // Keep border a multiple of 16. 2144 const int border = (cpi->oxcf.border_in_pixels >> 4) << 4; 2145 int search_size_width = me_search_size_col; 2146 int search_size_height = me_search_size_row; 2147 // Adjust based on boundary. 2148 if (((mi_col << 2) - search_size_width < -border) || 2149 ((mi_col << 2) + search_size_width > cm->width + border)) 2150 search_size_width = border; 2151 if (((mi_row << 2) - search_size_height < -border) || 2152 ((mi_row << 2) + search_size_height > cm->height + border)) 2153 search_size_height = border; 2154 const int src_stride = x->plane[0].src.stride; 2155 const int ref_stride = xd->plane[0].pre[0].stride; 2156 uint8_t const *ref_buf, *src_buf; 2157 int_mv *best_int_mv = &xd->mi[0]->mv[0]; 2158 unsigned int best_sad, tmp_sad, this_sad[4]; 2159 int best_sad_col, best_sad_row; 2160 const int row_norm_factor = mi_size_high_log2[bsize] + 1; 2161 const int col_norm_factor = 3 + (bw >> 5); 2162 const YV12_BUFFER_CONFIG *scaled_ref_frame = 2163 av1_get_scaled_ref_frame(cpi, mi->ref_frame[0]); 2164 static const MV search_pos[4] = { 2165 { -1, 0 }, 2166 { 0, -1 }, 2167 { 0, 1 }, 2168 { 1, 0 }, 2169 }; 2170 2171 if (scaled_ref_frame) { 2172 int i; 2173 // Swap out the reference frame for a version that's been scaled to 2174 // match the resolution of the current frame, allowing the existing 2175 // motion search code to be used without additional modifications. 2176 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; 2177 av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL, 2178 MAX_MB_PLANE); 2179 } 2180 2181 if (xd->bd != 8) { 2182 best_int_mv->as_fullmv = kZeroFullMv; 2183 best_sad = cpi->ppi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, 2184 xd->plane[0].pre[0].buf, ref_stride); 2185 2186 if (scaled_ref_frame) { 2187 int i; 2188 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 2189 } 2190 return best_sad; 2191 } 2192 const int width_ref_buf = (search_size_width << 1) + bw; 2193 const int height_ref_buf = (search_size_height << 1) + bh; 2194 int16_t *hbuf = (int16_t *)aom_malloc(width_ref_buf * sizeof(*hbuf)); 2195 int16_t *vbuf = (int16_t *)aom_malloc(height_ref_buf * sizeof(*vbuf)); 2196 int16_t *src_hbuf = (int16_t *)aom_malloc(bw * sizeof(*src_hbuf)); 2197 int16_t *src_vbuf = (int16_t *)aom_malloc(bh * sizeof(*src_vbuf)); 2198 if (!hbuf || !vbuf || !src_hbuf || !src_vbuf) { 2199 aom_free(hbuf); 2200 aom_free(vbuf); 2201 aom_free(src_hbuf); 2202 aom_free(src_vbuf); 2203 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2204 "Failed to allocate hbuf, vbuf, src_hbuf, or src_vbuf"); 2205 } 2206 2207 // Set up prediction 1-D reference set for rows. 2208 ref_buf = xd->plane[0].pre[0].buf - search_size_width; 2209 aom_int_pro_row(hbuf, ref_buf, ref_stride, width_ref_buf, bh, 2210 row_norm_factor); 2211 2212 // Set up prediction 1-D reference set for cols 2213 ref_buf = xd->plane[0].pre[0].buf - search_size_height * ref_stride; 2214 aom_int_pro_col(vbuf, ref_buf, ref_stride, bw, height_ref_buf, 2215 col_norm_factor); 2216 2217 // Set up src 1-D reference set 2218 src_buf = x->plane[0].src.buf; 2219 aom_int_pro_row(src_hbuf, src_buf, src_stride, bw, bh, row_norm_factor); 2220 aom_int_pro_col(src_vbuf, src_buf, src_stride, bw, bh, col_norm_factor); 2221 2222 // Find the best match per 1-D search 2223 best_int_mv->as_fullmv.col = 2224 av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize], 2225 search_size_width, full_search, &best_sad_col); 2226 best_int_mv->as_fullmv.row = 2227 av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize], 2228 search_size_height, full_search, &best_sad_row); 2229 2230 // For screen: select between horiz or vert motion. 2231 if (is_screen) { 2232 if (best_sad_col < best_sad_row) 2233 best_int_mv->as_fullmv.row = 0; 2234 else 2235 best_int_mv->as_fullmv.col = 0; 2236 } 2237 2238 FULLPEL_MV this_mv = best_int_mv->as_fullmv; 2239 src_buf = x->plane[0].src.buf; 2240 ref_buf = get_buf_from_fullmv(&xd->plane[0].pre[0], &this_mv); 2241 best_sad = 2242 cpi->ppi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 2243 2244 // Evaluate zero MV if found MV is non-zero. 2245 if (best_int_mv->as_int != 0) { 2246 tmp_sad = cpi->ppi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, 2247 xd->plane[0].pre[0].buf, ref_stride); 2248 *y_sad_zero = tmp_sad; 2249 if (tmp_sad < best_sad) { 2250 best_int_mv->as_fullmv = kZeroFullMv; 2251 this_mv = best_int_mv->as_fullmv; 2252 ref_buf = xd->plane[0].pre[0].buf; 2253 best_sad = tmp_sad; 2254 } 2255 } else { 2256 *y_sad_zero = best_sad; 2257 } 2258 2259 if (!screen_scroll_superblock) { 2260 const uint8_t *const pos[4] = { 2261 ref_buf - ref_stride, 2262 ref_buf - 1, 2263 ref_buf + 1, 2264 ref_buf + ref_stride, 2265 }; 2266 2267 cpi->ppi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, 2268 this_sad); 2269 2270 for (idx = 0; idx < 4; ++idx) { 2271 if (this_sad[idx] < best_sad) { 2272 best_sad = this_sad[idx]; 2273 best_int_mv->as_fullmv.row = search_pos[idx].row + this_mv.row; 2274 best_int_mv->as_fullmv.col = search_pos[idx].col + this_mv.col; 2275 } 2276 } 2277 2278 if (this_sad[0] < this_sad[3]) 2279 this_mv.row -= 1; 2280 else 2281 this_mv.row += 1; 2282 2283 if (this_sad[1] < this_sad[2]) 2284 this_mv.col -= 1; 2285 else 2286 this_mv.col += 1; 2287 2288 ref_buf = get_buf_from_fullmv(&xd->plane[0].pre[0], &this_mv); 2289 2290 tmp_sad = 2291 cpi->ppi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); 2292 if (best_sad > tmp_sad) { 2293 best_int_mv->as_fullmv = this_mv; 2294 best_sad = tmp_sad; 2295 } 2296 } 2297 2298 FullMvLimits mv_limits = x->mv_limits; 2299 av1_set_mv_search_range(&mv_limits, ref_mv); 2300 clamp_fullmv(&best_int_mv->as_fullmv, &mv_limits); 2301 2302 convert_fullmv_to_mv(best_int_mv); 2303 2304 if (scaled_ref_frame) { 2305 int i; 2306 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; 2307 } 2308 2309 aom_free(hbuf); 2310 aom_free(vbuf); 2311 aom_free(src_hbuf); 2312 aom_free(src_vbuf); 2313 return best_sad; 2314 } 2315 2316 // ============================================================================= 2317 // Fullpixel Motion Search: OBMC 2318 // ============================================================================= 2319 static inline int get_obmc_mvpred_var( 2320 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV *this_mv) { 2321 const aom_variance_fn_ptr_t *vfp = ms_params->vfp; 2322 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 2323 const MSBuffers *ms_buffers = &ms_params->ms_buffers; 2324 const int32_t *wsrc = ms_buffers->wsrc; 2325 const int32_t *mask = ms_buffers->obmc_mask; 2326 const struct buf_2d *ref_buf = ms_buffers->ref; 2327 2328 const MV mv = get_mv_from_fullmv(this_mv); 2329 unsigned int unused; 2330 2331 return vfp->ovf(get_buf_from_fullmv(ref_buf, this_mv), ref_buf->stride, wsrc, 2332 mask, &unused) + 2333 mv_err_cost_(&mv, mv_cost_params); 2334 } 2335 2336 static int obmc_refining_search_sad( 2337 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, FULLPEL_MV *best_mv) { 2338 const aom_variance_fn_ptr_t *fn_ptr = ms_params->vfp; 2339 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 2340 const MSBuffers *ms_buffers = &ms_params->ms_buffers; 2341 const int32_t *wsrc = ms_buffers->wsrc; 2342 const int32_t *mask = ms_buffers->obmc_mask; 2343 const struct buf_2d *ref_buf = ms_buffers->ref; 2344 const FULLPEL_MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; 2345 const int kSearchRange = 8; 2346 2347 unsigned int best_sad = fn_ptr->osdf(get_buf_from_fullmv(ref_buf, best_mv), 2348 ref_buf->stride, wsrc, mask) + 2349 mvsad_err_cost_(best_mv, mv_cost_params); 2350 2351 for (int i = 0; i < kSearchRange; i++) { 2352 int best_site = -1; 2353 2354 for (int j = 0; j < 4; j++) { 2355 const FULLPEL_MV mv = { best_mv->row + neighbors[j].row, 2356 best_mv->col + neighbors[j].col }; 2357 if (av1_is_fullmv_in_range(&ms_params->mv_limits, mv)) { 2358 unsigned int sad = fn_ptr->osdf(get_buf_from_fullmv(ref_buf, &mv), 2359 ref_buf->stride, wsrc, mask); 2360 if (sad < best_sad) { 2361 sad += mvsad_err_cost_(&mv, mv_cost_params); 2362 2363 if (sad < best_sad) { 2364 best_sad = sad; 2365 best_site = j; 2366 } 2367 } 2368 } 2369 } 2370 2371 if (best_site == -1) { 2372 break; 2373 } else { 2374 best_mv->row += neighbors[best_site].row; 2375 best_mv->col += neighbors[best_site].col; 2376 } 2377 } 2378 return best_sad; 2379 } 2380 2381 static int obmc_diamond_search_sad( 2382 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, FULLPEL_MV start_mv, 2383 FULLPEL_MV *best_mv, int search_step, int *num00) { 2384 const aom_variance_fn_ptr_t *fn_ptr = ms_params->vfp; 2385 const search_site_config *cfg = ms_params->search_sites; 2386 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 2387 const MSBuffers *ms_buffers = &ms_params->ms_buffers; 2388 const int32_t *wsrc = ms_buffers->wsrc; 2389 const int32_t *mask = ms_buffers->obmc_mask; 2390 const struct buf_2d *const ref_buf = ms_buffers->ref; 2391 2392 // search_step determines the length of the initial step and hence the number 2393 // of iterations. 2394 const int tot_steps = cfg->num_search_steps - search_step; 2395 const uint8_t *best_address, *init_ref; 2396 int best_sad = INT_MAX; 2397 int best_site = 0; 2398 2399 clamp_fullmv(&start_mv, &ms_params->mv_limits); 2400 best_address = init_ref = get_buf_from_fullmv(ref_buf, &start_mv); 2401 *num00 = 0; 2402 *best_mv = start_mv; 2403 2404 // Check the starting position 2405 best_sad = fn_ptr->osdf(best_address, ref_buf->stride, wsrc, mask) + 2406 mvsad_err_cost_(best_mv, mv_cost_params); 2407 2408 for (int step = tot_steps - 1; step >= 0; --step) { 2409 const search_site *const site = cfg->site[step]; 2410 best_site = 0; 2411 for (int idx = 1; idx <= cfg->searches_per_step[step]; ++idx) { 2412 const FULLPEL_MV mv = { best_mv->row + site[idx].mv.row, 2413 best_mv->col + site[idx].mv.col }; 2414 if (av1_is_fullmv_in_range(&ms_params->mv_limits, mv)) { 2415 int sad = fn_ptr->osdf(best_address + site[idx].offset, ref_buf->stride, 2416 wsrc, mask); 2417 if (sad < best_sad) { 2418 sad += mvsad_err_cost_(&mv, mv_cost_params); 2419 2420 if (sad < best_sad) { 2421 best_sad = sad; 2422 best_site = idx; 2423 } 2424 } 2425 } 2426 } 2427 2428 if (best_site != 0) { 2429 best_mv->row += site[best_site].mv.row; 2430 best_mv->col += site[best_site].mv.col; 2431 best_address += site[best_site].offset; 2432 } else if (best_address == init_ref) { 2433 (*num00)++; 2434 } 2435 } 2436 return best_sad; 2437 } 2438 2439 static int obmc_full_pixel_diamond( 2440 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv, 2441 int step_param, FULLPEL_MV *best_mv) { 2442 const search_site_config *cfg = ms_params->search_sites; 2443 FULLPEL_MV tmp_mv; 2444 int thissme, n, num00 = 0; 2445 int bestsme = 2446 obmc_diamond_search_sad(ms_params, start_mv, &tmp_mv, step_param, &n); 2447 if (bestsme < INT_MAX) bestsme = get_obmc_mvpred_var(ms_params, &tmp_mv); 2448 *best_mv = tmp_mv; 2449 2450 // If there won't be more n-step search, check to see if refining search is 2451 // needed. 2452 const int further_steps = cfg->num_search_steps - 1 - step_param; 2453 2454 while (n < further_steps) { 2455 ++n; 2456 2457 if (num00) { 2458 num00--; 2459 } else { 2460 thissme = obmc_diamond_search_sad(ms_params, start_mv, &tmp_mv, 2461 step_param + n, &num00); 2462 if (thissme < INT_MAX) thissme = get_obmc_mvpred_var(ms_params, &tmp_mv); 2463 2464 if (thissme < bestsme) { 2465 bestsme = thissme; 2466 *best_mv = tmp_mv; 2467 } 2468 } 2469 } 2470 2471 return bestsme; 2472 } 2473 2474 int av1_obmc_full_pixel_search(const FULLPEL_MV start_mv, 2475 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, 2476 const int step_param, FULLPEL_MV *best_mv) { 2477 if (!ms_params->fast_obmc_search) { 2478 const int bestsme = 2479 obmc_full_pixel_diamond(ms_params, start_mv, step_param, best_mv); 2480 return bestsme; 2481 } else { 2482 *best_mv = start_mv; 2483 clamp_fullmv(best_mv, &ms_params->mv_limits); 2484 int thissme = obmc_refining_search_sad(ms_params, best_mv); 2485 if (thissme < INT_MAX) thissme = get_obmc_mvpred_var(ms_params, best_mv); 2486 return thissme; 2487 } 2488 } 2489 2490 // ============================================================================= 2491 // Subpixel Motion Search: Translational 2492 // ============================================================================= 2493 #define INIT_SUBPEL_STEP_SIZE (4) 2494 /* 2495 * To avoid the penalty for crossing cache-line read, preload the reference 2496 * area in a small buffer, which is aligned to make sure there won't be crossing 2497 * cache-line read while reading from this buffer. This reduced the cpu 2498 * cycles spent on reading ref data in sub-pixel filter functions. 2499 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 2500 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 2501 * could reduce the area. 2502 */ 2503 2504 // Returns the subpel offset used by various subpel variance functions [m]sv[a]f 2505 static inline int get_subpel_part(int x) { return x & 7; } 2506 2507 // Gets the address of the ref buffer at subpel location (r, c), rounded to the 2508 // nearest fullpel precision toward - \infty 2509 static inline const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 2510 const MV mv) { 2511 const int offset = (mv.row >> 3) * buf->stride + (mv.col >> 3); 2512 return &buf->buf[offset]; 2513 } 2514 2515 // Estimates the variance of prediction residue using bilinear filter for fast 2516 // search. 2517 static inline int estimated_pref_error( 2518 const MV *this_mv, const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2519 unsigned int *sse) { 2520 const aom_variance_fn_ptr_t *vfp = var_params->vfp; 2521 2522 const MSBuffers *ms_buffers = &var_params->ms_buffers; 2523 const uint8_t *src = ms_buffers->src->buf; 2524 const uint8_t *ref = get_buf_from_mv(ms_buffers->ref, *this_mv); 2525 const int src_stride = ms_buffers->src->stride; 2526 const int ref_stride = ms_buffers->ref->stride; 2527 const uint8_t *second_pred = ms_buffers->second_pred; 2528 const uint8_t *mask = ms_buffers->mask; 2529 const int mask_stride = ms_buffers->mask_stride; 2530 const int invert_mask = ms_buffers->inv_mask; 2531 2532 const int subpel_x_q3 = get_subpel_part(this_mv->col); 2533 const int subpel_y_q3 = get_subpel_part(this_mv->row); 2534 2535 if (second_pred == NULL) { 2536 return vfp->svf(ref, ref_stride, subpel_x_q3, subpel_y_q3, src, src_stride, 2537 sse); 2538 } else if (mask) { 2539 return vfp->msvf(ref, ref_stride, subpel_x_q3, subpel_y_q3, src, src_stride, 2540 second_pred, mask, mask_stride, invert_mask, sse); 2541 } else { 2542 return vfp->svaf(ref, ref_stride, subpel_x_q3, subpel_y_q3, src, src_stride, 2543 sse, second_pred); 2544 } 2545 } 2546 2547 // Calculates the variance of prediction residue. 2548 static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *cm, 2549 const MV *this_mv, 2550 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2551 unsigned int *sse) { 2552 const aom_variance_fn_ptr_t *vfp = var_params->vfp; 2553 const SUBPEL_SEARCH_TYPE subpel_search_type = var_params->subpel_search_type; 2554 2555 const MSBuffers *ms_buffers = &var_params->ms_buffers; 2556 const uint8_t *src = ms_buffers->src->buf; 2557 const uint8_t *ref = get_buf_from_mv(ms_buffers->ref, *this_mv); 2558 const int src_stride = ms_buffers->src->stride; 2559 const int ref_stride = ms_buffers->ref->stride; 2560 const uint8_t *second_pred = ms_buffers->second_pred; 2561 const uint8_t *mask = ms_buffers->mask; 2562 const int mask_stride = ms_buffers->mask_stride; 2563 const int invert_mask = ms_buffers->inv_mask; 2564 const int w = var_params->w; 2565 const int h = var_params->h; 2566 2567 const int mi_row = xd->mi_row; 2568 const int mi_col = xd->mi_col; 2569 const int subpel_x_q3 = get_subpel_part(this_mv->col); 2570 const int subpel_y_q3 = get_subpel_part(this_mv->row); 2571 2572 unsigned int besterr; 2573 #if CONFIG_AV1_HIGHBITDEPTH 2574 if (is_cur_buf_hbd(xd)) { 2575 DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); 2576 uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16); 2577 if (second_pred != NULL) { 2578 if (mask) { 2579 aom_highbd_comp_mask_upsampled_pred( 2580 xd, cm, mi_row, mi_col, this_mv, pred8, second_pred, w, h, 2581 subpel_x_q3, subpel_y_q3, ref, ref_stride, mask, mask_stride, 2582 invert_mask, xd->bd, subpel_search_type); 2583 } else { 2584 aom_highbd_comp_avg_upsampled_pred( 2585 xd, cm, mi_row, mi_col, this_mv, pred8, second_pred, w, h, 2586 subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd, 2587 subpel_search_type); 2588 } 2589 } else { 2590 aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred8, w, h, 2591 subpel_x_q3, subpel_y_q3, ref, ref_stride, 2592 xd->bd, subpel_search_type); 2593 } 2594 besterr = vfp->vf(pred8, w, src, src_stride, sse); 2595 } else { 2596 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); 2597 if (second_pred != NULL) { 2598 if (mask) { 2599 aom_comp_mask_upsampled_pred( 2600 xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h, 2601 subpel_x_q3, subpel_y_q3, ref, ref_stride, mask, mask_stride, 2602 invert_mask, subpel_search_type); 2603 } else { 2604 aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, 2605 second_pred, w, h, subpel_x_q3, subpel_y_q3, 2606 ref, ref_stride, subpel_search_type); 2607 } 2608 } else { 2609 aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, 2610 subpel_x_q3, subpel_y_q3, ref, ref_stride, 2611 subpel_search_type); 2612 } 2613 2614 besterr = vfp->vf(pred, w, src, src_stride, sse); 2615 } 2616 #else 2617 DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); 2618 if (second_pred != NULL) { 2619 if (mask) { 2620 aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, 2621 second_pred, w, h, subpel_x_q3, subpel_y_q3, 2622 ref, ref_stride, mask, mask_stride, 2623 invert_mask, subpel_search_type); 2624 } else { 2625 aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, 2626 second_pred, w, h, subpel_x_q3, subpel_y_q3, 2627 ref, ref_stride, subpel_search_type); 2628 } 2629 } else { 2630 aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, subpel_x_q3, 2631 subpel_y_q3, ref, ref_stride, subpel_search_type); 2632 } 2633 2634 besterr = vfp->vf(pred, w, src, src_stride, sse); 2635 #endif 2636 return besterr; 2637 } 2638 2639 // Estimates whether this_mv is better than best_mv. This function incorporates 2640 // both prediction error and residue into account. It is suffixed "fast" because 2641 // it uses bilinear filter to estimate the prediction. 2642 static inline unsigned int check_better_fast( 2643 MACROBLOCKD *xd, const AV1_COMMON *cm, const MV *this_mv, MV *best_mv, 2644 const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2645 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 2646 unsigned int *sse1, int *distortion, int *has_better_mv, int is_scaled) { 2647 unsigned int cost; 2648 if (av1_is_subpelmv_in_range(mv_limits, *this_mv)) { 2649 unsigned int sse; 2650 int thismse; 2651 if (is_scaled) { 2652 thismse = upsampled_pref_error(xd, cm, this_mv, var_params, &sse); 2653 } else { 2654 thismse = estimated_pref_error(this_mv, var_params, &sse); 2655 } 2656 cost = mv_err_cost_(this_mv, mv_cost_params); 2657 cost += thismse; 2658 2659 if (cost < *besterr) { 2660 *besterr = cost; 2661 *best_mv = *this_mv; 2662 *distortion = thismse; 2663 *sse1 = sse; 2664 *has_better_mv |= 1; 2665 } 2666 } else { 2667 cost = INT_MAX; 2668 } 2669 return cost; 2670 } 2671 2672 // Checks whether this_mv is better than best_mv. This function incorporates 2673 // both prediction error and residue into account. 2674 static AOM_FORCE_INLINE unsigned int check_better( 2675 MACROBLOCKD *xd, const AV1_COMMON *cm, const MV *this_mv, MV *best_mv, 2676 const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2677 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 2678 unsigned int *sse1, int *distortion, int *is_better) { 2679 unsigned int cost; 2680 if (av1_is_subpelmv_in_range(mv_limits, *this_mv)) { 2681 unsigned int sse; 2682 int thismse; 2683 thismse = upsampled_pref_error(xd, cm, this_mv, var_params, &sse); 2684 cost = mv_err_cost_(this_mv, mv_cost_params); 2685 cost += thismse; 2686 if (cost < *besterr) { 2687 *besterr = cost; 2688 *best_mv = *this_mv; 2689 *distortion = thismse; 2690 *sse1 = sse; 2691 *is_better |= 1; 2692 } 2693 } else { 2694 cost = INT_MAX; 2695 } 2696 return cost; 2697 } 2698 2699 static inline MV get_best_diag_step(int step_size, unsigned int left_cost, 2700 unsigned int right_cost, 2701 unsigned int up_cost, 2702 unsigned int down_cost) { 2703 const MV diag_step = { up_cost <= down_cost ? -step_size : step_size, 2704 left_cost <= right_cost ? -step_size : step_size }; 2705 2706 return diag_step; 2707 } 2708 2709 // Searches the four cardinal direction for a better mv, then follows up with a 2710 // search in the best quadrant. This uses bilinear filter to speed up the 2711 // calculation. 2712 static AOM_FORCE_INLINE MV first_level_check_fast( 2713 MACROBLOCKD *xd, const AV1_COMMON *cm, const MV this_mv, MV *best_mv, 2714 int hstep, const SubpelMvLimits *mv_limits, 2715 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2716 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 2717 unsigned int *sse1, int *distortion, int is_scaled) { 2718 // Check the four cardinal directions 2719 const MV left_mv = { this_mv.row, this_mv.col - hstep }; 2720 int dummy = 0; 2721 const unsigned int left = check_better_fast( 2722 xd, cm, &left_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, 2723 sse1, distortion, &dummy, is_scaled); 2724 2725 const MV right_mv = { this_mv.row, this_mv.col + hstep }; 2726 const unsigned int right = check_better_fast( 2727 xd, cm, &right_mv, best_mv, mv_limits, var_params, mv_cost_params, 2728 besterr, sse1, distortion, &dummy, is_scaled); 2729 2730 const MV top_mv = { this_mv.row - hstep, this_mv.col }; 2731 const unsigned int up = check_better_fast( 2732 xd, cm, &top_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, 2733 sse1, distortion, &dummy, is_scaled); 2734 2735 const MV bottom_mv = { this_mv.row + hstep, this_mv.col }; 2736 const unsigned int down = check_better_fast( 2737 xd, cm, &bottom_mv, best_mv, mv_limits, var_params, mv_cost_params, 2738 besterr, sse1, distortion, &dummy, is_scaled); 2739 2740 const MV diag_step = get_best_diag_step(hstep, left, right, up, down); 2741 const MV diag_mv = { this_mv.row + diag_step.row, 2742 this_mv.col + diag_step.col }; 2743 2744 // Check the diagonal direction with the best mv 2745 check_better_fast(xd, cm, &diag_mv, best_mv, mv_limits, var_params, 2746 mv_cost_params, besterr, sse1, distortion, &dummy, 2747 is_scaled); 2748 2749 return diag_step; 2750 } 2751 2752 // Performs a following up search after first_level_check_fast is called. This 2753 // performs two extra chess pattern searches in the best quadrant. 2754 static AOM_FORCE_INLINE void second_level_check_fast( 2755 MACROBLOCKD *xd, const AV1_COMMON *cm, const MV this_mv, const MV diag_step, 2756 MV *best_mv, int hstep, const SubpelMvLimits *mv_limits, 2757 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2758 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 2759 unsigned int *sse1, int *distortion, int is_scaled) { 2760 assert(diag_step.row == hstep || diag_step.row == -hstep); 2761 assert(diag_step.col == hstep || diag_step.col == -hstep); 2762 const int tr = this_mv.row; 2763 const int tc = this_mv.col; 2764 const int br = best_mv->row; 2765 const int bc = best_mv->col; 2766 int dummy = 0; 2767 if (tr != br && tc != bc) { 2768 assert(diag_step.col == bc - tc); 2769 assert(diag_step.row == br - tr); 2770 const MV chess_mv_1 = { br, bc + diag_step.col }; 2771 const MV chess_mv_2 = { br + diag_step.row, bc }; 2772 check_better_fast(xd, cm, &chess_mv_1, best_mv, mv_limits, var_params, 2773 mv_cost_params, besterr, sse1, distortion, &dummy, 2774 is_scaled); 2775 2776 check_better_fast(xd, cm, &chess_mv_2, best_mv, mv_limits, var_params, 2777 mv_cost_params, besterr, sse1, distortion, &dummy, 2778 is_scaled); 2779 } else if (tr == br && tc != bc) { 2780 assert(diag_step.col == bc - tc); 2781 // Continue searching in the best direction 2782 const MV bottom_long_mv = { br + hstep, bc + diag_step.col }; 2783 const MV top_long_mv = { br - hstep, bc + diag_step.col }; 2784 check_better_fast(xd, cm, &bottom_long_mv, best_mv, mv_limits, var_params, 2785 mv_cost_params, besterr, sse1, distortion, &dummy, 2786 is_scaled); 2787 check_better_fast(xd, cm, &top_long_mv, best_mv, mv_limits, var_params, 2788 mv_cost_params, besterr, sse1, distortion, &dummy, 2789 is_scaled); 2790 2791 // Search in the direction opposite of the best quadrant 2792 const MV rev_mv = { br - diag_step.row, bc }; 2793 check_better_fast(xd, cm, &rev_mv, best_mv, mv_limits, var_params, 2794 mv_cost_params, besterr, sse1, distortion, &dummy, 2795 is_scaled); 2796 } else if (tr != br && tc == bc) { 2797 assert(diag_step.row == br - tr); 2798 // Continue searching in the best direction 2799 const MV right_long_mv = { br + diag_step.row, bc + hstep }; 2800 const MV left_long_mv = { br + diag_step.row, bc - hstep }; 2801 check_better_fast(xd, cm, &right_long_mv, best_mv, mv_limits, var_params, 2802 mv_cost_params, besterr, sse1, distortion, &dummy, 2803 is_scaled); 2804 check_better_fast(xd, cm, &left_long_mv, best_mv, mv_limits, var_params, 2805 mv_cost_params, besterr, sse1, distortion, &dummy, 2806 is_scaled); 2807 2808 // Search in the direction opposite of the best quadrant 2809 const MV rev_mv = { br, bc - diag_step.col }; 2810 check_better_fast(xd, cm, &rev_mv, best_mv, mv_limits, var_params, 2811 mv_cost_params, besterr, sse1, distortion, &dummy, 2812 is_scaled); 2813 } 2814 } 2815 2816 // Combines first level check and second level check when applicable. This first 2817 // searches the four cardinal directions, and perform several 2818 // diagonal/chess-pattern searches in the best quadrant. 2819 static AOM_FORCE_INLINE void two_level_checks_fast( 2820 MACROBLOCKD *xd, const AV1_COMMON *cm, const MV this_mv, MV *best_mv, 2821 int hstep, const SubpelMvLimits *mv_limits, 2822 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2823 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 2824 unsigned int *sse1, int *distortion, int iters, int is_scaled) { 2825 const MV diag_step = first_level_check_fast( 2826 xd, cm, this_mv, best_mv, hstep, mv_limits, var_params, mv_cost_params, 2827 besterr, sse1, distortion, is_scaled); 2828 if (iters > 1) { 2829 second_level_check_fast(xd, cm, this_mv, diag_step, best_mv, hstep, 2830 mv_limits, var_params, mv_cost_params, besterr, 2831 sse1, distortion, is_scaled); 2832 } 2833 } 2834 2835 static AOM_FORCE_INLINE MV 2836 first_level_check(MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV this_mv, 2837 MV *best_mv, const int hstep, const SubpelMvLimits *mv_limits, 2838 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2839 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 2840 unsigned int *sse1, int *distortion) { 2841 int dummy = 0; 2842 const MV left_mv = { this_mv.row, this_mv.col - hstep }; 2843 const MV right_mv = { this_mv.row, this_mv.col + hstep }; 2844 const MV top_mv = { this_mv.row - hstep, this_mv.col }; 2845 const MV bottom_mv = { this_mv.row + hstep, this_mv.col }; 2846 2847 const unsigned int left = 2848 check_better(xd, cm, &left_mv, best_mv, mv_limits, var_params, 2849 mv_cost_params, besterr, sse1, distortion, &dummy); 2850 const unsigned int right = 2851 check_better(xd, cm, &right_mv, best_mv, mv_limits, var_params, 2852 mv_cost_params, besterr, sse1, distortion, &dummy); 2853 const unsigned int up = 2854 check_better(xd, cm, &top_mv, best_mv, mv_limits, var_params, 2855 mv_cost_params, besterr, sse1, distortion, &dummy); 2856 const unsigned int down = 2857 check_better(xd, cm, &bottom_mv, best_mv, mv_limits, var_params, 2858 mv_cost_params, besterr, sse1, distortion, &dummy); 2859 2860 const MV diag_step = get_best_diag_step(hstep, left, right, up, down); 2861 const MV diag_mv = { this_mv.row + diag_step.row, 2862 this_mv.col + diag_step.col }; 2863 2864 // Check the diagonal direction with the best mv 2865 check_better(xd, cm, &diag_mv, best_mv, mv_limits, var_params, mv_cost_params, 2866 besterr, sse1, distortion, &dummy); 2867 2868 return diag_step; 2869 } 2870 2871 // A newer version of second level check that gives better quality. 2872 // TODO(chiyotsai@google.com): evaluate this on subpel_search_types different 2873 // from av1_find_best_sub_pixel_tree 2874 static AOM_FORCE_INLINE void second_level_check_v2( 2875 MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV this_mv, MV diag_step, 2876 MV *best_mv, const SubpelMvLimits *mv_limits, 2877 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2878 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 2879 unsigned int *sse1, int *distortion, int is_scaled) { 2880 assert(best_mv->row == this_mv.row + diag_step.row || 2881 best_mv->col == this_mv.col + diag_step.col); 2882 if (CHECK_MV_EQUAL(this_mv, *best_mv)) { 2883 return; 2884 } else if (this_mv.row == best_mv->row) { 2885 // Search away from diagonal step since diagonal search did not provide any 2886 // improvement 2887 diag_step.row *= -1; 2888 } else if (this_mv.col == best_mv->col) { 2889 diag_step.col *= -1; 2890 } 2891 2892 const MV row_bias_mv = { best_mv->row + diag_step.row, best_mv->col }; 2893 const MV col_bias_mv = { best_mv->row, best_mv->col + diag_step.col }; 2894 const MV diag_bias_mv = { best_mv->row + diag_step.row, 2895 best_mv->col + diag_step.col }; 2896 int has_better_mv = 0; 2897 2898 if (var_params->subpel_search_type != USE_2_TAPS_ORIG) { 2899 check_better(xd, cm, &row_bias_mv, best_mv, mv_limits, var_params, 2900 mv_cost_params, besterr, sse1, distortion, &has_better_mv); 2901 check_better(xd, cm, &col_bias_mv, best_mv, mv_limits, var_params, 2902 mv_cost_params, besterr, sse1, distortion, &has_better_mv); 2903 2904 // Do an additional search if the second iteration gives a better mv 2905 if (has_better_mv) { 2906 check_better(xd, cm, &diag_bias_mv, best_mv, mv_limits, var_params, 2907 mv_cost_params, besterr, sse1, distortion, &has_better_mv); 2908 } 2909 } else { 2910 check_better_fast(xd, cm, &row_bias_mv, best_mv, mv_limits, var_params, 2911 mv_cost_params, besterr, sse1, distortion, &has_better_mv, 2912 is_scaled); 2913 check_better_fast(xd, cm, &col_bias_mv, best_mv, mv_limits, var_params, 2914 mv_cost_params, besterr, sse1, distortion, &has_better_mv, 2915 is_scaled); 2916 2917 // Do an additional search if the second iteration gives a better mv 2918 if (has_better_mv) { 2919 check_better_fast(xd, cm, &diag_bias_mv, best_mv, mv_limits, var_params, 2920 mv_cost_params, besterr, sse1, distortion, 2921 &has_better_mv, is_scaled); 2922 } 2923 } 2924 } 2925 2926 // Gets the error at the beginning when the mv has fullpel precision 2927 static unsigned int setup_center_error( 2928 const MACROBLOCKD *xd, const MV *bestmv, 2929 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2930 const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) { 2931 const aom_variance_fn_ptr_t *vfp = var_params->vfp; 2932 const int w = var_params->w; 2933 const int h = var_params->h; 2934 2935 const MSBuffers *ms_buffers = &var_params->ms_buffers; 2936 const uint8_t *src = ms_buffers->src->buf; 2937 const uint8_t *y = get_buf_from_mv(ms_buffers->ref, *bestmv); 2938 const int src_stride = ms_buffers->src->stride; 2939 const int y_stride = ms_buffers->ref->stride; 2940 const uint8_t *second_pred = ms_buffers->second_pred; 2941 const uint8_t *mask = ms_buffers->mask; 2942 const int mask_stride = ms_buffers->mask_stride; 2943 const int invert_mask = ms_buffers->inv_mask; 2944 2945 unsigned int besterr; 2946 2947 if (second_pred != NULL) { 2948 #if CONFIG_AV1_HIGHBITDEPTH 2949 if (is_cur_buf_hbd(xd)) { 2950 DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]); 2951 uint8_t *comp_pred = CONVERT_TO_BYTEPTR(comp_pred16); 2952 if (mask) { 2953 aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, 2954 mask, mask_stride, invert_mask); 2955 } else { 2956 aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 2957 } 2958 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 2959 } else { 2960 DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); 2961 if (mask) { 2962 aom_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, mask, 2963 mask_stride, invert_mask); 2964 } else { 2965 aom_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 2966 } 2967 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 2968 } 2969 #else 2970 (void)xd; 2971 DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); 2972 if (mask) { 2973 aom_comp_mask_pred(comp_pred, second_pred, w, h, y, y_stride, mask, 2974 mask_stride, invert_mask); 2975 } else { 2976 aom_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 2977 } 2978 besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); 2979 #endif 2980 } else { 2981 besterr = vfp->vf(y, y_stride, src, src_stride, sse1); 2982 } 2983 *distortion = besterr; 2984 besterr += mv_err_cost_(bestmv, mv_cost_params); 2985 return besterr; 2986 } 2987 2988 // Gets the error at the beginning when the mv has fullpel precision 2989 static unsigned int upsampled_setup_center_error( 2990 MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *bestmv, 2991 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 2992 const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) { 2993 unsigned int besterr = upsampled_pref_error(xd, cm, bestmv, var_params, sse1); 2994 *distortion = besterr; 2995 besterr += mv_err_cost_(bestmv, mv_cost_params); 2996 return besterr; 2997 } 2998 2999 static inline int divide_and_round(int n, int d) { 3000 return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); 3001 } 3002 3003 static inline int is_cost_list_wellbehaved(const int *cost_list) { 3004 return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] && 3005 cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4]; 3006 } 3007 3008 // Returns surface minima estimate at given precision in 1/2^n bits. 3009 // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C 3010 // For a given set of costs S0, S1, S2, S3, S4 at points 3011 // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, 3012 // the solution for the location of the minima (x0, y0) is given by: 3013 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), 3014 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). 3015 // The code below is an integerized version of that. 3016 static inline void get_cost_surf_min(const int *cost_list, int *ir, int *ic, 3017 int bits) { 3018 *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)), 3019 (cost_list[1] - 2 * cost_list[0] + cost_list[3])); 3020 *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)), 3021 (cost_list[4] - 2 * cost_list[0] + cost_list[2])); 3022 } 3023 3024 // Checks the list of mvs searched in the last iteration and see if we are 3025 // repeating it. If so, return 1. Otherwise we update the last_mv_search_list 3026 // with current_mv and return 0. 3027 static inline int check_repeated_mv_and_update(int_mv *last_mv_search_list, 3028 const MV current_mv, int iter) { 3029 if (last_mv_search_list) { 3030 if (CHECK_MV_EQUAL(last_mv_search_list[iter].as_mv, current_mv)) { 3031 return 1; 3032 } 3033 3034 last_mv_search_list[iter].as_mv = current_mv; 3035 } 3036 return 0; 3037 } 3038 3039 static inline int setup_center_error_facade( 3040 MACROBLOCKD *xd, const AV1_COMMON *cm, const MV *bestmv, 3041 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3042 const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion, 3043 int is_scaled) { 3044 if (is_scaled) { 3045 return upsampled_setup_center_error(xd, cm, bestmv, var_params, 3046 mv_cost_params, sse1, distortion); 3047 } else { 3048 return setup_center_error(xd, bestmv, var_params, mv_cost_params, sse1, 3049 distortion); 3050 } 3051 } 3052 3053 int av1_find_best_sub_pixel_tree_pruned_more( 3054 MACROBLOCKD *xd, const AV1_COMMON *const cm, 3055 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, MV start_mv, 3056 const FULLPEL_MV_STATS *start_mv_stats, MV *bestmv, int *distortion, 3057 unsigned int *sse1, int_mv *last_mv_search_list) { 3058 (void)cm; 3059 const int allow_hp = ms_params->allow_hp; 3060 const int forced_stop = ms_params->forced_stop; 3061 const int iters_per_step = ms_params->iters_per_step; 3062 const int *cost_list = ms_params->cost_list; 3063 const SubpelMvLimits *mv_limits = &ms_params->mv_limits; 3064 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 3065 const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; 3066 3067 // The iteration we are current searching for. Iter 0 corresponds to fullpel 3068 // mv, iter 1 to half pel, and so on 3069 int iter = 0; 3070 int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel 3071 unsigned int besterr = INT_MAX; 3072 *bestmv = start_mv; 3073 3074 const struct scale_factors *const sf = is_intrabc_block(xd->mi[0]) 3075 ? &cm->sf_identity 3076 : xd->block_ref_scale_factors[0]; 3077 const int is_scaled = av1_is_scaled(sf); 3078 3079 if (start_mv_stats != NULL && !is_scaled) { 3080 besterr = start_mv_stats->distortion + start_mv_stats->err_cost; 3081 *distortion = start_mv_stats->distortion; 3082 *sse1 = start_mv_stats->sse; 3083 } else { 3084 besterr = 3085 setup_center_error_facade(xd, cm, bestmv, var_params, mv_cost_params, 3086 sse1, distortion, is_scaled); 3087 } 3088 3089 // If forced_stop is FULL_PEL, return. 3090 if (forced_stop == FULL_PEL) return besterr; 3091 3092 if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { 3093 return INT_MAX; 3094 } 3095 iter++; 3096 3097 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 3098 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 3099 cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { 3100 int ir, ic; 3101 get_cost_surf_min(cost_list, &ir, &ic, 1); 3102 if (ir != 0 || ic != 0) { 3103 const MV this_mv = { start_mv.row + ir * hstep, 3104 start_mv.col + ic * hstep }; 3105 int dummy = 0; 3106 check_better_fast(xd, cm, &this_mv, bestmv, mv_limits, var_params, 3107 mv_cost_params, &besterr, sse1, distortion, &dummy, 3108 is_scaled); 3109 } 3110 } else { 3111 two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits, 3112 var_params, mv_cost_params, &besterr, sse1, 3113 distortion, iters_per_step, is_scaled); 3114 } 3115 3116 // Each subsequent iteration checks at least one point in common with 3117 // the last iteration could be 2 ( if diag selected) 1/4 pel 3118 if (forced_stop < HALF_PEL) { 3119 if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { 3120 return INT_MAX; 3121 } 3122 iter++; 3123 3124 hstep >>= 1; 3125 start_mv = *bestmv; 3126 two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits, 3127 var_params, mv_cost_params, &besterr, sse1, 3128 distortion, iters_per_step, is_scaled); 3129 } 3130 3131 if (allow_hp && forced_stop == EIGHTH_PEL) { 3132 if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { 3133 return INT_MAX; 3134 } 3135 iter++; 3136 3137 hstep >>= 1; 3138 start_mv = *bestmv; 3139 two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits, 3140 var_params, mv_cost_params, &besterr, sse1, 3141 distortion, iters_per_step, is_scaled); 3142 } 3143 3144 return besterr; 3145 } 3146 3147 int av1_find_best_sub_pixel_tree_pruned( 3148 MACROBLOCKD *xd, const AV1_COMMON *const cm, 3149 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, MV start_mv, 3150 const FULLPEL_MV_STATS *start_mv_stats, MV *bestmv, int *distortion, 3151 unsigned int *sse1, int_mv *last_mv_search_list) { 3152 (void)cm; 3153 (void)start_mv_stats; 3154 const int allow_hp = ms_params->allow_hp; 3155 const int forced_stop = ms_params->forced_stop; 3156 const int iters_per_step = ms_params->iters_per_step; 3157 const int *cost_list = ms_params->cost_list; 3158 const SubpelMvLimits *mv_limits = &ms_params->mv_limits; 3159 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 3160 const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; 3161 3162 // The iteration we are current searching for. Iter 0 corresponds to fullpel 3163 // mv, iter 1 to half pel, and so on 3164 int iter = 0; 3165 int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel 3166 unsigned int besterr = INT_MAX; 3167 *bestmv = start_mv; 3168 3169 const struct scale_factors *const sf = is_intrabc_block(xd->mi[0]) 3170 ? &cm->sf_identity 3171 : xd->block_ref_scale_factors[0]; 3172 const int is_scaled = av1_is_scaled(sf); 3173 3174 if (start_mv_stats != NULL && !is_scaled) { 3175 besterr = start_mv_stats->distortion + start_mv_stats->err_cost; 3176 *distortion = start_mv_stats->distortion; 3177 *sse1 = start_mv_stats->sse; 3178 } else { 3179 besterr = 3180 setup_center_error_facade(xd, cm, bestmv, var_params, mv_cost_params, 3181 sse1, distortion, is_scaled); 3182 } 3183 3184 // If forced_stop is FULL_PEL, return. 3185 if (forced_stop == FULL_PEL) return besterr; 3186 3187 if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { 3188 return INT_MAX; 3189 } 3190 iter++; 3191 3192 if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && 3193 cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && 3194 cost_list[4] != INT_MAX) { 3195 const unsigned int whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + 3196 (cost_list[2] < cost_list[4] ? 0 : 2); 3197 3198 const MV left_mv = { start_mv.row, start_mv.col - hstep }; 3199 const MV right_mv = { start_mv.row, start_mv.col + hstep }; 3200 const MV bottom_mv = { start_mv.row + hstep, start_mv.col }; 3201 const MV top_mv = { start_mv.row - hstep, start_mv.col }; 3202 3203 const MV bottom_left_mv = { start_mv.row + hstep, start_mv.col - hstep }; 3204 const MV bottom_right_mv = { start_mv.row + hstep, start_mv.col + hstep }; 3205 const MV top_left_mv = { start_mv.row - hstep, start_mv.col - hstep }; 3206 const MV top_right_mv = { start_mv.row - hstep, start_mv.col + hstep }; 3207 3208 int dummy = 0; 3209 3210 switch (whichdir) { 3211 case 0: // bottom left quadrant 3212 check_better_fast(xd, cm, &left_mv, bestmv, mv_limits, var_params, 3213 mv_cost_params, &besterr, sse1, distortion, &dummy, 3214 is_scaled); 3215 check_better_fast(xd, cm, &bottom_mv, bestmv, mv_limits, var_params, 3216 mv_cost_params, &besterr, sse1, distortion, &dummy, 3217 is_scaled); 3218 check_better_fast(xd, cm, &bottom_left_mv, bestmv, mv_limits, 3219 var_params, mv_cost_params, &besterr, sse1, 3220 distortion, &dummy, is_scaled); 3221 break; 3222 case 1: // bottom right quadrant 3223 check_better_fast(xd, cm, &right_mv, bestmv, mv_limits, var_params, 3224 mv_cost_params, &besterr, sse1, distortion, &dummy, 3225 is_scaled); 3226 check_better_fast(xd, cm, &bottom_mv, bestmv, mv_limits, var_params, 3227 mv_cost_params, &besterr, sse1, distortion, &dummy, 3228 is_scaled); 3229 check_better_fast(xd, cm, &bottom_right_mv, bestmv, mv_limits, 3230 var_params, mv_cost_params, &besterr, sse1, 3231 distortion, &dummy, is_scaled); 3232 break; 3233 case 2: // top left quadrant 3234 check_better_fast(xd, cm, &left_mv, bestmv, mv_limits, var_params, 3235 mv_cost_params, &besterr, sse1, distortion, &dummy, 3236 is_scaled); 3237 check_better_fast(xd, cm, &top_mv, bestmv, mv_limits, var_params, 3238 mv_cost_params, &besterr, sse1, distortion, &dummy, 3239 is_scaled); 3240 check_better_fast(xd, cm, &top_left_mv, bestmv, mv_limits, var_params, 3241 mv_cost_params, &besterr, sse1, distortion, &dummy, 3242 is_scaled); 3243 break; 3244 case 3: // top right quadrant 3245 check_better_fast(xd, cm, &right_mv, bestmv, mv_limits, var_params, 3246 mv_cost_params, &besterr, sse1, distortion, &dummy, 3247 is_scaled); 3248 check_better_fast(xd, cm, &top_mv, bestmv, mv_limits, var_params, 3249 mv_cost_params, &besterr, sse1, distortion, &dummy, 3250 is_scaled); 3251 check_better_fast(xd, cm, &top_right_mv, bestmv, mv_limits, var_params, 3252 mv_cost_params, &besterr, sse1, distortion, &dummy, 3253 is_scaled); 3254 break; 3255 } 3256 } else { 3257 two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits, 3258 var_params, mv_cost_params, &besterr, sse1, 3259 distortion, iters_per_step, is_scaled); 3260 } 3261 3262 // Each subsequent iteration checks at least one point in common with 3263 // the last iteration could be 2 ( if diag selected) 1/4 pel 3264 if (forced_stop < HALF_PEL) { 3265 if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { 3266 return INT_MAX; 3267 } 3268 iter++; 3269 3270 hstep >>= 1; 3271 start_mv = *bestmv; 3272 two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits, 3273 var_params, mv_cost_params, &besterr, sse1, 3274 distortion, iters_per_step, is_scaled); 3275 } 3276 3277 if (allow_hp && forced_stop == EIGHTH_PEL) { 3278 if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { 3279 return INT_MAX; 3280 } 3281 iter++; 3282 3283 hstep >>= 1; 3284 start_mv = *bestmv; 3285 two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits, 3286 var_params, mv_cost_params, &besterr, sse1, 3287 distortion, iters_per_step, is_scaled); 3288 } 3289 3290 return besterr; 3291 } 3292 3293 int av1_find_best_sub_pixel_tree(MACROBLOCKD *xd, const AV1_COMMON *const cm, 3294 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, 3295 MV start_mv, 3296 const FULLPEL_MV_STATS *start_mv_stats, 3297 MV *bestmv, int *distortion, 3298 unsigned int *sse1, 3299 int_mv *last_mv_search_list) { 3300 (void)start_mv_stats; 3301 const int allow_hp = ms_params->allow_hp; 3302 const int forced_stop = ms_params->forced_stop; 3303 const int iters_per_step = ms_params->iters_per_step; 3304 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 3305 const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; 3306 const SUBPEL_SEARCH_TYPE subpel_search_type = 3307 ms_params->var_params.subpel_search_type; 3308 const SubpelMvLimits *mv_limits = &ms_params->mv_limits; 3309 3310 // How many steps to take. A round of 0 means fullpel search only, 1 means 3311 // half-pel, and so on. 3312 const int round = AOMMIN(FULL_PEL - forced_stop, 3 - !allow_hp); 3313 int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel 3314 3315 unsigned int besterr = INT_MAX; 3316 3317 *bestmv = start_mv; 3318 3319 const struct scale_factors *const sf = is_intrabc_block(xd->mi[0]) 3320 ? &cm->sf_identity 3321 : xd->block_ref_scale_factors[0]; 3322 const int is_scaled = av1_is_scaled(sf); 3323 3324 if (start_mv_stats != NULL && !is_scaled) { 3325 besterr = start_mv_stats->distortion + start_mv_stats->err_cost; 3326 *distortion = start_mv_stats->distortion; 3327 *sse1 = start_mv_stats->sse; 3328 } else { 3329 if (subpel_search_type != USE_2_TAPS_ORIG) { 3330 besterr = upsampled_setup_center_error(xd, cm, bestmv, var_params, 3331 mv_cost_params, sse1, distortion); 3332 } else { 3333 besterr = setup_center_error(xd, bestmv, var_params, mv_cost_params, sse1, 3334 distortion); 3335 } 3336 } 3337 3338 // If forced_stop is FULL_PEL, return. 3339 if (!round) return besterr; 3340 3341 for (int iter = 0; iter < round; ++iter) { 3342 MV iter_center_mv = *bestmv; 3343 if (check_repeated_mv_and_update(last_mv_search_list, iter_center_mv, 3344 iter)) { 3345 return INT_MAX; 3346 } 3347 3348 MV diag_step; 3349 if (subpel_search_type != USE_2_TAPS_ORIG) { 3350 diag_step = first_level_check(xd, cm, iter_center_mv, bestmv, hstep, 3351 mv_limits, var_params, mv_cost_params, 3352 &besterr, sse1, distortion); 3353 } else { 3354 diag_step = first_level_check_fast(xd, cm, iter_center_mv, bestmv, hstep, 3355 mv_limits, var_params, mv_cost_params, 3356 &besterr, sse1, distortion, is_scaled); 3357 } 3358 3359 // Check diagonal sub-pixel position 3360 if (!CHECK_MV_EQUAL(iter_center_mv, *bestmv) && iters_per_step > 1) { 3361 second_level_check_v2(xd, cm, iter_center_mv, diag_step, bestmv, 3362 mv_limits, var_params, mv_cost_params, &besterr, 3363 sse1, distortion, is_scaled); 3364 } 3365 3366 hstep >>= 1; 3367 } 3368 3369 return besterr; 3370 } 3371 3372 // Note(yunqingwang): The following 2 functions are only used in the motion 3373 // vector unit test, which return extreme motion vectors allowed by the MV 3374 // limits. 3375 // Returns the maximum MV. 3376 int av1_return_max_sub_pixel_mv(MACROBLOCKD *xd, const AV1_COMMON *const cm, 3377 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, 3378 MV start_mv, 3379 const FULLPEL_MV_STATS *start_mv_stats, 3380 MV *bestmv, int *distortion, unsigned int *sse1, 3381 int_mv *last_mv_search_list) { 3382 (void)xd; 3383 (void)cm; 3384 (void)start_mv; 3385 (void)start_mv_stats; 3386 (void)last_mv_search_list; 3387 3388 const int allow_hp = ms_params->allow_hp; 3389 const SubpelMvLimits *mv_limits = &ms_params->mv_limits; 3390 3391 bestmv->row = mv_limits->row_max; 3392 bestmv->col = mv_limits->col_max; 3393 3394 unsigned int besterr = 0; 3395 3396 // In the sub-pel motion search, if hp is not used, then the last bit of mv 3397 // has to be 0. 3398 lower_mv_precision(bestmv, allow_hp, 0); 3399 *distortion = besterr; 3400 *sse1 = besterr; 3401 return besterr; 3402 } 3403 3404 // Returns the minimum MV. 3405 int av1_return_min_sub_pixel_mv(MACROBLOCKD *xd, const AV1_COMMON *const cm, 3406 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, 3407 MV start_mv, 3408 const FULLPEL_MV_STATS *start_mv_stats, 3409 MV *bestmv, int *distortion, unsigned int *sse1, 3410 int_mv *last_mv_search_list) { 3411 (void)xd; 3412 (void)cm; 3413 (void)start_mv; 3414 (void)start_mv_stats; 3415 (void)last_mv_search_list; 3416 3417 const int allow_hp = ms_params->allow_hp; 3418 const SubpelMvLimits *mv_limits = &ms_params->mv_limits; 3419 3420 bestmv->row = mv_limits->row_min; 3421 bestmv->col = mv_limits->col_min; 3422 3423 unsigned int besterr = 0; 3424 // In the sub-pel motion search, if hp is not used, then the last bit of mv 3425 // has to be 0. 3426 lower_mv_precision(bestmv, allow_hp, 0); 3427 *distortion = besterr; 3428 *sse1 = besterr; 3429 return besterr; 3430 } 3431 3432 #if !CONFIG_REALTIME_ONLY 3433 // Computes the cost of the current predictor by going through the whole 3434 // av1_enc_build_inter_predictor pipeline. This is mainly used by warped mv 3435 // during motion_mode_rd. We are going through the whole 3436 // av1_enc_build_inter_predictor because we might have changed the interpolation 3437 // filter, etc before motion_mode_rd is called. 3438 static inline unsigned int compute_motion_cost( 3439 MACROBLOCKD *xd, const AV1_COMMON *const cm, 3440 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, BLOCK_SIZE bsize, 3441 const MV *this_mv) { 3442 unsigned int mse; 3443 unsigned int sse; 3444 const int mi_row = xd->mi_row; 3445 const int mi_col = xd->mi_col; 3446 3447 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 3448 AOM_PLANE_Y, AOM_PLANE_Y); 3449 3450 const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; 3451 const MSBuffers *ms_buffers = &var_params->ms_buffers; 3452 3453 const uint8_t *const src = ms_buffers->src->buf; 3454 const int src_stride = ms_buffers->src->stride; 3455 const uint8_t *const dst = xd->plane[0].dst.buf; 3456 const int dst_stride = xd->plane[0].dst.stride; 3457 const aom_variance_fn_ptr_t *vfp = ms_params->var_params.vfp; 3458 3459 mse = vfp->vf(dst, dst_stride, src, src_stride, &sse); 3460 mse += mv_err_cost_(this_mv, &ms_params->mv_cost_params); 3461 return mse; 3462 } 3463 3464 // Refines MV in a small range 3465 3466 // Macros to build bitmasks which help us avoid redundant computations 3467 // 3468 // To explain the idea here, imagine that on the first iteration of the 3469 // loop below, we step rightwards. Then, on the second iteration, the neighbors 3470 // to consider are: 3471 // . . . 3472 // 0 1 . 3473 // . . . 3474 // Where 0 is the initial search point, 1 is the best candidate found in the 3475 // first iteration, and the dots are the other neighbors of point 1. 3476 // 3477 // Naively, we would now need to scan all 8 neighbors of point 1 (point 0 and 3478 // the seven points marked with dots), and compare them to see where to move 3479 // next. However, we already evaluated 5 of those 8 neighbors in the last 3480 // iteration, and decided that they are worse than point 1. So we don't need 3481 // to re-consider these points. We only really need to consider the three 3482 // points which are adjacent to point 1 but *not* to point 0. 3483 // 3484 // As the algorithm goes on, there are other ways that redundant evaluations 3485 // can happen, if the search path curls back around on itself. 3486 // 3487 // To avoid all possible redundancies, we'd have to build a set containing 3488 // every point we have already checked, and this would be quite expensive. 3489 // 3490 // So instead, we apply a 95%-effective solution with a much lower overhead: 3491 // we prune out the points which were considered during the previous 3492 // iteration, but we don't worry about any prior iteration. This can be done 3493 // as follows: 3494 // 3495 // We build a static table, called neighbor_mask, which answers the question 3496 // "if we moved in direction X last time, which neighbors are new, and which 3497 // were scanned last iteration?" 3498 // Then we can query this table to quickly determine which points we need to 3499 // evaluate, and which we can skip. 3500 // 3501 // To query the table, the logic is simply: 3502 // neighbor_mask[i] & (1 << j) == "if we moved in direction i last iteration, 3503 // do we need to scan neighbor j this iteration?" 3504 #define NEIGHBOR_MASK_DIA(left, down, right, up) \ 3505 (left | (down << 1) | (right << 2) | (up << 3)) 3506 3507 #define NEIGHBOR_MASK_SQR(left, down, right, up, down_left, down_right, \ 3508 up_left, up_right) \ 3509 (left | (down << 1) | (right << 2) | (up << 3) | (down_left << 4) | \ 3510 (down_right << 5) | (up_left << 6) | (up_right << 7)) 3511 3512 static const warp_search_config warp_search_info[WARP_SEARCH_METHODS] = { 3513 // WARP_SEARCH_DIAMOND 3514 { 3515 .num_neighbors = 4, 3516 .neighbors = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }, 3517 .neighbor_mask = { 3518 // If we stepped left last time, consider all points except right 3519 NEIGHBOR_MASK_DIA(1, 1, 0, 1), 3520 // If we stepped down last time, consider all points except up 3521 NEIGHBOR_MASK_DIA(1, 1, 1, 0), 3522 // Stepped right last time 3523 NEIGHBOR_MASK_DIA(0, 1, 1, 1), 3524 // Stepped up last time 3525 NEIGHBOR_MASK_DIA(1, 0, 1, 1), 3526 }, 3527 }, 3528 // WARP_SEARCH_SQUARE 3529 { 3530 .num_neighbors = 8, 3531 .neighbors = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }, 3532 { 1, -1 }, { 1, 1 }, { -1, -1 }, { -1, 1 } }, 3533 .neighbor_mask = { 3534 // If we stepped left last time, then we only need to consider 3 points: 3535 // left, down+left, up+left 3536 NEIGHBOR_MASK_SQR(1, 0, 0, 0, 1, 0, 1, 0), 3537 // If we stepped down last time, then we only need to consider 3 points: 3538 // down, down+left, down+right 3539 NEIGHBOR_MASK_SQR(0, 1, 0, 0, 1, 1, 0, 0), 3540 // Stepped right last time 3541 NEIGHBOR_MASK_SQR(0, 0, 1, 0, 0, 1, 0, 1), 3542 // Stepped up last time 3543 NEIGHBOR_MASK_SQR(0, 0, 0, 1, 0, 0, 1, 1), 3544 3545 // If we stepped down+left last time, then we need to consider 5 points: 3546 // left, down, down+left, down+right, up+left 3547 NEIGHBOR_MASK_SQR(1, 1, 0, 0, 1, 1, 1, 0), 3548 // Stepped down+right last time 3549 NEIGHBOR_MASK_SQR(0, 1, 1, 0, 1, 1, 0, 1), 3550 // Stepped up+left last time 3551 NEIGHBOR_MASK_SQR(1, 0, 0, 1, 1, 0, 1, 1), 3552 // Stepped up+right last time 3553 NEIGHBOR_MASK_SQR(0, 0, 1, 1, 0, 1, 1, 1), 3554 }, 3555 }, 3556 }; 3557 3558 unsigned int av1_refine_warped_mv(MACROBLOCKD *xd, const AV1_COMMON *const cm, 3559 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, 3560 BLOCK_SIZE bsize, const int *pts0, 3561 const int *pts_inref0, int total_samples, 3562 WARP_SEARCH_METHOD search_method, 3563 int num_iterations) { 3564 MB_MODE_INFO *mbmi = xd->mi[0]; 3565 3566 const MV *neighbors = warp_search_info[search_method].neighbors; 3567 const int num_neighbors = warp_search_info[search_method].num_neighbors; 3568 const uint8_t *neighbor_mask = warp_search_info[search_method].neighbor_mask; 3569 3570 MV *best_mv = &mbmi->mv[0].as_mv; 3571 3572 WarpedMotionParams best_wm_params = mbmi->wm_params; 3573 int best_num_proj_ref = mbmi->num_proj_ref; 3574 unsigned int bestmse; 3575 const SubpelMvLimits *mv_limits = &ms_params->mv_limits; 3576 3577 const int mv_shift = ms_params->allow_hp ? 0 : 1; 3578 3579 // Calculate the center position's error 3580 assert(av1_is_subpelmv_in_range(mv_limits, *best_mv)); 3581 bestmse = compute_motion_cost(xd, cm, ms_params, bsize, best_mv); 3582 3583 // MV search 3584 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; 3585 const int mi_row = xd->mi_row; 3586 const int mi_col = xd->mi_col; 3587 3588 // First step always scans all neighbors 3589 uint8_t valid_neighbors = UINT8_MAX; 3590 3591 for (int ite = 0; ite < num_iterations; ++ite) { 3592 int best_idx = -1; 3593 3594 for (int idx = 0; idx < num_neighbors; ++idx) { 3595 if ((valid_neighbors & (1 << idx)) == 0) { 3596 continue; 3597 } 3598 3599 unsigned int thismse; 3600 3601 MV this_mv = { best_mv->row + neighbors[idx].row * (1 << mv_shift), 3602 best_mv->col + neighbors[idx].col * (1 << mv_shift) }; 3603 if (av1_is_subpelmv_in_range(mv_limits, this_mv)) { 3604 memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); 3605 memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); 3606 if (total_samples > 1) { 3607 mbmi->num_proj_ref = 3608 av1_selectSamples(&this_mv, pts, pts_inref, total_samples, bsize); 3609 } 3610 3611 if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, 3612 this_mv.row, this_mv.col, &mbmi->wm_params, 3613 mi_row, mi_col)) { 3614 thismse = compute_motion_cost(xd, cm, ms_params, bsize, &this_mv); 3615 3616 if (thismse < bestmse) { 3617 best_idx = idx; 3618 best_wm_params = mbmi->wm_params; 3619 best_num_proj_ref = mbmi->num_proj_ref; 3620 bestmse = thismse; 3621 } 3622 } 3623 } 3624 } 3625 3626 if (best_idx == -1) break; 3627 3628 if (best_idx >= 0) { 3629 best_mv->row += neighbors[best_idx].row * (1 << mv_shift); 3630 best_mv->col += neighbors[best_idx].col * (1 << mv_shift); 3631 valid_neighbors = neighbor_mask[best_idx]; 3632 } 3633 } 3634 3635 mbmi->wm_params = best_wm_params; 3636 mbmi->num_proj_ref = best_num_proj_ref; 3637 return bestmse; 3638 } 3639 3640 #endif // !CONFIG_REALTIME_ONLY 3641 // ============================================================================= 3642 // Subpixel Motion Search: OBMC 3643 // ============================================================================= 3644 // Estimates the variance of prediction residue 3645 static inline int estimate_obmc_pref_error( 3646 const MV *this_mv, const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3647 unsigned int *sse) { 3648 const aom_variance_fn_ptr_t *vfp = var_params->vfp; 3649 3650 const MSBuffers *ms_buffers = &var_params->ms_buffers; 3651 const int32_t *src = ms_buffers->wsrc; 3652 const int32_t *mask = ms_buffers->obmc_mask; 3653 const uint8_t *ref = get_buf_from_mv(ms_buffers->ref, *this_mv); 3654 const int ref_stride = ms_buffers->ref->stride; 3655 3656 const int subpel_x_q3 = get_subpel_part(this_mv->col); 3657 const int subpel_y_q3 = get_subpel_part(this_mv->row); 3658 3659 return vfp->osvf(ref, ref_stride, subpel_x_q3, subpel_y_q3, src, mask, sse); 3660 } 3661 3662 // Calculates the variance of prediction residue 3663 static int upsampled_obmc_pref_error(MACROBLOCKD *xd, const AV1_COMMON *cm, 3664 const MV *this_mv, 3665 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3666 unsigned int *sse) { 3667 const aom_variance_fn_ptr_t *vfp = var_params->vfp; 3668 const SUBPEL_SEARCH_TYPE subpel_search_type = var_params->subpel_search_type; 3669 const int w = var_params->w; 3670 const int h = var_params->h; 3671 3672 const MSBuffers *ms_buffers = &var_params->ms_buffers; 3673 const int32_t *wsrc = ms_buffers->wsrc; 3674 const int32_t *mask = ms_buffers->obmc_mask; 3675 const uint8_t *ref = get_buf_from_mv(ms_buffers->ref, *this_mv); 3676 const int ref_stride = ms_buffers->ref->stride; 3677 3678 const int subpel_x_q3 = get_subpel_part(this_mv->col); 3679 const int subpel_y_q3 = get_subpel_part(this_mv->row); 3680 3681 const int mi_row = xd->mi_row; 3682 const int mi_col = xd->mi_col; 3683 3684 unsigned int besterr; 3685 DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]); 3686 #if CONFIG_AV1_HIGHBITDEPTH 3687 if (is_cur_buf_hbd(xd)) { 3688 uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred); 3689 aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred8, w, h, 3690 subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd, 3691 subpel_search_type); 3692 besterr = vfp->ovf(pred8, w, wsrc, mask, sse); 3693 } else { 3694 aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, subpel_x_q3, 3695 subpel_y_q3, ref, ref_stride, subpel_search_type); 3696 3697 besterr = vfp->ovf(pred, w, wsrc, mask, sse); 3698 } 3699 #else 3700 aom_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, subpel_x_q3, 3701 subpel_y_q3, ref, ref_stride, subpel_search_type); 3702 3703 besterr = vfp->ovf(pred, w, wsrc, mask, sse); 3704 #endif 3705 return besterr; 3706 } 3707 3708 static unsigned int setup_obmc_center_error( 3709 const MV *this_mv, const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3710 const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) { 3711 // TODO(chiyotsai@google.com): There might be a bug here where we didn't use 3712 // get_buf_from_mv(ref, *this_mv). 3713 const MSBuffers *ms_buffers = &var_params->ms_buffers; 3714 const int32_t *wsrc = ms_buffers->wsrc; 3715 const int32_t *mask = ms_buffers->obmc_mask; 3716 const uint8_t *ref = ms_buffers->ref->buf; 3717 const int ref_stride = ms_buffers->ref->stride; 3718 unsigned int besterr = 3719 var_params->vfp->ovf(ref, ref_stride, wsrc, mask, sse1); 3720 *distortion = besterr; 3721 besterr += mv_err_cost_(this_mv, mv_cost_params); 3722 return besterr; 3723 } 3724 3725 static unsigned int upsampled_setup_obmc_center_error( 3726 MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *this_mv, 3727 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3728 const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) { 3729 unsigned int besterr = 3730 upsampled_obmc_pref_error(xd, cm, this_mv, var_params, sse1); 3731 *distortion = besterr; 3732 besterr += mv_err_cost_(this_mv, mv_cost_params); 3733 return besterr; 3734 } 3735 3736 // Estimates the variance of prediction residue 3737 // TODO(chiyotsai@google.com): the cost does does not match the cost in 3738 // mv_cost_. Investigate this later. 3739 static inline int estimate_obmc_mvcost(const MV *this_mv, 3740 const MV_COST_PARAMS *mv_cost_params) { 3741 const MV *ref_mv = mv_cost_params->ref_mv; 3742 const int *mvjcost = mv_cost_params->mvjcost; 3743 const int *const *mvcost = mv_cost_params->mvcost; 3744 const int error_per_bit = mv_cost_params->error_per_bit; 3745 const MV_COST_TYPE mv_cost_type = mv_cost_params->mv_cost_type; 3746 const MV diff_mv = { GET_MV_SUBPEL(this_mv->row - ref_mv->row), 3747 GET_MV_SUBPEL(this_mv->col - ref_mv->col) }; 3748 3749 switch (mv_cost_type) { 3750 case MV_COST_ENTROPY: 3751 return (unsigned)((mv_cost(&diff_mv, mvjcost, 3752 CONVERT_TO_CONST_MVCOST(mvcost)) * 3753 error_per_bit + 3754 4096) >> 3755 13); 3756 case MV_COST_NONE: return 0; 3757 default: 3758 assert(0 && "L1 norm is not tuned for estimated obmc mvcost"); 3759 return 0; 3760 } 3761 } 3762 3763 // Estimates whether this_mv is better than best_mv. This function incorporates 3764 // both prediction error and residue into account. 3765 static inline unsigned int obmc_check_better_fast( 3766 const MV *this_mv, MV *best_mv, const SubpelMvLimits *mv_limits, 3767 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3768 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 3769 unsigned int *sse1, int *distortion, int *has_better_mv) { 3770 unsigned int cost; 3771 if (av1_is_subpelmv_in_range(mv_limits, *this_mv)) { 3772 unsigned int sse; 3773 const int thismse = estimate_obmc_pref_error(this_mv, var_params, &sse); 3774 3775 cost = estimate_obmc_mvcost(this_mv, mv_cost_params); 3776 cost += thismse; 3777 3778 if (cost < *besterr) { 3779 *besterr = cost; 3780 *best_mv = *this_mv; 3781 *distortion = thismse; 3782 *sse1 = sse; 3783 *has_better_mv |= 1; 3784 } 3785 } else { 3786 cost = INT_MAX; 3787 } 3788 return cost; 3789 } 3790 3791 // Estimates whether this_mv is better than best_mv. This function incorporates 3792 // both prediction error and residue into account. 3793 static inline unsigned int obmc_check_better( 3794 MACROBLOCKD *xd, const AV1_COMMON *cm, const MV *this_mv, MV *best_mv, 3795 const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3796 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 3797 unsigned int *sse1, int *distortion, int *has_better_mv) { 3798 unsigned int cost; 3799 if (av1_is_subpelmv_in_range(mv_limits, *this_mv)) { 3800 unsigned int sse; 3801 const int thismse = 3802 upsampled_obmc_pref_error(xd, cm, this_mv, var_params, &sse); 3803 cost = mv_err_cost_(this_mv, mv_cost_params); 3804 3805 cost += thismse; 3806 3807 if (cost < *besterr) { 3808 *besterr = cost; 3809 *best_mv = *this_mv; 3810 *distortion = thismse; 3811 *sse1 = sse; 3812 *has_better_mv |= 1; 3813 } 3814 } else { 3815 cost = INT_MAX; 3816 } 3817 return cost; 3818 } 3819 3820 static AOM_FORCE_INLINE MV obmc_first_level_check( 3821 MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV this_mv, MV *best_mv, 3822 const int hstep, const SubpelMvLimits *mv_limits, 3823 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3824 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 3825 unsigned int *sse1, int *distortion) { 3826 int dummy = 0; 3827 const MV left_mv = { this_mv.row, this_mv.col - hstep }; 3828 const MV right_mv = { this_mv.row, this_mv.col + hstep }; 3829 const MV top_mv = { this_mv.row - hstep, this_mv.col }; 3830 const MV bottom_mv = { this_mv.row + hstep, this_mv.col }; 3831 3832 if (var_params->subpel_search_type != USE_2_TAPS_ORIG) { 3833 const unsigned int left = 3834 obmc_check_better(xd, cm, &left_mv, best_mv, mv_limits, var_params, 3835 mv_cost_params, besterr, sse1, distortion, &dummy); 3836 const unsigned int right = 3837 obmc_check_better(xd, cm, &right_mv, best_mv, mv_limits, var_params, 3838 mv_cost_params, besterr, sse1, distortion, &dummy); 3839 const unsigned int up = 3840 obmc_check_better(xd, cm, &top_mv, best_mv, mv_limits, var_params, 3841 mv_cost_params, besterr, sse1, distortion, &dummy); 3842 const unsigned int down = 3843 obmc_check_better(xd, cm, &bottom_mv, best_mv, mv_limits, var_params, 3844 mv_cost_params, besterr, sse1, distortion, &dummy); 3845 3846 const MV diag_step = get_best_diag_step(hstep, left, right, up, down); 3847 const MV diag_mv = { this_mv.row + diag_step.row, 3848 this_mv.col + diag_step.col }; 3849 3850 // Check the diagonal direction with the best mv 3851 obmc_check_better(xd, cm, &diag_mv, best_mv, mv_limits, var_params, 3852 mv_cost_params, besterr, sse1, distortion, &dummy); 3853 3854 return diag_step; 3855 } else { 3856 const unsigned int left = obmc_check_better_fast( 3857 &left_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, 3858 distortion, &dummy); 3859 const unsigned int right = obmc_check_better_fast( 3860 &right_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, 3861 sse1, distortion, &dummy); 3862 3863 const unsigned int up = obmc_check_better_fast( 3864 &top_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, 3865 distortion, &dummy); 3866 3867 const unsigned int down = obmc_check_better_fast( 3868 &bottom_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, 3869 sse1, distortion, &dummy); 3870 3871 const MV diag_step = get_best_diag_step(hstep, left, right, up, down); 3872 const MV diag_mv = { this_mv.row + diag_step.row, 3873 this_mv.col + diag_step.col }; 3874 3875 // Check the diagonal direction with the best mv 3876 obmc_check_better_fast(&diag_mv, best_mv, mv_limits, var_params, 3877 mv_cost_params, besterr, sse1, distortion, &dummy); 3878 3879 return diag_step; 3880 } 3881 } 3882 3883 // A newer version of second level check for obmc that gives better quality. 3884 static AOM_FORCE_INLINE void obmc_second_level_check_v2( 3885 MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV this_mv, MV diag_step, 3886 MV *best_mv, const SubpelMvLimits *mv_limits, 3887 const SUBPEL_SEARCH_VAR_PARAMS *var_params, 3888 const MV_COST_PARAMS *mv_cost_params, unsigned int *besterr, 3889 unsigned int *sse1, int *distortion) { 3890 assert(best_mv->row == this_mv.row + diag_step.row || 3891 best_mv->col == this_mv.col + diag_step.col); 3892 if (CHECK_MV_EQUAL(this_mv, *best_mv)) { 3893 return; 3894 } else if (this_mv.row == best_mv->row) { 3895 // Search away from diagonal step since diagonal search did not provide any 3896 // improvement 3897 diag_step.row *= -1; 3898 } else if (this_mv.col == best_mv->col) { 3899 diag_step.col *= -1; 3900 } 3901 3902 const MV row_bias_mv = { best_mv->row + diag_step.row, best_mv->col }; 3903 const MV col_bias_mv = { best_mv->row, best_mv->col + diag_step.col }; 3904 const MV diag_bias_mv = { best_mv->row + diag_step.row, 3905 best_mv->col + diag_step.col }; 3906 int has_better_mv = 0; 3907 3908 if (var_params->subpel_search_type != USE_2_TAPS_ORIG) { 3909 obmc_check_better(xd, cm, &row_bias_mv, best_mv, mv_limits, var_params, 3910 mv_cost_params, besterr, sse1, distortion, 3911 &has_better_mv); 3912 obmc_check_better(xd, cm, &col_bias_mv, best_mv, mv_limits, var_params, 3913 mv_cost_params, besterr, sse1, distortion, 3914 &has_better_mv); 3915 3916 // Do an additional search if the second iteration gives a better mv 3917 if (has_better_mv) { 3918 obmc_check_better(xd, cm, &diag_bias_mv, best_mv, mv_limits, var_params, 3919 mv_cost_params, besterr, sse1, distortion, 3920 &has_better_mv); 3921 } 3922 } else { 3923 obmc_check_better_fast(&row_bias_mv, best_mv, mv_limits, var_params, 3924 mv_cost_params, besterr, sse1, distortion, 3925 &has_better_mv); 3926 obmc_check_better_fast(&col_bias_mv, best_mv, mv_limits, var_params, 3927 mv_cost_params, besterr, sse1, distortion, 3928 &has_better_mv); 3929 3930 // Do an additional search if the second iteration gives a better mv 3931 if (has_better_mv) { 3932 obmc_check_better_fast(&diag_bias_mv, best_mv, mv_limits, var_params, 3933 mv_cost_params, besterr, sse1, distortion, 3934 &has_better_mv); 3935 } 3936 } 3937 } 3938 3939 int av1_find_best_obmc_sub_pixel_tree_up( 3940 MACROBLOCKD *xd, const AV1_COMMON *const cm, 3941 const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, MV start_mv, 3942 const FULLPEL_MV_STATS *start_mv_stats, MV *bestmv, int *distortion, 3943 unsigned int *sse1, int_mv *last_mv_search_list) { 3944 (void)last_mv_search_list; 3945 (void)start_mv_stats; 3946 const int allow_hp = ms_params->allow_hp; 3947 const int forced_stop = ms_params->forced_stop; 3948 const int iters_per_step = ms_params->iters_per_step; 3949 const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params; 3950 const SUBPEL_SEARCH_VAR_PARAMS *var_params = &ms_params->var_params; 3951 const SUBPEL_SEARCH_TYPE subpel_search_type = 3952 ms_params->var_params.subpel_search_type; 3953 const SubpelMvLimits *mv_limits = &ms_params->mv_limits; 3954 3955 int hstep = INIT_SUBPEL_STEP_SIZE; 3956 const int round = AOMMIN(FULL_PEL - forced_stop, 3 - !allow_hp); 3957 3958 unsigned int besterr = INT_MAX; 3959 *bestmv = start_mv; 3960 3961 if (subpel_search_type != USE_2_TAPS_ORIG) 3962 besterr = upsampled_setup_obmc_center_error( 3963 xd, cm, bestmv, var_params, mv_cost_params, sse1, distortion); 3964 else 3965 besterr = setup_obmc_center_error(bestmv, var_params, mv_cost_params, sse1, 3966 distortion); 3967 3968 for (int iter = 0; iter < round; ++iter) { 3969 MV iter_center_mv = *bestmv; 3970 MV diag_step = obmc_first_level_check(xd, cm, iter_center_mv, bestmv, hstep, 3971 mv_limits, var_params, mv_cost_params, 3972 &besterr, sse1, distortion); 3973 3974 if (!CHECK_MV_EQUAL(iter_center_mv, *bestmv) && iters_per_step > 1) { 3975 obmc_second_level_check_v2(xd, cm, iter_center_mv, diag_step, bestmv, 3976 mv_limits, var_params, mv_cost_params, 3977 &besterr, sse1, distortion); 3978 } 3979 hstep >>= 1; 3980 } 3981 3982 return besterr; 3983 } 3984 3985 // ============================================================================= 3986 // Public cost function: mv_cost + pred error 3987 // ============================================================================= 3988 int av1_get_mvpred_sse(const MV_COST_PARAMS *mv_cost_params, 3989 const FULLPEL_MV best_mv, 3990 const aom_variance_fn_ptr_t *vfp, 3991 const struct buf_2d *src, const struct buf_2d *pre) { 3992 const MV mv = get_mv_from_fullmv(&best_mv); 3993 unsigned int sse, var; 3994 3995 var = vfp->vf(src->buf, src->stride, get_buf_from_fullmv(pre, &best_mv), 3996 pre->stride, &sse); 3997 (void)var; 3998 3999 return sse + mv_err_cost_(&mv, mv_cost_params); 4000 }