rdopt.c (279172B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 #include <math.h> 14 #include <stdbool.h> 15 #include <stdint.h> 16 #include <string.h> 17 18 #include "config/aom_config.h" 19 #include "config/aom_dsp_rtcd.h" 20 #include "config/av1_rtcd.h" 21 22 #include "aom_dsp/aom_dsp_common.h" 23 #include "aom_dsp/blend.h" 24 #include "aom_mem/aom_mem.h" 25 #include "aom_ports/aom_timer.h" 26 #include "aom_ports/mem.h" 27 28 #include "av1/common/av1_common_int.h" 29 #include "av1/common/cfl.h" 30 #include "av1/common/blockd.h" 31 #include "av1/common/common.h" 32 #include "av1/common/common_data.h" 33 #include "av1/common/entropy.h" 34 #include "av1/common/entropymode.h" 35 #include "av1/common/enums.h" 36 #include "av1/common/idct.h" 37 #include "av1/common/mvref_common.h" 38 #include "av1/common/obmc.h" 39 #include "av1/common/pred_common.h" 40 #include "av1/common/quant_common.h" 41 #include "av1/common/reconinter.h" 42 #include "av1/common/reconintra.h" 43 #include "av1/common/scan.h" 44 #include "av1/common/seg_common.h" 45 #include "av1/common/txb_common.h" 46 #include "av1/common/warped_motion.h" 47 48 #include "av1/encoder/aq_variance.h" 49 #include "av1/encoder/av1_quantize.h" 50 #include "av1/encoder/block.h" 51 #include "av1/encoder/cost.h" 52 #include "av1/encoder/compound_type.h" 53 #include "av1/encoder/encodemb.h" 54 #include "av1/encoder/encodemv.h" 55 #include "av1/encoder/encoder.h" 56 #include "av1/encoder/encodetxb.h" 57 #include 
"av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/interp_search.h"
#include "av1/encoder/intra_mode_search.h"
#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/mode_prune_model_weights.h"
#include "av1/encoder/model_rd.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/palette.h"
#include "av1/encoder/pustats.h"
#include "av1/encoder/random.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/tx_search.h"
#include "av1/encoder/var_based_part.h"

// NOTE(review): used elsewhere in this file (not visible in this chunk) —
// presumably the index of the last single-reference NEWMV entry in
// av1_default_mode_order below; confirm against the THR_MODES enum.
#define LAST_NEW_MV_INDEX 6

// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
// The values are kept in Q12 format and equation used to derive is
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
#define MODE_THRESH_QBITS 12
// One entry per qindex (QINDEX_RANGE entries), monotonically decreasing from
// 2.5 in Q12 (10240) at qindex 0 down to 1.0 in Q12 (4096) at the maximum.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};

// Default evaluation order of the inter/intra prediction modes: first the
// single-reference modes grouped by mode type (NEAREST, NEW, NEAR, GLOBAL,
// one entry per reference frame), then the compound NEAREST_NEAREST modes
// (one per reference pair), then — for each reference pair — the remaining
// seven compound modes, and finally the intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV modes.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single-reference NEWMV modes.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single-reference NEARMV modes.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single-reference GLOBALMV modes.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST modes, one per reference-frame pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, grouped per reference pair in the order:
  // NEAR_NEAR, NEW_NEW, NEW_NEAREST, NEAREST_NEW, NEW_NEAR, NEAR_NEW,
  // GLOBAL_GLOBAL.
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};

/*!\cond */
// Result of evaluating one single-reference inter mode: its RD cost, the
// reference frame it used, and whether the entry has been filled in.
typedef struct SingleInterModeState {
  int64_t rd;
  MV_REFERENCE_FRAME ref_frame;
  int valid;
} SingleInterModeState;

// Running state of the inter mode search: the best mode found so far, its
// rate/distortion breakdown, and caches of intermediate single-reference
// results reused by later (compound) mode evaluations.
typedef struct InterModeSearchState {
  int64_t best_rd;
  int64_t best_skip_rd[2];
  MB_MODE_INFO best_mbmode;
  int best_rate_y;
  int best_rate_uv;
  int best_mode_skippable;
  int best_skip2;
  THR_MODES best_mode_index;
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];
  int64_t best_intra_rd;
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Rate of each cached NEWMV above, and whether the entry is populated.
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Modelled RD per [mode][ref_mv index][reference frame]; filled during
  // single-reference search, consulted when pruning compound modes.
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;
  RD_STATS best_y_rdcost;
} InterModeSearchState;
/*!\endcond */

// Reset the per-block-size inter mode RD models of a tile: mark each model
// not ready and zero its accumulated sample sums.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
    md->ready = 0;
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}

// Estimate residue coding cost and distortion for a block of size bsize with
// prediction error `sse`, using the fitted linear model ld = a * sse + b.
// Returns 1 and fills *est_residue_cost / *est_dist when the model is ready,
// 0 (outputs untouched) otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (md->ready) {
    if (sse < md->dist_mean) {
      // Below the mean distortion the model predicts no gain from coding the
      // residue: zero rate, distortion equal to the raw SSE.
      *est_residue_cost = 0;
      *est_dist = sse;
    } else {
      *est_dist = (int64_t)round(md->dist_mean);
      const double est_ld = md->a * sse + md->b;
      // Clamp estimated rate cost by INT_MAX / 2.
      // TODO(angiebird@google.com): find better solution than clamping.
      if (fabs(est_ld) < 1e-2) {
        // Near-zero slope would blow up the division below; saturate.
        *est_residue_cost = INT_MAX / 2;
      } else {
        double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
        if (est_residue_cost_dbl < 0) {
          *est_residue_cost = 0;
        } else {
          *est_residue_cost =
              (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
        }
      }
      if (*est_residue_cost <= 0) {
        // Degenerate estimate: fall back to "skip" semantics.
        *est_residue_cost = 0;
        *est_dist = sse;
      }
    }
    return 1;
  }
  return 0;
}

// Refit the linear rate-distortion models from the sums accumulated by
// inter_mode_data_push(). A model is (re)fitted once enough samples exist
// (200 for the first fit, 64 afterwards); later fits blend the new moments
// with the old ones (weight 3:1) before solving the least-squares line
// ld = a * sse + b. Sample sums are cleared after every fit.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    if (block_idx == -1) continue;
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      continue;
    } else {
      if (md->ready == 0) {
        md->dist_mean = md->dist_sum / md->num;
        md->ld_mean = md->ld_sum / md->num;
        md->sse_mean = md->sse_sum / md->num;
        md->sse_sse_mean = md->sse_sse_sum / md->num;
        md->sse_ld_mean = md->sse_ld_sum / md->num;
      } else {
        // Exponential blend of previous moments with the fresh batch.
        const double factor = 3;
        md->dist_mean =
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
        md->ld_mean =
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
        md->sse_mean =
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
        md->sse_sse_mean =
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
            (factor + 1);
        md->sse_ld_mean =
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
            (factor + 1);
      }

      const double my = md->ld_mean;
      const double mx = md->sse_mean;
      const double dx = sqrt(md->sse_sse_mean);
      const double dxy = md->sse_ld_mean;

      // Least-squares slope/intercept. NOTE(review): no guard against a zero
      // denominator (dx * dx == mx * mx, i.e. zero variance of sse) — confirm
      // inputs make that impossible.
      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
      md->b = my - md->a * mx;
      md->ready = 1;

      md->num = 0;
      md->dist_sum = 0;
      md->ld_sum = 0;
      md->sse_sum = 0;
      md->sse_sse_sum = 0;
      md->sse_ld_sum = 0;
    }
    (void)rdmult;
  }
}

// Accumulate one (sse, dist, rate) observation into the RD model for bsize.
// Samples with zero residue cost or lossless reconstruction (sse == dist)
// carry no slope information and are skipped.
static inline void inter_mode_data_push(TileDataEnc *tile_data,
                                        BLOCK_SIZE bsize, int64_t sse,
                                        int64_t dist, int residue_cost) {
  if (residue_cost == 0 || sse == dist) return;
  const int block_idx = inter_mode_data_block_idx(bsize);
  if (block_idx == -1) return;
  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
    // ld = distortion reduction per unit of rate spent on the residue.
    const double ld = (sse - dist) * 1. / residue_cost;
    ++rd_model->num;
    rd_model->dist_sum += dist;
    rd_model->ld_sum += ld;
    rd_model->sse_sum += sse;
    rd_model->sse_sse_sum += (double)sse * (double)sse;
    rd_model->sse_ld_sum += sse * ld;
  }
}

// Append a candidate inter mode (and its RD statistics) to the list that is
// later sorted by estimated RD. Caller must not exceed MAX_INTER_MODES.
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
                                         int mode_rate, int64_t sse, int64_t rd,
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
                                         RD_STATS *rd_cost_uv,
                                         const MB_MODE_INFO *mbmi) {
  const int num = inter_modes_info->num;
  assert(num < MAX_INTER_MODES);
  inter_modes_info->mbmi_arr[num] = *mbmi;
  inter_modes_info->mode_rate_arr[num] = mode_rate;
  inter_modes_info->sse_arr[num] = sse;
  inter_modes_info->est_rd_arr[num] = rd;
  inter_modes_info->rd_cost_arr[num] = *rd_cost;
  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
  ++inter_modes_info->num;
}

// qsort() comparator ordering RdIdxPair by ascending rd.
static int compare_rd_idx_pair(const void *a, const void *b) {
  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
    // To avoid inconsistency in qsort() ordering when two elements are equal,
    // using idx as tie breaker. Refer aomedia:2928
    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
      return 0;
    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
      return 1;
    else
      return -1;
  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
    return 1;
  } else {
    return -1;
  }
}

// Build and sort an (estimated rd, index) pair array for the collected inter
// modes so they can be re-examined in ascending estimated-RD order.
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
                                         RdIdxPair *rd_idx_pair_arr) {
  if (inter_modes_info->num == 0) {
    return;
  }
  for (int i = 0; i < inter_modes_info->num; ++i) {
    rd_idx_pair_arr[i].idx = i;
    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
  }
  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
        compare_rd_idx_pair);
}

// Similar to get_horver_correlation, but also takes into account first
// row/column, when computing horizontal/vertical correlation.
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
                                       int width, int height, float *hcorr,
                                       float *vcorr) {
  // The following notation is used:
  // x - current pixel
  // y - left neighbor pixel
  // z - top neighbor pixel
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;

  // First, process horizontal correlation on just the first row
  x_sum += diff[0];
  x2_sum += diff[0] * diff[0];
  x_firstrow += diff[0];
  x2_firstrow += diff[0] * diff[0];
  for (int j = 1; j < width; ++j) {
    const int16_t x = diff[j];
    const int16_t y = diff[j - 1];
    x_sum += x;
    x_firstrow += x;
    x2_sum += x * x;
    x2_firstrow += x * x;
    xy_sum += x * y;
  }

  // Process vertical correlation in the first column
  x_firstcol += diff[0];
  x2_firstcol += diff[0] * diff[0];
  for (int i = 1; i < height; ++i) {
    const int16_t x = diff[i * stride];
    const int16_t z = diff[(i - 1) * stride];
    x_sum += x;
    x_firstcol += x;
    x2_sum += x * x;
    x2_firstcol += x * x;
    xz_sum += x * z;
  }

  // Now process horiz and vert correlation through the rest unit
  for (int i = 1; i < height; ++i) {
    for (int j = 1; j < width; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      x_sum += x;
      x2_sum += x * x;
      xy_sum += x * y;
      xz_sum += x * z;
    }
  }

  // Sums over the last row/column, used below to exclude pixels that have no
  // right/bottom neighbor from the pairwise statistics.
  for (int j = 0; j < width; ++j) {
    x_finalrow += diff[(height - 1) * stride + j];
    x2_finalrow +=
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
  }
  for (int i = 0; i < height; ++i) {
    x_finalcol += diff[i * stride + width - 1];
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
  }

  int64_t xhor_sum = x_sum - x_finalcol;
  int64_t xver_sum = x_sum - x_finalrow;
  int64_t y_sum = x_sum - x_firstcol;
  int64_t z_sum = x_sum - x_firstrow;
  int64_t x2hor_sum = x2_sum - x2_finalcol;
  int64_t x2ver_sum = x2_sum - x2_finalrow;
  int64_t y2_sum = x2_sum - x2_firstcol;
  int64_t z2_sum = x2_sum - x2_firstrow;

  const float num_hor = (float)(height * (width - 1));
  const float num_ver = (float)((height - 1) * width);

  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;

  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;

  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;

  // Pearson correlations, clamped to be non-negative; a degenerate (zero
  // variance) direction is reported as fully correlated (1.0).
  if (xhor_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  } else {
    *hcorr = 1.0;
  }
  if (xver_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  } else {
    *vcorr = 1.0;
  }
}

// High-bitdepth variant of get_variance_stats() below: computes, for the
// luma plane, the energy of the residual left after removing a 3x3 Gaussian
// smoothing from (a) the reconstructed block (*rec_var) and (b) the source
// block (*src_var). Edges are handled by replicating border samples into a
// one-pixel pad around the block.
static void get_variance_stats_hbd(const MACROBLOCK *x, int64_t *src_var,
                                   int64_t *rec_var) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];

  BLOCK_SIZE bsize = mbmi->bsize;
  int bw = block_size_wide[bsize];
  int bh = block_size_high[bsize];

  // 3x3 Gaussian kernel, weights sum to 16 (hence the >> 4 below).
  static const int gau_filter[3][3] = {
    { 1, 2, 1 },
    { 2, 4, 2 },
    { 1, 2, 1 },
  };

  DECLARE_ALIGNED(16, uint16_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);

  uint16_t *pred_ptr = &dclevel[bw + 1];
  int pred_stride = xd->plane[0].dst.stride;

  // Copy the reconstructed block into the padded scratch buffer, replicating
  // the border row/column into the one-pixel pad.
  // NOTE(review): each padded row holds bw + 2 samples but rows are laid out
  // with stride bw, so pred_ptr[idy * bw + bw] aliases
  // pred_ptr[(idy + 1) * bw]; the later write wins and the right-edge pad of
  // every row except the last ends up holding the next row's left pad.
  // Confirm whether a row stride of bw + 2 was intended.
  for (int idy = -1; idy < bh + 1; ++idy) {
    for (int idx = -1; idx < bw + 1; ++idx) {
      int offset_idy = idy;
      int offset_idx = idx;
      if (idy == -1) offset_idy = 0;
      if (idy == bh) offset_idy = bh - 1;
      if (idx == -1) offset_idx = 0;
      if (idx == bw) offset_idx = bw - 1;

      int offset = offset_idy * pred_stride + offset_idx;
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(pd->dst.buf)[offset];
    }
  }

  // Sum of squared (sample - Gaussian-smoothed sample) over the block.
  *rec_var = 0;
  for (int idy = 0; idy < bh; ++idy) {
    for (int idx = 0; idx < bw; ++idx) {
      int sum = 0;
      for (int iy = 0; iy < 3; ++iy)
        for (int ix = 0; ix < 3; ++ix)
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
                 gau_filter[iy][ix];

      sum = sum >> 4;

      int64_t diff = pred_ptr[idy * bw + idx] - sum;
      *rec_var += diff * diff;
    }
  }
  *rec_var <<= 4;

  // Repeat the same computation on the source block.
  int src_stride = p->src.stride;
  for (int idy = -1; idy < bh + 1; ++idy) {
    for (int idx = -1; idx < bw + 1; ++idx) {
      int offset_idy = idy;
      int offset_idx = idx;
      if
      (idy == -1) offset_idy = 0;
      if (idy == bh) offset_idy = bh - 1;
      if (idx == -1) offset_idx = 0;
      if (idx == bw) offset_idx = bw - 1;

      int offset = offset_idy * src_stride + offset_idx;
      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(p->src.buf)[offset];
    }
  }

  *src_var = 0;
  for (int idy = 0; idy < bh; ++idy) {
    for (int idx = 0; idx < bw; ++idx) {
      int sum = 0;
      for (int iy = 0; iy < 3; ++iy)
        for (int ix = 0; ix < 3; ++ix)
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
                 gau_filter[iy][ix];

      sum = sum >> 4;

      int64_t diff = pred_ptr[idy * bw + idx] - sum;
      *src_var += diff * diff;
    }
  }
  *src_var <<= 4;
}

// Low-bitdepth twin of get_variance_stats_hbd(): measures, on the luma
// plane, the high-frequency energy (sample minus 3x3 Gaussian-smoothed
// sample, squared and summed) of the reconstructed block (*rec_var) and the
// source block (*src_var). Borders are replicated into a one-pixel pad.
static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
                               int64_t *rec_var) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];

  BLOCK_SIZE bsize = mbmi->bsize;
  int bw = block_size_wide[bsize];
  int bh = block_size_high[bsize];

  // 3x3 Gaussian kernel, weights sum to 16 (hence the >> 4 below).
  static const int gau_filter[3][3] = {
    { 1, 2, 1 },
    { 2, 4, 2 },
    { 1, 2, 1 },
  };

  DECLARE_ALIGNED(16, uint8_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);

  uint8_t *pred_ptr = &dclevel[bw + 1];
  int pred_stride = xd->plane[0].dst.stride;

  // Copy the reconstructed block into the padded scratch buffer.
  // NOTE(review): as in the high-bitdepth variant, padded rows of bw + 2
  // samples are written with row stride bw, so adjacent rows overlap by two
  // entries and the right-edge padding gets clobbered — confirm whether a
  // stride of bw + 2 was intended.
  for (int idy = -1; idy < bh + 1; ++idy) {
    for (int idx = -1; idx < bw + 1; ++idx) {
      int offset_idy = idy;
      int offset_idx = idx;
      if (idy == -1) offset_idy = 0;
      if (idy == bh) offset_idy = bh - 1;
      if (idx == -1) offset_idx = 0;
      if (idx == bw) offset_idx = bw - 1;

      int offset = offset_idy * pred_stride + offset_idx;
      pred_ptr[idy * bw + idx] = pd->dst.buf[offset];
    }
  }

  *rec_var = 0;
  for (int idy = 0; idy < bh; ++idy) {
    for (int idx = 0; idx < bw; ++idx) {
      int sum = 0;
      for (int iy = 0; iy < 3; ++iy)
        for (int ix = 0; ix < 3; ++ix)
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
                 gau_filter[iy][ix];

      sum = sum >> 4;

      int64_t diff = pred_ptr[idy * bw + idx] - sum;
      *rec_var += diff * diff;
    }
  }
  *rec_var <<= 4;

  // Repeat the same computation on the source block.
  int src_stride = p->src.stride;
  for (int idy = -1; idy < bh + 1; ++idy) {
    for (int idx = -1; idx < bw + 1; ++idx) {
      int offset_idy = idy;
      int offset_idx = idx;
      if (idy == -1) offset_idy = 0;
      if (idy == bh) offset_idy = bh - 1;
      if (idx == -1) offset_idx = 0;
      if (idx == bw) offset_idx = bw - 1;

      int offset = offset_idy * src_stride + offset_idx;
      pred_ptr[idy * bw + idx] = p->src.buf[offset];
    }
  }

  *src_var = 0;
  for (int idy = 0; idy < bh; ++idy) {
    for (int idx = 0; idx < bw; ++idx) {
      int sum = 0;
      for (int iy = 0; iy < 3; ++iy)
        for (int ix = 0; ix < 3; ++ix)
          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
                 gau_filter[iy][ix];

      sum = sum >> 4;

      int64_t diff = pred_ptr[idy * bw + idx] - sum;
      *src_var += diff * diff;
    }
  }
  *src_var <<= 4;
}

// Sharpness-mode RD bias: when sharpness == 3 and the frame is not a
// key/golden/alt-ref frame, penalize modes whose reconstruction has less
// high-frequency energy than the source by adding the deficit to the
// distortion and recomputing the RD cost.
static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
                          RD_STATS *rd_cost) {
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;

  if (frame_is_kf_gf_arf(cpi)) return;

  int64_t src_var, rec_var;

  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
  if (is_hbd)
    get_variance_stats_hbd(x, &src_var, &rec_var);
  else
    get_variance_stats(x, &src_var, &rec_var);

  // Only penalize lost detail (source sharper than reconstruction).
  if (src_var <= rec_var) return;

  int64_t var_offset = src_var - rec_var;

  rd_cost->dist += var_offset;

  rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
}

// Same sharpness-mode penalty as adjust_rdcost(), applied directly to a raw
// int64_t RD cost instead of an RD_STATS struct.
static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
                        int64_t *rd_cost) {
  if (cpi->oxcf.algo_cfg.sharpness != 3) return;

  if (frame_is_kf_gf_arf(cpi)) return;

  int64_t src_var, rec_var;
  const bool
      is_hbd = is_cur_buf_hbd(&x->e_mbd);

  if (is_hbd)
    get_variance_stats_hbd(x, &src_var, &rec_var);
  else
    get_variance_stats(x, &src_var, &rec_var);

  // Only penalize lost detail (source sharper than reconstruction).
  if (src_var <= rec_var) return;

  int64_t var_offset = src_var - rec_var;

  *rd_cost += RDCOST(x->rdmult, 0, var_offset);
}

// Sum of squared errors between source and current prediction over all
// coded planes (chroma only when this block is a chroma reference), via the
// per-block-size variance function table. Optionally reports the luma-only
// SSE through *sse_y. The total is scaled by 16 before returning.
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
                       int64_t *sse_y) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  int64_t total_sse = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    if (plane && !xd->is_chroma_ref) break;
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE bs =
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
    unsigned int sse;

    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
                            pd->dst.stride, &sse);
    total_sse += sse;
    if (!plane && sse_y) *sse_y = sse;
  }
  total_sse <<= 4;
  return total_sse;
}

// Reference C implementation: squared error between original and dequantized
// coefficients; *ssz receives the squared sum of the original coefficients.
// Products are computed in int (the high-bitdepth variant below widens to
// int64_t before multiplying).
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

// Low-precision (int16 coefficient) variant: squared error only, no *ssz.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (int i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: same as av1_block_error_c but with 64-bit
// products, and both results rounded and scaled down by 2 * (bd - 8) bits so
// they are comparable to the 8-bit path.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = (1 << shift) >> 1;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif

// Returns 1 when the given diagonal intra mode may be skipped because the
// best intra mode found so far is not one of its two neighboring directions
// (e.g. D113 is only kept when V or D135 was best).
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  if (mode == D113_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D67_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D203_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D157_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}

// Rate cost of signalling an inter prediction mode given its context.
// Compound modes use a single table lookup; single-reference modes are coded
// as a tree of binary decisions: NEWMV?, then GLOBALMV?, then
// NEARESTMV-vs-NEARMV, each with its own context bits extracted from
// mode_context.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  int mode_cost = 0;
  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;

  assert(is_inter_mode(mode));

  if (mode == NEWMV) {
    mode_cost = mode_costs->newmv_mode_cost[mode_ctx][0];
    return mode_cost;
  } else {
    mode_cost = mode_costs->newmv_mode_cost[mode_ctx][1];
    mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;

    if (mode == GLOBALMV) {
      mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][0];
      return mode_cost;
    } else {
      mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][1];
      mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
      mode_cost += mode_costs->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
      return mode_cost;
    }
  }
}

static
inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                       int ref_idx) {
  // Decompose a compound mode into the single-reference mode for the
  // requested reference list (0 or 1).
  return ref_idx ? compound_ref1_mode(this_mode)
                 : compound_ref0_mode(this_mode);
}

// Fills ref_costs_single[] and ref_costs_comp[][] with the rate cost of
// signalling each single / compound reference-frame choice in the current
// block's context. When the segment pins the reference frame, all costs are
// zero since nothing needs to be coded.
static inline void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Segment-level reference feature: the reference frame is implied, so
    // signalling it is free.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    // Every inter reference first pays the cost of signalling "inter".
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Forward refs carry the base cost plus the "bidirectional compound"
      // type cost; backward refs only accumulate their own tree costs below.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction is disabled for this frame; keep the tables
      // initialized with a fixed placeholder cost.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0
<= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}

// Snapshots the chosen coding state (skip flags, mode info and the reference
// MV info from mbmi_ext) into ctx so it can be restored later if this
// encoding decision is finally adopted.
static inline void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
  ctx->skippable = skippable;
#if CONFIG_INTERNAL_STATS
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}

// Sets up the prediction buffers for 'ref_frame', gathers the candidate MV
// references from neighbours, and runs the encoder-side MV prediction to
// choose a center point for subsequent motion searches.
static inline void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
  assert(yv12 != NULL);

  if (scaled_ref_frame) {
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
    // support scaling.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the center point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Go back to unscaled reference.
  if (scaled_ref_frame) {
    // We had temporarily setup pred block based on scaled reference above. Go
    // back to unscaled reference now, for subsequent use.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }
}

// Subpel margin (1/8-pel units) kept inside the frame border so that
// interpolation never reads outside the padded reference buffer.
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)

// TODO(jingning): this mv clamping function should be block size dependent.
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  // Clamp the MV to the block's allowed subpel range, leaving a margin for
  // interpolation at the frame border.
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
                                     xd->mb_to_bottom_edge +
                                         RIGHT_BOTTOM_MARGIN };
  clamp_mv(mv, &mv_limits);
}

/* If the current mode shares the same mv with other modes with higher cost,
 * skip this mode. Returns 1 when the mode is skipped (its modelled RD is
 * inherited from the cheaper equivalent mode), 0 otherwise. */
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
  if (!is_comp_pred) {
    if (this_mode == NEARMV) {
      if (ref_mv_count == 0) {
        // NEARMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // NEARMV has the same motion vector as GLOBALMV
        compare_mode = GLOBALMV;
      }
    }
    if (this_mode == GLOBALMV) {
      if (ref_mv_count == 0 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // GLOBALMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1) {
        // GLOBALMV has the same motion vector as NEARMV
        compare_mode = NEARMV;
      }
    }

    if (compare_mode != MB_MODE_COUNT) {
      // Use modelled_rd to check whether compare mode was searched
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
          INT64_MAX) {
        const int16_t mode_ctx =
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
        const int compare_cost =
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

        // Only skip if the mode cost is larger than compare mode cost
        if (this_cost > compare_cost) {
          // Inherit the searched RD so later pruning decisions still see it.
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
          return 1;
        }
      }
    }
  }
  return 0;
}

// Lowers the MV precision if required, clamps it to the border margins and
// returns whether the resulting full-pel MV lies inside the search limits.
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
                                     const AV1_COMMON *cm,
                                     const MACROBLOCK *x) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  *out_mv = in_mv;
  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
                     cm->features.cur_frame_force_integer_mv);
  clamp_mv2(&out_mv->as_mv, xd);
  return av1_is_fullmv_in_range(&x->mv_limits,
                                get_fullmv_from_mv(&out_mv->as_mv));
}

// To use single newmv directly for compound modes, need to clamp the mv to the
// valid mv range. Without this, encoder would generate out of range mv, and
// this is seen in 8k encoding.
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
                                     int ref_idx) {
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
  SubpelMvLimits mv_limits;

  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
  clamp_mv(&mv->as_mv, &mv_limits);
}

// Performs (or reuses) the NEWMV motion search for single and compound inter
// modes, filling cur_mv and *rate_mv. Returns 0 on success, or INT64_MAX if
// the mode should be skipped (invalid MV or a skip decision cached in
// mode_info).
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // refs[1] falls back to 0 when there is no second reference frame.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ?
                            0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    // Compound modes reuse the cached single-reference NEWMV results (when
    // valid) rather than redoing the motion search per compound mode.
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Both MVs are signalled: sum the bit cost of each.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second reference carries a NEWMV.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      // Only the first reference carries a NEWMV.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      // Speed feature: for non-zero ref_mv_idx, narrow the full-pel search
      // range around the closest previously searched reference MV whose
      // NEWMV result is available.
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev (max of row/col) distance between the ref MVs.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict when the closest ref MV is within 16 pels (1/8-pel
      // units here).
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so compound modes with the same reference can reuse it.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}

// Computes the [start, end] motion-mode index range to evaluate, trimmed
// according to the motion_mode_for_winner_cand and extra_prune_warped speed
// features.
static inline void update_mode_start_end_index(
    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
    int interintra_allowed, int eval_motion_mode) {
  *mode_index_start = (int)SIMPLE_TRANSLATION;
  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
    if (!eval_motion_mode) {
      *mode_index_end = (int)SIMPLE_TRANSLATION;
    } else {
      // Set the start index appropriately to process motion modes other than
      // simple translation
      *mode_index_start = 1;
    }
  }
  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
    *mode_index_end = SIMPLE_TRANSLATION;
}

// Increase rd cost of warp mode for low complexity decoding.
static inline void increase_warp_mode_rd(const MB_MODE_INFO *const best_mbmi,
                                         const MB_MODE_INFO *const this_mbmi,
                                         int64_t *const best_scaled_rd,
                                         int64_t *const this_scaled_rd,
                                         int rd_bias_scale_pct) {
  // Check rd bias percentage is non-zero.
  if (!rd_bias_scale_pct) return;
  // Invalid RD values cannot be meaningfully scaled.
  if (*best_scaled_rd == INT64_MAX || *this_scaled_rd == INT64_MAX) return;

  // Experiments have been performed with increasing the RD cost of warp mode at
  // the below locations of inter mode evaluation.
  // (1). Inter mode evaluation loop in av1_rd_pick_inter_mode().
  // (2). Motion mode evaluation during handle_inter_mode() call.
  // (3). Motion mode evaluation for winner motion modes.
  // (4). Tx search for best inter candidates.
  // Based on the speed quality trade-off results of this speed feature, the rd
  // bias logic is enabled only at (2), (3) and (4).
  const double rd_bias_scale = rd_bias_scale_pct / 100.0;
  // Scale whichever candidate(s) use warped motion by the bias percentage.
  if (best_mbmi->motion_mode == WARPED_CAUSAL)
    *best_scaled_rd += (int64_t)(rd_bias_scale * *best_scaled_rd);
  if (this_mbmi->motion_mode == WARPED_CAUSAL)
    *this_scaled_rd += (int64_t)(rd_bias_scale * *this_scaled_rd);
}

/*!\brief AV1 motion mode search
 *
 * \ingroup inter_mode_search
 * Function to search over and determine the motion mode. It will update
 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
 * WARPED_CAUSAL and determine any necessary side information for the selected
 * motion mode. It will also perform the full transform search, unless the
 * input parameter do_tx_search indicates to do an estimation of the RD rather
 * than an RD corresponding to a full transform search. It will return the
 * RD for the final motion_mode.
 * Do the RD search for a given inter mode and compute all information relevant
 * to the input mode. It will compute the best MV,
 * compound parameters (if the mode is a compound mode) and interpolation filter
 * parameters.
 *
 * \param[in]     cpi               Top-level encoder structure.
 * \param[in]     tile_data         Pointer to struct holding adaptive
 *                                  data/contexts/models for the tile during
 *                                  encoding.
 * \param[in]     x                 Pointer to struct holding all the data for
 *                                  the current macroblock.
 * \param[in]     bsize             Current block size.
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
 *                                  information.
 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
 *                                  for only the Y plane.
 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
 *                                  for only the UV planes.
 * \param[in]     args              HandleInterModeArgs struct holding
 *                                  miscellaneous arguments for inter mode
 *                                  search. See the documentation for this
 *                                  struct for a description of each member.
1461 * \param[in] ref_best_rd Best RD found so far for this block. 1462 * It is used for early termination of this 1463 * search if the RD exceeds this value. 1464 * \param[in,out] ref_skip_rd A length 2 array, where skip_rd[0] is the 1465 * best total RD for a skip mode so far, and 1466 * skip_rd[1] is the best RD for a skip mode so 1467 * far in luma. This is used as a speed feature 1468 * to skip the transform search if the computed 1469 * skip RD for the current mode is not better 1470 * than the best skip_rd so far. 1471 * \param[in,out] rate_mv The rate associated with the motion vectors. 1472 * This will be modified if a motion search is 1473 * done in the motion mode search. 1474 * \param[in,out] orig_dst A prediction buffer to hold a computed 1475 * prediction. This will eventually hold the 1476 * final prediction, and the tmp_dst info will 1477 * be copied here. 1478 * \param[in,out] best_est_rd Estimated RD for motion mode search if 1479 * do_tx_search (see below) is 0. 1480 * \param[in] do_tx_search Parameter to indicate whether or not to do 1481 * a full transform search. This will compute 1482 * an estimated RD for the modes without the 1483 * transform search and later perform the full 1484 * transform search on the best candidates. 1485 * \param[in] inter_modes_info InterModesInfo struct to hold inter mode 1486 * information to perform a full transform 1487 * search only on winning candidates searched 1488 * with an estimate for transform coding RD. 1489 * \param[in] eval_motion_mode Boolean whether or not to evaluate motion 1490 * motion modes other than SIMPLE_TRANSLATION. 1491 * \param[out] yrd Stores the rdcost corresponding to encoding 1492 * the luma plane. 1493 * \return Returns INT64_MAX if the determined motion mode is invalid and the 1494 * current motion mode being tested should be skipped. It returns 0 if the 1495 * motion mode search is a success. 
1496 */ 1497 static int64_t motion_mode_rd( 1498 const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x, 1499 BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y, 1500 RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd, 1501 int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst, 1502 int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info, 1503 int eval_motion_mode, int64_t *yrd) { 1504 const AV1_COMMON *const cm = &cpi->common; 1505 const FeatureFlags *const features = &cm->features; 1506 TxfmSearchInfo *txfm_info = &x->txfm_search_info; 1507 const int num_planes = av1_num_planes(cm); 1508 MACROBLOCKD *xd = &x->e_mbd; 1509 MB_MODE_INFO *mbmi = xd->mi[0]; 1510 const int is_comp_pred = has_second_ref(mbmi); 1511 const PREDICTION_MODE this_mode = mbmi->mode; 1512 const int rate2_nocoeff = rd_stats->rate; 1513 int best_xskip_txfm = 0; 1514 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv; 1515 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1516 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1517 const int rate_mv0 = *rate_mv; 1518 const int interintra_allowed = cm->seq_params->enable_interintra_compound && 1519 is_interintra_allowed(mbmi) && 1520 mbmi->compound_idx; 1521 WARP_SAMPLE_INFO *const warp_sample_info = 1522 &x->warp_sample_info[mbmi->ref_frame[0]]; 1523 int *pts0 = warp_sample_info->pts; 1524 int *pts_inref0 = warp_sample_info->pts_inref; 1525 1526 assert(mbmi->ref_frame[1] != INTRA_FRAME); 1527 const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1]; 1528 av1_invalid_rd_stats(&best_rd_stats); 1529 mbmi->num_proj_ref = 1; // assume num_proj_ref >=1 1530 MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION; 1531 *yrd = INT64_MAX; 1532 if (features->switchable_motion_mode) { 1533 // Determine which motion modes to search if more than SIMPLE_TRANSLATION 1534 // is allowed. 
1535 last_motion_mode_allowed = motion_mode_allowed( 1536 xd->global_motion, xd, mbmi, features->allow_warped_motion); 1537 } 1538 1539 if (last_motion_mode_allowed == WARPED_CAUSAL) { 1540 // Collect projection samples used in least squares approximation of 1541 // the warped motion parameters if WARPED_CAUSAL is going to be searched. 1542 if (warp_sample_info->num < 0) { 1543 warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0); 1544 } 1545 mbmi->num_proj_ref = warp_sample_info->num; 1546 } 1547 const int total_samples = mbmi->num_proj_ref; 1548 if (total_samples == 0) { 1549 // Do not search WARPED_CAUSAL if there are no samples to use to determine 1550 // warped parameters. 1551 last_motion_mode_allowed = OBMC_CAUSAL; 1552 } 1553 1554 const MB_MODE_INFO base_mbmi = *mbmi; 1555 MB_MODE_INFO best_mbmi; 1556 const int interp_filter = features->interp_filter; 1557 const int switchable_rate = 1558 av1_is_interp_needed(xd) 1559 ? av1_get_switchable_rate(x, xd, interp_filter, 1560 cm->seq_params->enable_dual_filter) 1561 : 0; 1562 int64_t best_rd = INT64_MAX; 1563 int best_rate_mv = rate_mv0; 1564 const int mi_row = xd->mi_row; 1565 const int mi_col = xd->mi_col; 1566 int mode_index_start, mode_index_end; 1567 const int txfm_rd_gate_level = 1568 get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound, 1569 cpi->sf.inter_sf.txfm_rd_gate_level, bsize, 1570 TX_SEARCH_MOTION_MODE, eval_motion_mode); 1571 1572 // Modify the start and end index according to speed features. For example, 1573 // if SIMPLE_TRANSLATION has already been searched according to 1574 // the motion_mode_for_winner_cand speed feature, update the mode_index_start 1575 // to avoid searching it again. 1576 update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end, 1577 last_motion_mode_allowed, interintra_allowed, 1578 eval_motion_mode); 1579 // Main function loop. This loops over all of the possible motion modes and 1580 // computes RD to determine the best one. 
This process includes computing 1581 // any necessary side information for the motion mode and performing the 1582 // transform search. 1583 for (int mode_index = mode_index_start; mode_index <= mode_index_end; 1584 mode_index++) { 1585 if (args->skip_motion_mode && mode_index) continue; 1586 int tmp_rate2 = rate2_nocoeff; 1587 const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed; 1588 int tmp_rate_mv = rate_mv0; 1589 1590 *mbmi = base_mbmi; 1591 if (is_interintra_mode) { 1592 // Only use SIMPLE_TRANSLATION for interintra 1593 mbmi->motion_mode = SIMPLE_TRANSLATION; 1594 } else { 1595 mbmi->motion_mode = (MOTION_MODE)mode_index; 1596 assert(mbmi->ref_frame[1] != INTRA_FRAME); 1597 } 1598 1599 if (cpi->oxcf.algo_cfg.sharpness == 3 && 1600 (mbmi->motion_mode == OBMC_CAUSAL || 1601 mbmi->motion_mode == WARPED_CAUSAL)) 1602 continue; 1603 1604 // Do not search OBMC if the probability of selecting it is below a 1605 // predetermined threshold for this update_type and block size. 1606 const FRAME_UPDATE_TYPE update_type = 1607 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index); 1608 int use_actual_frame_probs = 1; 1609 int prune_obmc; 1610 #if CONFIG_FPMT_TEST 1611 use_actual_frame_probs = 1612 (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1; 1613 if (!use_actual_frame_probs) { 1614 prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] < 1615 cpi->sf.inter_sf.prune_obmc_prob_thresh; 1616 } 1617 #endif 1618 if (use_actual_frame_probs) { 1619 prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] < 1620 cpi->sf.inter_sf.prune_obmc_prob_thresh; 1621 } 1622 if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) && 1623 mbmi->motion_mode == OBMC_CAUSAL) 1624 continue; 1625 1626 if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) { 1627 // SIMPLE_TRANSLATION mode: no need to recalculate. 
1628 // The prediction is calculated before motion_mode_rd() is called in 1629 // handle_inter_mode() 1630 } else if (mbmi->motion_mode == OBMC_CAUSAL) { 1631 const uint32_t cur_mv = mbmi->mv[0].as_int; 1632 // OBMC_CAUSAL not allowed for compound prediction 1633 assert(!is_comp_pred); 1634 if (have_newmv_in_inter_mode(this_mode)) { 1635 av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL, 1636 &mbmi->mv[0], NULL); 1637 tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; 1638 } 1639 if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) { 1640 // Build the predictor according to the current motion vector if it has 1641 // not already been built 1642 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, 1643 0, av1_num_planes(cm) - 1); 1644 } 1645 // Build the inter predictor by blending the predictor corresponding to 1646 // this MV, and the neighboring blocks using the OBMC model 1647 av1_build_obmc_inter_prediction( 1648 cm, xd, args->above_pred_buf, args->above_pred_stride, 1649 args->left_pred_buf, args->left_pred_stride); 1650 #if !CONFIG_REALTIME_ONLY 1651 } else if (mbmi->motion_mode == WARPED_CAUSAL) { 1652 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; 1653 mbmi->motion_mode = WARPED_CAUSAL; 1654 mbmi->wm_params.wmtype = DEFAULT_WMTYPE; 1655 mbmi->interp_filters = 1656 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter)); 1657 1658 memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); 1659 memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); 1660 // Select the samples according to motion vector difference 1661 if (mbmi->num_proj_ref > 1) { 1662 mbmi->num_proj_ref = av1_selectSamples( 1663 &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize); 1664 } 1665 1666 // Compute the warped motion parameters with a least squares fit 1667 // using the collected samples 1668 if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, 1669 mbmi->mv[0].as_mv.row, 
mbmi->mv[0].as_mv.col, 1670 &mbmi->wm_params, mi_row, mi_col)) { 1671 assert(!is_comp_pred); 1672 if (have_newmv_in_inter_mode(this_mode)) { 1673 // Refine MV for NEWMV mode 1674 const int_mv mv0 = mbmi->mv[0]; 1675 const WarpedMotionParams wm_params0 = mbmi->wm_params; 1676 const int num_proj_ref0 = mbmi->num_proj_ref; 1677 1678 const int_mv ref_mv = av1_get_ref_mv(x, 0); 1679 SUBPEL_MOTION_SEARCH_PARAMS ms_params; 1680 av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, 1681 &ref_mv.as_mv, NULL); 1682 1683 // Refine MV in a small range. 1684 av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0, 1685 total_samples, cpi->sf.mv_sf.warp_search_method, 1686 cpi->sf.mv_sf.warp_search_iters); 1687 1688 if (mv0.as_int != mbmi->mv[0].as_int) { 1689 // Keep the refined MV and WM parameters. 1690 tmp_rate_mv = av1_mv_bit_cost( 1691 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost, 1692 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT); 1693 tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; 1694 } else { 1695 // Restore the old MV and WM parameters. 
1696 mbmi->mv[0] = mv0; 1697 mbmi->wm_params = wm_params0; 1698 mbmi->num_proj_ref = num_proj_ref0; 1699 } 1700 } 1701 1702 // Build the warped predictor 1703 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 1704 av1_num_planes(cm) - 1); 1705 } else { 1706 continue; 1707 } 1708 #endif // !CONFIG_REALTIME_ONLY 1709 } else if (is_interintra_mode) { 1710 const int ret = 1711 av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd, 1712 &tmp_rate_mv, &tmp_rate2, orig_dst); 1713 if (ret < 0) continue; 1714 } 1715 1716 // If we are searching newmv and the mv is the same as refmv, skip the 1717 // current mode 1718 if (!av1_check_newmv_joint_nonzero(cm, x)) continue; 1719 1720 // Update rd_stats for the current motion mode 1721 txfm_info->skip_txfm = 0; 1722 rd_stats->dist = 0; 1723 rd_stats->sse = 0; 1724 rd_stats->skip_txfm = 1; 1725 rd_stats->rate = tmp_rate2; 1726 const ModeCosts *mode_costs = &x->mode_costs; 1727 if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate; 1728 if (interintra_allowed) { 1729 rd_stats->rate += 1730 mode_costs->interintra_cost[size_group_lookup[bsize]] 1731 [mbmi->ref_frame[1] == INTRA_FRAME]; 1732 } 1733 if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) && 1734 (mbmi->ref_frame[1] != INTRA_FRAME)) { 1735 if (last_motion_mode_allowed == WARPED_CAUSAL) { 1736 rd_stats->rate += 1737 mode_costs->motion_mode_cost[bsize][mbmi->motion_mode]; 1738 } else { 1739 rd_stats->rate += 1740 mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode]; 1741 } 1742 } 1743 1744 int64_t this_yrd = INT64_MAX; 1745 1746 if (!do_tx_search) { 1747 // Avoid doing a transform search here to speed up the overall mode 1748 // search. It will be done later in the mode search if the current 1749 // motion mode seems promising. 
1750 int64_t curr_sse = -1; 1751 int64_t sse_y = -1; 1752 int est_residue_cost = 0; 1753 int64_t est_dist = 0; 1754 int64_t est_rd = 0; 1755 if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { 1756 curr_sse = get_sse(cpi, x, &sse_y); 1757 const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse, 1758 &est_residue_cost, &est_dist); 1759 (void)has_est_rd; 1760 assert(has_est_rd); 1761 } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 || 1762 cpi->sf.rt_sf.use_nonrd_pick_mode) { 1763 model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD]( 1764 cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist, 1765 NULL, &curr_sse, NULL, NULL, NULL); 1766 sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]]; 1767 } 1768 est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist); 1769 if (est_rd * 0.80 > *best_est_rd) { 1770 mbmi->ref_frame[1] = ref_frame_1; 1771 continue; 1772 } 1773 const int mode_rate = rd_stats->rate; 1774 rd_stats->rate += est_residue_cost; 1775 rd_stats->dist = est_dist; 1776 rd_stats->rdcost = est_rd; 1777 if (rd_stats->rdcost < *best_est_rd) { 1778 *best_est_rd = rd_stats->rdcost; 1779 assert(sse_y >= 0); 1780 ref_skip_rd[1] = txfm_rd_gate_level 1781 ? 
RDCOST(x->rdmult, mode_rate, (sse_y << 4)) 1782 : INT64_MAX; 1783 } 1784 if (cm->current_frame.reference_mode == SINGLE_REFERENCE) { 1785 if (!is_comp_pred) { 1786 assert(curr_sse >= 0); 1787 inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, 1788 rd_stats->rdcost, rd_stats, rd_stats_y, 1789 rd_stats_uv, mbmi); 1790 } 1791 } else { 1792 assert(curr_sse >= 0); 1793 inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, 1794 rd_stats->rdcost, rd_stats, rd_stats_y, 1795 rd_stats_uv, mbmi); 1796 } 1797 mbmi->skip_txfm = 0; 1798 } else { 1799 // Perform full transform search 1800 int64_t skip_rd = INT64_MAX; 1801 int64_t skip_rdy = INT64_MAX; 1802 if (txfm_rd_gate_level) { 1803 // Check if the mode is good enough based on skip RD 1804 int64_t sse_y = INT64_MAX; 1805 int64_t curr_sse = get_sse(cpi, x, &sse_y); 1806 skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse); 1807 skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4)); 1808 int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd, 1809 txfm_rd_gate_level, 0); 1810 if (!eval_txfm) continue; 1811 } 1812 1813 // Do transform search 1814 const int mode_rate = rd_stats->rate; 1815 if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, 1816 rd_stats->rate, ref_best_rd)) { 1817 if (rd_stats_y->rate == INT_MAX && mode_index == 0) { 1818 return INT64_MAX; 1819 } 1820 continue; 1821 } 1822 const int skip_ctx = av1_get_skip_txfm_context(xd); 1823 const int y_rate = 1824 rd_stats->skip_txfm 1825 ? 
x->mode_costs.skip_txfm_cost[skip_ctx][1] 1826 : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]); 1827 this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist); 1828 1829 const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 1830 if (curr_rd < ref_best_rd) { 1831 ref_best_rd = curr_rd; 1832 ref_skip_rd[0] = skip_rd; 1833 ref_skip_rd[1] = skip_rdy; 1834 } 1835 if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { 1836 inter_mode_data_push( 1837 tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist, 1838 rd_stats_y->rate + rd_stats_uv->rate + 1839 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]); 1840 } 1841 } 1842 1843 if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) { 1844 if (is_nontrans_global_motion(xd, xd->mi[0])) { 1845 mbmi->interp_filters = 1846 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter)); 1847 } 1848 } 1849 1850 adjust_cost(cpi, x, &this_yrd); 1851 adjust_rdcost(cpi, x, rd_stats); 1852 adjust_rdcost(cpi, x, rd_stats_y); 1853 1854 const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 1855 if (mode_index == 0) { 1856 args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd; 1857 } 1858 int64_t best_scaled_rd = best_rd; 1859 int64_t this_scaled_rd = tmp_rd; 1860 if (mode_index != 0) 1861 increase_warp_mode_rd(&best_mbmi, mbmi, &best_scaled_rd, &this_scaled_rd, 1862 cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct); 1863 1864 if (mode_index == 0 || this_scaled_rd < best_scaled_rd) { 1865 // Update best_rd data if this is the best motion mode so far 1866 best_mbmi = *mbmi; 1867 best_rd = tmp_rd; 1868 best_rd_stats = *rd_stats; 1869 best_rd_stats_y = *rd_stats_y; 1870 best_rate_mv = tmp_rate_mv; 1871 *yrd = this_yrd; 1872 if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv; 1873 memcpy(best_blk_skip, txfm_info->blk_skip, 1874 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width); 1875 av1_copy_array(best_tx_type_map, xd->tx_type_map, 
                   xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  // If no candidate produced a valid RD cost, or the surviving candidate has
  // a disallowed zero joint NEWMV, report failure to the caller.
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  // Copy the winning mode info, RD stats and per-block transform decisions
  // back into the caller-visible structures.
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}

// Compute the RD cost of coding this block in skip mode: build the inter
// predictor one plane at a time, accumulate the residual SSE (scaled to the
// domain used by RDCOST), and combine it with the rate of signalling the
// skip_mode flag. Plane evaluation stops early once the running cost exceeds
// best_rd. Always returns 0; results are reported through rd_stats.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int64_t total_sse = 0;
  int64_t this_rd = INT64_MAX;
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  // In skip mode the only rate cost is the skip_mode flag itself.
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];

  for (int plane = 0; plane < num_planes; ++plane) {
    // Call av1_enc_build_inter_predictor() for one plane at a time.
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                  plane, plane);
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    av1_subtract_plane(x, plane_bsize, plane);

    int64_t sse =
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
    // Normalize high-bit-depth distortion back to an 8-bit scale, then shift
    // into the fixed-point distortion domain expected by RDCOST.
    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
    sse <<= 4;
    total_sse += sse;
    // When current rd cost is more than the best rd, skip evaluation of
    // remaining planes.
    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
    if (this_rd > best_rd) break;
  }

  rd_stats->dist = rd_stats->sse = total_sse;
  rd_stats->rdcost = this_rd;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}

// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
// mode
// Note(rachelbarker): This speed feature currently does not interact correctly
// with global motion. The issue is that, when global motion is used, GLOBALMV
// produces a different prediction to NEARESTMV/NEARMV even if the motion
// vectors are the same. Thus GLOBALMV should not be pruned in this case.
// Returns 1 if the motion vector that `single_mode` would use for reference
// `ref_idx` duplicates one produced by another of {NEARESTMV, NEARMV,
// GLOBALMV}, so evaluating this mode would be redundant; 0 otherwise.
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
                                      int ref_idx,
                                      const MV_REFERENCE_FRAME *ref_frame,
                                      PREDICTION_MODE single_mode) {
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  assert(single_mode != NEWMV);
  if (single_mode == NEARESTMV) {
    // NEARESTMV is never pruned here; it is the baseline candidate.
    return 0;
  } else if (single_mode == NEARMV) {
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
    if (ref_mv_count < 2) return 1;
  } else if (single_mode == GLOBALMV) {
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
    if (ref_mv_count == 0) return 1;
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
    else if (ref_mv_count == 1)
      return 0;

    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
    // Check GLOBALMV is matching with any mv in ref_mv_stack
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
      int_mv this_mv;

      if (ref_idx == 0)
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
      else
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;

      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
        return 1;
    }
  }
  return 0;
}

// Resolve the motion vector for one reference of `this_mode` into *this_mv.
// NEWMV is marked INVALID_MV (the actual MV comes from motion search later).
// Returns 0 when skip_repeated_ref_mv detects that this mode duplicates
// another mode's MV and should be skipped; returns 1 otherwise.
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
                              int ref_idx, int ref_mv_idx,
                              int skip_repeated_ref_mv,
                              const MV_REFERENCE_FRAME *ref_frame,
                              const MB_MODE_INFO_EXT *mbmi_ext) {
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
  assert(is_inter_singleref_mode(single_mode));
  if (single_mode == NEWMV) {
    this_mv->as_int = INVALID_MV;
  } else if (single_mode == GLOBALMV) {
    if (skip_repeated_ref_mv &&
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
      return 0;
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
  } else {
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
    // NEARESTMV reads stack entry 0; NEARMV reads entry ref_mv_idx + 1.
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
      assert(ref_mv_offset >= 0);
      if (ref_idx == 0) {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
      } else {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
      }
    } else {
      // The stack is too short for this index: fall back to the global MV,
      // optionally pruning the mode if that duplicates another candidate.
      if (skip_repeated_ref_mv &&
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
        return 0;
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
    }
  }
  return 1;
}

// Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
// population
static inline int skip_nearest_near_mv_using_refmv_weight(
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
  // Do not skip the mode if the current block has not yet obtained a valid
  // inter mode.
  if (!is_inter_mode(best_mode)) return 0;

  const MACROBLOCKD *xd = &x->e_mbd;
  // Do not skip the mode if both the top and left neighboring blocks are not
  // available.
  if (!xd->left_available || !xd->up_available) return 0;
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
  const int ref_mv_count =
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);

  if (ref_mv_count == 0) return 0;
  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;

  // Count number of ref mvs populated from nearest candidates
  int nearest_refmv_count = 0;
  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
  }

  // nearest_refmv_count indicates the closeness of block motion characteristics
  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
  // mode since these modes work well for blocks that shares similar motion
  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
  // mode is pruned if none of the ref mvs are populated from nearest candidate.
  const int prune_thresh = 1 + (ref_mv_count >= 2);
  if (nearest_refmv_count < prune_thresh) return 1;
  return 0;
}

// Populate cur_mv[] with the non-NEWMV motion vector(s) for the current
// prediction mode (one entry per reference; two for compound prediction).
// Returns 0 when the mode should be skipped (duplicate ref MV, or a clamped
// MV fails validation); returns 1 on success.
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);

  int ret = 1;
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    this_mv.as_int = INVALID_MV;
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // For NEWMV the stack entry is the predictor; the searched MV replaces
      // it later.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[i] =
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .this_mv
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .comp_mv;
    } else {
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}

// Compute the rate cost of signalling mbmi->ref_mv_idx (the dynamic reference
// list index) for the current mode, summing per-context binary decisions
// until the chosen index is reached.
static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
                               const MB_MODE_INFO_EXT *mbmi_ext,
                               const int (*const drl_mode_cost0)[2],
                               int8_t ref_frame_type) {
  int cost = 0;
  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
    for (int idx = 0; idx < 2; ++idx) {
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
        if (mbmi->ref_mv_idx == idx) return cost;
      }
    }
    return cost;
  }

  if (have_nearmv_in_inter_mode(mbmi->mode)) {
    // NEAR modes index the stack starting at entry 1 (entry 0 is NEARESTMV),
    // hence the (idx - 1) offset against ref_mv_idx.
    for (int idx = 1; idx < 3; ++idx) {
      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
      }
    }
    return cost;
  }
  return cost;
}

// Returns 0 if any NEWMV component of `this_mode` refers to a reference for
// which single-reference motion search previously failed to find a valid MV
// (per args->single_newmv_valid); 1 otherwise.
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
                                        const MB_MODE_INFO *const mbmi,
                                        PREDICTION_MODE this_mode) {
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
    if (single_mode == NEWMV &&
        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
      return 0;
    }
  }
  return 1;
}

// Return how many ref_mv_idx values should be searched for `mode`:
// 1 when the mode carries no DRL index, otherwise the usable stack depth
// capped at MAX_REF_MV_SEARCH.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
  const int has_drl =
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
  const int ref_set =
      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;

  return ref_set;
}

// Checks if particular ref_mv_idx should be pruned.
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
                                         const int qindex,
                                         const int ref_mv_idx) {
  if (reduce_inter_modes >= 3) return 1;
  // Q-index logic based pruning is enabled only for
  // reduce_inter_modes = 2.
  assert(reduce_inter_modes == 2);
  // When reduce_inter_modes=2, pruning happens as below based on q index.
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
  // For q index range between 86 and 170: prune if ref_mv_idx == 2.
  // For q index range between 171 and 255: no pruning.
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
  return (ref_mv_idx >= min_prune_ref_mv_idx);
}

// Whether this reference motion vector can be skipped, based on initial
// heuristics. Note: as a side effect this sets mbmi->ref_mv_idx to the
// candidate being evaluated, which later checks rely on.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // Prune low-weight candidates for the less-important LAST2/LAST3 refs.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Lower-bound the rate with header costs only; if even that exceeds the
  // budget, give up (except for the NEAREST modes, which stay as fallback).
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}

// Compute the estimated RD cost for the motion vector with simple translation.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          RD_STATS *rd_stats,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Snapshot the destination buffers so prediction writes go to the current
  // plane buffers.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Force the simplest compound configuration for this estimate.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Accumulate the header rate: ref frame + comp mode + DRL index.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early out if the header rate alone already exceeds the RD budget.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // Build a luma-only predictor and estimate residual rate/distortion with
  // the curve-fit model instead of a full transform search.
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}

// Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
// it is included.
2302 static inline void mask_set_bit(int *mask, int index) { *mask |= (1 << index); } 2303 2304 static inline bool mask_check_bit(int mask, int index) { 2305 return (mask >> index) & 0x1; 2306 } 2307 2308 // Before performing the full MV search in handle_inter_mode, do a simple 2309 // translation search and see if we can eliminate any motion vectors. 2310 // Returns an integer where, if the i-th bit is set, it means that the i-th 2311 // motion vector should be searched. This is only set for NEAR_MV. 2312 static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x, 2313 RD_STATS *rd_stats, 2314 HandleInterModeArgs *const args, 2315 int64_t ref_best_rd, BLOCK_SIZE bsize, 2316 const int ref_set) { 2317 // If the number of ref mv count is equal to 1, do not prune the same. It 2318 // is better to evaluate the same than to prune it. 2319 if (ref_set == 1) return 1; 2320 AV1_COMMON *const cm = &cpi->common; 2321 const MACROBLOCKD *const xd = &x->e_mbd; 2322 const MB_MODE_INFO *const mbmi = xd->mi[0]; 2323 const PREDICTION_MODE this_mode = mbmi->mode; 2324 2325 // Only search indices if they have some chance of being good. 2326 int good_indices = 0; 2327 for (int i = 0; i < ref_set; ++i) { 2328 if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args, 2329 ref_best_rd, i)) { 2330 continue; 2331 } 2332 mask_set_bit(&good_indices, i); 2333 } 2334 2335 // Only prune in NEARMV mode, if the speed feature is set, and the block size 2336 // is large enough. If these conditions are not met, return all good indices 2337 // found so far. 2338 if (!cpi->sf.inter_sf.prune_mode_search_simple_translation) 2339 return good_indices; 2340 if (!have_nearmv_in_inter_mode(this_mode)) return good_indices; 2341 if (num_pels_log2_lookup[bsize] <= 6) return good_indices; 2342 // Do not prune when there is internal resizing. TODO(elliottk) fix this 2343 // so b/2384 can be resolved. 
2344 if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) || 2345 (mbmi->ref_frame[1] > 0 && 2346 av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) { 2347 return good_indices; 2348 } 2349 2350 // Calculate the RD cost for the motion vectors using simple translation. 2351 int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX }; 2352 for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) { 2353 // If this index is bad, ignore it. 2354 if (!mask_check_bit(good_indices, ref_mv_idx)) { 2355 continue; 2356 } 2357 idx_rdcost[ref_mv_idx] = simple_translation_pred_rd( 2358 cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize); 2359 } 2360 // Find the index with the best RD cost. 2361 int best_idx = 0; 2362 for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) { 2363 if (idx_rdcost[i] < idx_rdcost[best_idx]) { 2364 best_idx = i; 2365 } 2366 } 2367 // Only include indices that are good and within a % of the best. 2368 const double dth = has_second_ref(mbmi) ? 1.05 : 1.001; 2369 // If the simple translation cost is not within this multiple of the 2370 // best RD, skip it. Note that the cutoff is derived experimentally. 2371 const double ref_dth = 5; 2372 int result = 0; 2373 for (int i = 0; i < ref_set; ++i) { 2374 if (mask_check_bit(good_indices, i) && 2375 (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth && 2376 (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) { 2377 mask_set_bit(&result, i); 2378 } 2379 } 2380 return result; 2381 } 2382 2383 /*!\brief Motion mode information for inter mode search speedup. 2384 * 2385 * Used in a speed feature to search motion modes other than 2386 * SIMPLE_TRANSLATION only on winning candidates. 2387 */ 2388 typedef struct motion_mode_candidate { 2389 /*! 2390 * Mode info for the motion mode candidate. 2391 */ 2392 MB_MODE_INFO mbmi; 2393 /*! 2394 * Rate describing the cost of the motion vectors for this candidate. 2395 */ 2396 int rate_mv; 2397 /*! 
2398 * Rate before motion mode search and transform coding is applied. 2399 */ 2400 int rate2_nocoeff; 2401 /*! 2402 * An integer value 0 or 1 which indicates whether or not to skip the motion 2403 * mode search and default to SIMPLE_TRANSLATION as a speed feature for this 2404 * candidate. 2405 */ 2406 int skip_motion_mode; 2407 /*! 2408 * Total RD cost for this candidate. 2409 */ 2410 int64_t rd_cost; 2411 } motion_mode_candidate; 2412 2413 /*!\cond */ 2414 typedef struct motion_mode_best_st_candidate { 2415 motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES]; 2416 int num_motion_mode_cand; 2417 } motion_mode_best_st_candidate; 2418 2419 // Checks if the current reference frame matches with neighbouring block's 2420 // (top/left) reference frames 2421 static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi, 2422 MB_MODE_INFO *nb_mbmi) { 2423 MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0], 2424 nb_mbmi->ref_frame[1] }; 2425 MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0], 2426 cur_mbmi->ref_frame[1] }; 2427 const int is_cur_comp_pred = has_second_ref(cur_mbmi); 2428 int match_found = 0; 2429 2430 for (int i = 0; i < (is_cur_comp_pred + 1); i++) { 2431 if ((cur_ref_frames[i] == nb_ref_frames[0]) || 2432 (cur_ref_frames[i] == nb_ref_frames[1])) 2433 match_found = 1; 2434 } 2435 return match_found; 2436 } 2437 2438 static inline int find_ref_match_in_above_nbs(const int total_mi_cols, 2439 MACROBLOCKD *xd) { 2440 if (!xd->up_available) return 1; 2441 const int mi_col = xd->mi_col; 2442 MB_MODE_INFO **cur_mbmi = xd->mi; 2443 // prev_row_mi points into the mi array, starting at the beginning of the 2444 // previous row. 
2445 MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride; 2446 const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols); 2447 uint8_t mi_step; 2448 for (int above_mi_col = mi_col; above_mi_col < end_col; 2449 above_mi_col += mi_step) { 2450 MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col; 2451 mi_step = mi_size_wide[above_mi[0]->bsize]; 2452 int match_found = 0; 2453 if (is_inter_block(*above_mi)) 2454 match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi); 2455 if (match_found) return 1; 2456 } 2457 return 0; 2458 } 2459 2460 static inline int find_ref_match_in_left_nbs(const int total_mi_rows, 2461 MACROBLOCKD *xd) { 2462 if (!xd->left_available) return 1; 2463 const int mi_row = xd->mi_row; 2464 MB_MODE_INFO **cur_mbmi = xd->mi; 2465 // prev_col_mi points into the mi array, starting at the top of the 2466 // previous column 2467 MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride; 2468 const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows); 2469 uint8_t mi_step; 2470 for (int left_mi_row = mi_row; left_mi_row < end_row; 2471 left_mi_row += mi_step) { 2472 MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride; 2473 mi_step = mi_size_high[left_mi[0]->bsize]; 2474 int match_found = 0; 2475 if (is_inter_block(*left_mi)) 2476 match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi); 2477 if (match_found) return 1; 2478 } 2479 return 0; 2480 } 2481 /*!\endcond */ 2482 2483 /*! \brief Struct used to hold TPL data to 2484 * narrow down parts of the inter mode search. 2485 */ 2486 typedef struct { 2487 /*! 2488 * The best inter cost out of all of the reference frames. 2489 */ 2490 int64_t best_inter_cost; 2491 /*! 2492 * The inter cost for each reference frame. 
 */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;

#if !CONFIG_REALTIME_ONLY
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates, over all TPL units covered by the current block, the TPL
// prediction error per reference frame into
// inter_cost_info_from_tpl->ref_inter_cost[], then records the minimum
// non-zero cost among valid references in best_inter_cost.
static inline void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  // No TPL stats collected for this frame; leave the info struct untouched.
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column coordinates are converted to the superres (upscaled) domain since
  // TPL stats are stored at superres resolution.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
#endif

// Returns 1 if the mode identified by (refs, ref_mv_idx, this_mode) should be
// pruned based on TPL-model inter costs, 0 otherwise. Reference frame k maps
// to ref_inter_cost[k - 1] (INTRA_FRAME occupies index 0 of the frame enum).
static inline int prune_modes_based_on_tpl_stats(
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
    const PREDICTION_MODE this_mode, int prune_mode_level) {
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
  // At the lowest pruning level only non-NEWMV modes are considered.
  if ((prune_mode_level < 2) && have_newmv) return 0;

  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
  // No usable TPL stats; do not prune.
  if (best_inter_cost == INT64_MAX) return 0;

  const int prune_level = prune_mode_level - 1;
  int64_t cur_inter_cost;

  const int is_globalmv =
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;

  // Thresholds used for pruning:
  // Lower value indicates aggressive pruning and higher value indicates
  // conservative pruning which is set based on ref_mv_idx and speed feature.
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
  // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
  };

  const int is_comp_pred = (refs[1] > INTRA_FRAME);
  if (!is_comp_pred) {
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
  } else {
    const int64_t inter_cost_ref0 =
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
    const int64_t inter_cost_ref1 =
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
    // more aggressive pruning
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
  }

  // Prune the mode if cur_inter_cost is greater than threshold times
  // best_inter_cost (the multiplier table is in units of 1/4).
  if (cur_inter_cost >
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
        best_inter_cost) >>
       2))
    return 1;
  return 0;
}

/*!\brief High level function to select parameters for compound mode.
 *
 * \ingroup inter_mode_search
 * The main search functionality is done in the call to av1_compound_type_rd().
 *
 * \param[in]     cpi               Top-level encoder structure.
 * \param[in]     x                 Pointer to struct holding all the data for
 *                                  the current macroblock.
 * \param[in]     args              HandleInterModeArgs struct holding
 *                                  miscellaneous arguments for inter mode
 *                                  search. See the documentation for this
 *                                  struct for a description of each member.
 * \param[in]     ref_best_rd       Best RD found so far for this block.
 *                                  It is used for early termination of this
 *                                  search if the RD exceeds this value.
 * \param[in,out] cur_mv            Current motion vector.
 * \param[in]     bsize             Current block size.
 * \param[in,out] compmode_interinter_cost RD of the selected interinter
 *                                  compound mode.
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
 *                                  allocated buffers for the compound
 *                                  predictors and masks in the compound type
 *                                  search.
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
 *                                  prediction. This will eventually hold the
 *                                  final prediction, and the tmp_dst info will
 *                                  be copied here.
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
 *                                  computed prediction.
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
 *                                  This will be modified if a motion search is
 *                                  done in the motion mode search.
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
 *                                  information.
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
 *                                  best total RD for a skip mode so far, and
 *                                  skip_rd[1] is the best RD for a skip mode so
 *                                  far in luma. This is used as a speed feature
 *                                  to skip the transform search if the computed
 *                                  skip RD for the current mode is not better
 *                                  than the best skip_rd so far.
 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
 *                                  predictor. If this is 0, the inter predictor
 *                                  has already been built and thus we can avoid
 *                                  repeating computation.
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
 *         a viable candidate.
 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  // Start from the full set of compound types; av1_compound_type_rd() narrows
  // the search internally (e.g. using masked_compound_used).
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Early termination: the best compound RD (with a scaling margin) already
  // exceeds the best RD seen so far for this block.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}

// Speed feature to prune out MVs that are similar to previous MVs if they
// don't achieve the best RD advantage. Returns 1 to prune the current
// ref_mv_idx, 0 otherwise. As a side effect, records the current MVs in
// save_mv[ref_mv_idx] so later indices can be compared against them.
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
  int i;
  const int is_comp_pred = has_second_ref(mbmi);
  // Similarity threshold on the summed |row|+|col| MV differences; doubled
  // for compound prediction since two MVs contribute to the sum.
  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);

  // Skip the evaluation if an MV match is found.
  if (ref_mv_idx > 0) {
    for (int idx = 0; idx < ref_mv_idx; ++idx) {
      if (save_mv[idx][0].as_int == INVALID_MV) continue;

      int mv_diff = 0;
      for (i = 0; i < 1 + is_comp_pred; ++i) {
        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
      }

      // If this mode is not the best one, and current MV is similar to
      // previous stored MV, terminate this ref_mv_idx evaluation.
      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
    }
  }

  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
    for (i = 0; i < is_comp_pred + 1; ++i)
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
  }

  return 0;
}

/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
 *
 * \ingroup inter_mode_search
 *
 * Compares the sse of zero mv and the best sse found in single new_mv.
If the 2744 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped. 2745 * Else returns 0. 2746 * 2747 * Note that the sse of here comes from single_motion_search. So it is 2748 * interpolated with the filter in motion search, not the actual interpolation 2749 * filter used in encoding. 2750 * 2751 * \param[in] fn_ptr A table of function pointers to compute SSE. 2752 * \param[in] x Pointer to struct holding all the data for 2753 * the current macroblock. 2754 * \param[in] bsize The current block_size. 2755 * \param[in] args The args to handle_inter_mode, used to track 2756 * the best SSE. 2757 * \param[in] prune_zero_mv_with_sse The argument holds speed feature 2758 * prune_zero_mv_with_sse value 2759 * \return Returns 1 if zero_mv is pruned, 0 otherwise. 2760 */ 2761 static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr, 2762 const MACROBLOCK *x, BLOCK_SIZE bsize, 2763 const HandleInterModeArgs *args, 2764 int prune_zero_mv_with_sse) { 2765 const MACROBLOCKD *xd = &x->e_mbd; 2766 const MB_MODE_INFO *mbmi = xd->mi[0]; 2767 2768 const int is_comp_pred = has_second_ref(mbmi); 2769 const MV_REFERENCE_FRAME *refs = mbmi->ref_frame; 2770 2771 for (int idx = 0; idx < 1 + is_comp_pred; idx++) { 2772 if (xd->global_motion[refs[idx]].wmtype != IDENTITY) { 2773 // Pruning logic only works for IDENTITY type models 2774 // Note: In theory we could apply similar logic for TRANSLATION 2775 // type models, but we do not code these due to a spec bug 2776 // (see comments in gm_get_motion_vector() in av1/common/mv.h) 2777 assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION); 2778 return 0; 2779 } 2780 2781 // Don't prune if we have invalid data 2782 assert(mbmi->mv[idx].as_int == 0); 2783 if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) { 2784 return 0; 2785 } 2786 } 2787 2788 // Sum up the sse of ZEROMV and best NEWMV 2789 unsigned int this_sse_sum = 0; 2790 unsigned int best_sse_sum = 0; 2791 for (int idx = 0; idx < 1 + 
is_comp_pred; idx++) { 2792 const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y]; 2793 const struct macroblockd_plane *pd = xd->plane; 2794 const struct buf_2d *src_buf = &p->src; 2795 const struct buf_2d *ref_buf = &pd->pre[idx]; 2796 const uint8_t *src = src_buf->buf; 2797 const uint8_t *ref = ref_buf->buf; 2798 const int src_stride = src_buf->stride; 2799 const int ref_stride = ref_buf->stride; 2800 2801 unsigned int this_sse; 2802 fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse); 2803 this_sse_sum += this_sse; 2804 2805 const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]]; 2806 best_sse_sum += best_sse; 2807 } 2808 2809 const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25; 2810 if ((double)this_sse_sum > (mul * (double)best_sse_sum)) { 2811 return 1; 2812 } 2813 2814 return 0; 2815 } 2816 2817 /*!\brief Searches for interpolation filter in realtime mode during winner eval 2818 * 2819 * \ingroup inter_mode_search 2820 * 2821 * Does a simple interpolation filter search during winner mode evaluation. This 2822 * is currently only used by realtime mode as \ref 2823 * av1_interpolation_filter_search is not called during realtime encoding. 2824 * 2825 * This function only searches over two possible filters. EIGHTTAP_REGULAR is 2826 * always search. For lowres clips (<= 240p), MULTITAP_SHARP is also search. For 2827 * higher res slips (>240p), EIGHTTAP_SMOOTH is also searched. 2828 * * 2829 * \param[in] cpi Pointer to the compressor. Used for feature 2830 * flags. 2831 * \param[in,out] x Pointer to macroblock. This is primarily 2832 * used to access the buffers. 2833 * \param[in] mi_row The current row in mi unit (4X4 pixels). 2834 * \param[in] mi_col The current col in mi unit (4X4 pixels). 2835 * \param[in] bsize The current block_size. 2836 * \return Returns true if a predictor is built in xd->dst, false otherwise. 
 */
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // No subpel interpolation needed, so the filter choice is irrelevant and no
  // predictor is built here.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Only two of the three candidates are evaluated per resolution class.
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // Move the new winner into dst_bufs[1]; subsequent predictions are
      // always built into dst_bufs[0].
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
    // Copy the best luma predictor from the temporary buffer back into the
    // original destination buffer.
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the YUV predictor.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}

/*!\brief AV1 inter mode RD computation
 *
 * \ingroup inter_mode_search
 * Do the RD search for a given inter mode and compute all information relevant
 * to the input mode. It will compute the best MV,
 * compound parameters (if the mode is a compound mode) and interpolation filter
 * parameters.
 *
 * \param[in]     cpi          Top-level encoder structure.
 * \param[in]     tile_data    Pointer to struct holding adaptive
 *                             data/contexts/models for the tile during
 *                             encoding.
 * \param[in]     x            Pointer to structure holding all the data
 *                             for the current macroblock.
 * \param[in]     bsize        Current block size.
 * \param[in,out] rd_stats     Struct to keep track of the overall RD
 *                             information.
 * \param[in,out] rd_stats_y   Struct to keep track of the RD information
 *                             for only the Y plane.
 * \param[in,out] rd_stats_uv  Struct to keep track of the RD information
 *                             for only the UV planes.
 * \param[in]     args         HandleInterModeArgs struct holding
 *                             miscellaneous arguments for inter mode
 *                             search. See the documentation for this
 *                             struct for a description of each member.
 * \param[in]     ref_best_rd  Best RD found so far for this block.
 *                             It is used for early termination of this
 *                             search if the RD exceeds this value.
 * \param[in]     tmp_buf      Temporary buffer used to hold predictors
 *                             built in this search.
 * \param[in,out] rd_buffers   CompoundTypeRdBuffers struct to hold all
 *                             allocated buffers for the compound
 *                             predictors and masks in the compound type
 *                             search.
 * \param[in,out] best_est_rd  Estimated RD for motion mode search if
 *                             do_tx_search (see below) is 0.
 * \param[in]     do_tx_search Parameter to indicate whether or not to do
 *                             a full transform search.
This will compute 2987 * an estimated RD for the modes without the 2988 * transform search and later perform the full 2989 * transform search on the best candidates. 2990 * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode 2991 * information to perform a full transform 2992 * search only on winning candidates searched 2993 * with an estimate for transform coding RD. 2994 * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store 2995 * motion mode information used in a speed 2996 * feature to search motion modes other than 2997 * SIMPLE_TRANSLATION only on winning 2998 * candidates. 2999 * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the 3000 * best total RD for a skip mode so far, and 3001 * skip_rd[1] is the best RD for a skip mode so 3002 * far in luma. This is used as a speed feature 3003 * to skip the transform search if the computed 3004 * skip RD for the current mode is not better 3005 * than the best skip_rd so far. 3006 * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to 3007 * narrow down the search based on data 3008 * collected in the TPL model. 3009 * \param[out] yrd Stores the rdcost corresponding to encoding 3010 * the luma plane. 3011 * 3012 * \return The RD cost for the mode being searched. 
3013 */ 3014 static int64_t handle_inter_mode( 3015 AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x, 3016 BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y, 3017 RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd, 3018 uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers, 3019 int64_t *best_est_rd, const int do_tx_search, 3020 InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand, 3021 int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl, 3022 int64_t *yrd) { 3023 const AV1_COMMON *cm = &cpi->common; 3024 const int num_planes = av1_num_planes(cm); 3025 MACROBLOCKD *xd = &x->e_mbd; 3026 MB_MODE_INFO *mbmi = xd->mi[0]; 3027 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext; 3028 TxfmSearchInfo *txfm_info = &x->txfm_search_info; 3029 const int is_comp_pred = has_second_ref(mbmi); 3030 const PREDICTION_MODE this_mode = mbmi->mode; 3031 3032 #if CONFIG_REALTIME_ONLY 3033 const int prune_modes_based_on_tpl = 0; 3034 #else // CONFIG_REALTIME_ONLY 3035 const TplParams *const tpl_data = &cpi->ppi->tpl_data; 3036 const int prune_modes_based_on_tpl = 3037 cpi->sf.inter_sf.prune_inter_modes_based_on_tpl && 3038 av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index); 3039 #endif // CONFIG_REALTIME_ONLY 3040 int i; 3041 // Reference frames for this mode 3042 const int refs[2] = { mbmi->ref_frame[0], 3043 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; 3044 int rate_mv = 0; 3045 int64_t rd = INT64_MAX; 3046 // Do first prediction into the destination buffer. Do the next 3047 // prediction into a temporary buffer. Then keep track of which one 3048 // of these currently holds the best predictor, and use the other 3049 // one for future predictions. In the end, copy from tmp_buf to 3050 // dst if necessary. 
3051 struct macroblockd_plane *pd = xd->plane; 3052 const BUFFER_SET orig_dst = { 3053 { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf }, 3054 { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride }, 3055 }; 3056 const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE, 3057 tmp_buf + 2 * MAX_SB_SQUARE }, 3058 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } }; 3059 3060 int64_t ret_val = INT64_MAX; 3061 const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); 3062 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv; 3063 int64_t best_rd = INT64_MAX; 3064 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 3065 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; 3066 int64_t best_yrd = INT64_MAX; 3067 MB_MODE_INFO best_mbmi = *mbmi; 3068 int best_xskip_txfm = 0; 3069 int64_t newmv_ret_val = INT64_MAX; 3070 inter_mode_info mode_info[MAX_REF_MV_SEARCH]; 3071 3072 // Do not prune the mode based on inter cost from tpl if the current ref frame 3073 // is the winner ref in neighbouring blocks. 3074 int ref_match_found_in_above_nb = 0; 3075 int ref_match_found_in_left_nb = 0; 3076 if (prune_modes_based_on_tpl) { 3077 ref_match_found_in_above_nb = 3078 find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd); 3079 ref_match_found_in_left_nb = 3080 find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd); 3081 } 3082 3083 // First, perform a simple translation search for each of the indices. If 3084 // an index performs well, it will be fully searched in the main loop 3085 // of this function. 3086 const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode); 3087 // Save MV results from first 2 ref_mv_idx. 
3088 int_mv save_mv[MAX_REF_MV_SEARCH - 1][2]; 3089 int best_ref_mv_idx = -1; 3090 const int idx_mask = 3091 ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set); 3092 const int16_t mode_ctx = 3093 av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame); 3094 const ModeCosts *mode_costs = &x->mode_costs; 3095 const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx); 3096 const int base_rate = 3097 args->ref_frame_cost + args->single_comp_cost + ref_mv_cost; 3098 3099 for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) { 3100 save_mv[i][0].as_int = INVALID_MV; 3101 save_mv[i][1].as_int = INVALID_MV; 3102 } 3103 args->start_mv_cnt = 0; 3104 3105 // Main loop of this function. This will iterate over all of the ref mvs 3106 // in the dynamic reference list and do the following: 3107 // 1.) Get the current MV. Create newmv MV if necessary 3108 // 2.) Search compound type and parameters if applicable 3109 // 3.) Do interpolation filter search 3110 // 4.) Build the inter predictor 3111 // 5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL, 3112 // WARPED_CAUSAL) 3113 // 6.) Update stats if best so far 3114 for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) { 3115 mbmi->ref_mv_idx = ref_mv_idx; 3116 3117 mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV; 3118 mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX; 3119 const int drl_cost = get_drl_cost( 3120 mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type); 3121 mode_info[ref_mv_idx].drl_cost = drl_cost; 3122 mode_info[ref_mv_idx].skip = 0; 3123 3124 if (!mask_check_bit(idx_mask, ref_mv_idx)) { 3125 // MV did not perform well in simple translation search. Skip it. 3126 continue; 3127 } 3128 if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb && 3129 !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) { 3130 // Skip mode if TPL model indicates it will not be beneficial. 
3131 if (prune_modes_based_on_tpl_stats( 3132 inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode, 3133 cpi->sf.inter_sf.prune_inter_modes_based_on_tpl)) 3134 continue; 3135 } 3136 av1_init_rd_stats(rd_stats); 3137 3138 // Initialize compound mode data 3139 mbmi->interinter_comp.type = COMPOUND_AVERAGE; 3140 mbmi->comp_group_idx = 0; 3141 mbmi->compound_idx = 1; 3142 if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME; 3143 3144 mbmi->num_proj_ref = 0; 3145 mbmi->motion_mode = SIMPLE_TRANSLATION; 3146 3147 // Compute cost for signalling this DRL index 3148 rd_stats->rate = base_rate; 3149 rd_stats->rate += drl_cost; 3150 3151 int rs = 0; 3152 int compmode_interinter_cost = 0; 3153 3154 int_mv cur_mv[2]; 3155 3156 // TODO(Cherma): Extend this speed feature to support compound mode 3157 int skip_repeated_ref_mv = 3158 is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv; 3159 // Generate the current mv according to the prediction mode 3160 if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) { 3161 continue; 3162 } 3163 3164 // The above call to build_cur_mv does not handle NEWMV modes. Build 3165 // the mv here if we have NEWMV for any predictors. 
3166 if (have_newmv_in_inter_mode(this_mode)) { 3167 #if CONFIG_COLLECT_COMPONENT_TIMING 3168 start_timing(cpi, handle_newmv_time); 3169 #endif 3170 newmv_ret_val = 3171 handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info); 3172 #if CONFIG_COLLECT_COMPONENT_TIMING 3173 end_timing(cpi, handle_newmv_time); 3174 #endif 3175 3176 if (newmv_ret_val != 0) continue; 3177 3178 if (is_inter_singleref_mode(this_mode) && 3179 cur_mv[0].as_int != INVALID_MV) { 3180 const MV_REFERENCE_FRAME ref = refs[0]; 3181 const unsigned int this_sse = x->pred_sse[ref]; 3182 if (this_sse < args->best_single_sse_in_refs[ref]) { 3183 args->best_single_sse_in_refs[ref] = this_sse; 3184 } 3185 3186 if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) { 3187 const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1; 3188 const int pix_idx = num_pels_log2_lookup[bsize] - 4; 3189 const double scale_factor[3][11] = { 3190 { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 }, 3191 { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 }, 3192 { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 } 3193 }; 3194 assert(pix_idx >= 0); 3195 assert(th_idx <= 2); 3196 if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse) 3197 continue; 3198 } 3199 } 3200 3201 rd_stats->rate += rate_mv; 3202 } 3203 // Copy the motion vector for this mode into mbmi struct 3204 for (i = 0; i < is_comp_pred + 1; ++i) { 3205 mbmi->mv[i].as_int = cur_mv[i].as_int; 3206 } 3207 3208 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && 3209 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) { 3210 continue; 3211 } 3212 3213 // Skip the rest of the search if prune_ref_mv_idx_search speed feature 3214 // is enabled, and the current MV is similar to a previous one. 
3215 if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred && 3216 prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi, 3217 cpi->sf.inter_sf.prune_ref_mv_idx_search)) 3218 continue; 3219 3220 if (cpi->sf.gm_sf.prune_zero_mv_with_sse && 3221 (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) { 3222 if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args, 3223 cpi->sf.gm_sf.prune_zero_mv_with_sse)) { 3224 continue; 3225 } 3226 } 3227 3228 int skip_build_pred = 0; 3229 const int mi_row = xd->mi_row; 3230 const int mi_col = xd->mi_col; 3231 3232 // Handle a compound predictor, continue if it is determined this 3233 // cannot be the best compound mode 3234 if (is_comp_pred) { 3235 #if CONFIG_COLLECT_COMPONENT_TIMING 3236 start_timing(cpi, compound_type_rd_time); 3237 #endif 3238 const int not_best_mode = process_compound_inter_mode( 3239 cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost, 3240 rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd, 3241 &skip_build_pred); 3242 #if CONFIG_COLLECT_COMPONENT_TIMING 3243 end_timing(cpi, compound_type_rd_time); 3244 #endif 3245 if (not_best_mode) continue; 3246 } 3247 3248 if (!args->skip_ifs) { 3249 #if CONFIG_COLLECT_COMPONENT_TIMING 3250 start_timing(cpi, interpolation_filter_search_time); 3251 #endif 3252 // Determine the interpolation filter for this mode 3253 ret_val = av1_interpolation_filter_search( 3254 x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs, 3255 &skip_build_pred, args, ref_best_rd); 3256 #if CONFIG_COLLECT_COMPONENT_TIMING 3257 end_timing(cpi, interpolation_filter_search_time); 3258 #endif 3259 if (args->modelled_rd != NULL && !is_comp_pred) { 3260 args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd; 3261 } 3262 if (ret_val != 0) { 3263 restore_dst_buf(xd, orig_dst, num_planes); 3264 continue; 3265 } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout && 3266 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) { 3267 
restore_dst_buf(xd, orig_dst, num_planes); 3268 continue; 3269 } 3270 3271 // Compute modelled RD if enabled 3272 if (args->modelled_rd != NULL) { 3273 if (is_comp_pred) { 3274 const int mode0 = compound_ref0_mode(this_mode); 3275 const int mode1 = compound_ref1_mode(this_mode); 3276 const int64_t mrd = 3277 AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]], 3278 args->modelled_rd[mode1][ref_mv_idx][refs[1]]); 3279 if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) { 3280 restore_dst_buf(xd, orig_dst, num_planes); 3281 continue; 3282 } 3283 } 3284 } 3285 } 3286 3287 rd_stats->rate += compmode_interinter_cost; 3288 if (skip_build_pred != 1) { 3289 // Build this inter predictor if it has not been previously built 3290 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0, 3291 av1_num_planes(cm) - 1); 3292 } 3293 3294 #if CONFIG_COLLECT_COMPONENT_TIMING 3295 start_timing(cpi, motion_mode_rd_time); 3296 #endif 3297 int rate2_nocoeff = rd_stats->rate; 3298 // Determine the motion mode. 
This will be one of SIMPLE_TRANSLATION, 3299 // OBMC_CAUSAL or WARPED_CAUSAL 3300 int64_t this_yrd; 3301 ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y, 3302 rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv, 3303 &orig_dst, best_est_rd, do_tx_search, 3304 inter_modes_info, 0, &this_yrd); 3305 #if CONFIG_COLLECT_COMPONENT_TIMING 3306 end_timing(cpi, motion_mode_rd_time); 3307 #endif 3308 assert( 3309 IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX)); 3310 3311 if (ret_val != INT64_MAX) { 3312 int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); 3313 const THR_MODES mode_enum = get_prediction_mode_idx( 3314 mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]); 3315 // Collect mode stats for multiwinner mode processing 3316 store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y, 3317 rd_stats_uv, mode_enum, NULL, bsize, tmp_rd, 3318 cpi->sf.winner_mode_sf.multi_winner_mode_type, 3319 do_tx_search); 3320 if (tmp_rd < best_rd) { 3321 best_yrd = this_yrd; 3322 // Update the best rd stats if we found the best mode so far 3323 best_rd_stats = *rd_stats; 3324 best_rd_stats_y = *rd_stats_y; 3325 best_rd_stats_uv = *rd_stats_uv; 3326 best_rd = tmp_rd; 3327 best_mbmi = *mbmi; 3328 best_xskip_txfm = txfm_info->skip_txfm; 3329 memcpy(best_blk_skip, txfm_info->blk_skip, 3330 sizeof(best_blk_skip[0]) * xd->height * xd->width); 3331 av1_copy_array(best_tx_type_map, xd->tx_type_map, 3332 xd->height * xd->width); 3333 motion_mode_cand->rate_mv = rate_mv; 3334 motion_mode_cand->rate2_nocoeff = rate2_nocoeff; 3335 } 3336 3337 if (tmp_rd < ref_best_rd) { 3338 ref_best_rd = tmp_rd; 3339 best_ref_mv_idx = ref_mv_idx; 3340 } 3341 } 3342 restore_dst_buf(xd, orig_dst, num_planes); 3343 } 3344 3345 if (best_rd == INT64_MAX) return INT64_MAX; 3346 3347 // re-instate status of the best choice 3348 *rd_stats = best_rd_stats; 3349 *rd_stats_y = best_rd_stats_y; 3350 *rd_stats_uv = best_rd_stats_uv; 3351 *yrd = best_yrd; 3352 
  // Commit the winning candidate: restore its mbmi, skip flag, block-skip map
  // and transform-type map before computing the final rd cost.
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}

/*!\brief Search for the best intrabc predictor
 *
 * \ingroup intra_mode_search
 * \callergraph
 * This function performs a motion search to find the best intrabc predictor.
 *
 * \returns Returns the best overall rdcost (including the non-intrabc modes
 * search before this function).
 */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // IntraBC is only searched when the tool is allowed and enabled; the nonrd
  // pick-mode path never reaches this RD search.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  // Speed feature: at intrabc_search_level >= 1 only 4x4, 8x8 and 16x16
  // blocks are considered for intrabc.
  if (cpi->sf.mv_sf.intrabc_search_level >= 1 && bsize != BLOCK_4X4 &&
      bsize != BLOCK_8X8 && bsize != BLOCK_16X16) {
    return INT64_MAX;
  }
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // DV predictor: prefer the nearest candidate, fall back to near, and
  // synthesize a default ref DV when both are zero.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // IntraBC predicts from the already-coded part of the current frame, so the
  // prediction source buffers are set up from cur_buf, not a reference frame.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const SEARCH_METHODS search_method =
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, start_mv,
                                     lookahead_search_sites, search_method,
                                     /*fine_search_interval=*/0);
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  // At intrabc_search_level != 0 only the region above the current superblock
  // row is searched; otherwise both the above and the left regions are tried.
  const enum IntrabcMotionDirection max_dir = cpi->sf.mv_sf.intrabc_search_level
                                                  ? IBC_MOTION_LEFT
                                                  : IBC_MOTION_DIRECTIONS;

  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir;
       ++dir) {
    // Constrain the full-pel search window to the already-reconstructed area
    // for this direction (in pixels, relative to the current block).
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // NOTE(review): each of these four asserts compares a limit against
    // itself and is therefore always true. They look like vestiges of an
    // earlier version that validated the limits against a saved copy --
    // consider removing them or restoring the intended comparison.
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);

    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);

    // Skip this direction if the clamped search window is empty.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv;
    FULLPEL_MV_STATS best_mv_stats;
    int bestsme = INT_MAX;

    // Perform a hash search first, and see if we get any matches.
    if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
      bestsme = av1_intrabc_hash_search(cpi, xd, &fullms_params,
                                        intrabc_hash_info, &best_mv.as_fullmv);
    }

    // If intrabc_search_level is not 0 and we found a hash search match, do
    // not proceed with pixel search as the hash match is very likely to be the
    // best intrabc candidate anyway.
    if (bestsme == INT_MAX || cpi->sf.mv_sf.intrabc_search_level == 0) {
      int_mv best_pixel_mv;
      const int pixelsme =
          av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
                                &best_pixel_mv.as_fullmv, &best_mv_stats, NULL);
      if (pixelsme < bestsme) {
        bestsme = pixelsme;
        best_mv = best_pixel_mv;
      }
    }
    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    // Validate the candidate DV: it must stay inside the search limits and
    // satisfy the bitstream-level DV legality rules.
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an intrabc block (signalled as DC_PRED with a
    // BILINEAR filter) and build the prediction from the chosen DV.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall winner; this may be the pre-existing best mode if no
  // intrabc candidate improved on best_rd.
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}

// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
// typedef here because Doxygen doesn't know about the typedefs yet. So using
// the typedef will prevent doxygen from finding this function and generating
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
// doxygen, we can revert back to using the typedefs.
// RD mode decision for an intra superblock: picks the best luma intra mode,
// then (if applicable) the best chroma intra mode, and finally lets the
// intrabc search try to beat the combined result. Fills rd_cost and the
// pick-mode context with the winner.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  // Start from a clean intra configuration.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  // Only search chroma if the luma search produced something better than the
  // incoming best_rd.
  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Mark the search as unsuccessful so far.
    rd_cost->rate = INT_MAX;
  }

  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // Let intrabc try to beat the regular intra result; it updates rd_cost and
  // mbmi in place when it wins.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}

static inline void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride);

// Evaluate the frame-level skip_mode (NEAREST_NEARESTMV compound with
// derived references and no residual) against the best intra/inter mode
// found so far; on a win, updates rd_cost and search_state in place.
static inline void rd_pick_skip_mode(
    RD_STATS *rd_cost, InterModeSearchState *search_state,
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];

  x->compound_idx = 1;  // COMPOUND_AVERAGE
  RD_STATS skip_mode_rd_stats;
  av1_invalid_rd_stats(&skip_mode_rd_stats);

  // skip_mode requires both frame-level derived references to be valid.
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
    return;
  }

  const MV_REFERENCE_FRAME ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
  const MV_REFERENCE_FRAME second_ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
  const THR_MODES mode_index =
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);

  if (mode_index == THR_INVALID) {
    return;
  }

  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    return;
  }

  mbmi->mode = this_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame;
  mbmi->ref_frame[1] = second_ref_frame;
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // UINT8_MAX marks a ref-mv count that has not been populated yet; fill the
  // compound stack on demand (both single-ref stacks must already be valid).
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
      return;
    }
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                     mbmi_ext->mode_context);
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
  }

  assert(this_mode == NEAREST_NEARESTMV);
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
    return;
  }

  // Fixed block configuration implied by skip_mode signalling.
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = x->compound_idx;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = 0;
  mbmi->skip_mode = mbmi->skip_txfm = 1;
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;

  set_default_interp_filters(mbmi, cm->features.interp_filter);

  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  for (int i = 0; i < num_planes; i++) {
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
  }

  BUFFER_SET orig_dst;
  for (int i = 0; i < num_planes; i++) {
    orig_dst.plane[i] = xd->plane[i].dst.buf;
    orig_dst.stride[i] = xd->plane[i].dst.stride;
  }

  // Compare the use of skip_mode with the best intra/inter mode obtained.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  int64_t best_intra_inter_mode_cost = INT64_MAX;
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
    const ModeCosts *mode_costs = &x->mode_costs;
    best_intra_inter_mode_cost = RDCOST(
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
        rd_cost->dist);
    // Account for non-skip mode rate in total rd stats
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
    av1_rd_cost_update(x->rdmult, rd_cost);
  }

  // Obtain the rdcost for skip_mode.
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
               best_intra_inter_mode_cost);

  // In lossless mode skip_mode is only acceptable with zero distortion.
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
    assert(mode_index != THR_INVALID);
    // NOTE(review): this store is immediately overwritten by the struct copy
    // on the next line; it is harmless only because mbmi->skip_mode was set
    // to 1 above. Consider dropping the redundant assignment.
    search_state->best_mbmode.skip_mode = 1;
    search_state->best_mbmode = *mbmi;
    memset(search_state->best_mbmode.inter_tx_size,
           search_state->best_mbmode.tx_size,
           sizeof(search_state->best_mbmode.inter_tx_size));
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
                  xd);
    search_state->best_mode_index = mode_index;

    // Update rd_cost
    rd_cost->rate = skip_mode_rd_stats.rate;
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;

    search_state->best_rd = rd_cost->rdcost;
    search_state->best_skip2 = 1;
    search_state->best_mode_skippable = 1;

    x->txfm_search_info.skip_txfm = 1;
  }
}

// Get winner mode stats of given mode index
static inline MB_MODE_INFO *get_winner_mode_stats(
    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
    int mode_idx) {
  MB_MODE_INFO *winner_mbmi;
  if (multi_winner_mode_type) {
    // Multi-winner processing: read stats from the stored winner list.
    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
    winner_mbmi = &winner_mode_stat->mbmi;

    *winner_rd_cost = &winner_mode_stat->rd_cost;
    *winner_rate_y = winner_mode_stat->rate_y;
    *winner_rate_uv = winner_mode_stat->rate_uv;
    *winner_mode_index = winner_mode_stat->mode_index;
  } else {
    // Single-winner processing: the overall best mode is the winner.
    winner_mbmi = best_mbmode;
    *winner_rd_cost = best_rd_cost;
    *winner_rate_y = best_rate_y;
    *winner_rate_uv = best_rate_uv;
    *winner_mode_index = *best_mode_index;
  }
  return winner_mbmi;
}

// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
static inline void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        // Re-run the transform search with the full tx-type set now that the
        // winning prediction is fixed.
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;

      // Decide between coding the residual and skipping it by comparing the
      // two rd costs.
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Total rate: previous stats with the old y/uv token rates swapped for
      // the refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}

/*!\cond */
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */

// Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
static inline void disable_reference(
    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
    ref_combo[ref][ref2 + 1] = true;
  }
}

// Update 'ref_combo' mask to disable all inter references except ALTREF.
static inline void disable_inter_references_except_altref(
    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
  // Disable every inter reference other than ALTREF_FRAME; INTRA_FRAME is
  // also left untouched.
  disable_reference(LAST_FRAME, ref_combo);
  disable_reference(LAST2_FRAME, ref_combo);
  disable_reference(LAST3_FRAME, ref_combo);
  disable_reference(GOLDEN_FRAME, ref_combo);
  disable_reference(BWDREF_FRAME, ref_combo);
  disable_reference(ALTREF2_FRAME, ref_combo);
}

// Reference-frame combinations tried under the reduced reference set.
// Entries of the form { ref, NONE_FRAME } are single-reference modes.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;

// Initialize 'mask' for the given reference set. For REF_SET_FULL everything
// is enabled; otherwise all reference combos are disabled first and only the
// combos belonging to the chosen set are re-enabled.
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
  } else {
    // All modes available by default.
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
    // All references disabled first.
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
        mask->ref_combo[ref1][ref2 + 1] = true;
      }
    }
    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
    int num_ref_combos;

    // Then enable reduced set of references explicitly.
    switch (ref_set) {
      case REF_SET_REDUCED:
        ref_set_combos = reduced_ref_combos;
        num_ref_combos =
            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
        break;
      case REF_SET_REALTIME:
        ref_set_combos = real_time_ref_combos;
        num_ref_combos =
            (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
        break;
      // num_ref_combos = 0 keeps the re-enable loop from reading the
      // uninitialized ref_set_combos pointer in release builds.
      default: assert(0); num_ref_combos = 0;
    }

    for (int i = 0; i < num_ref_combos; ++i) {
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
    }
  }
}

// Build the per-block mode/reference skip mask from the active reference set,
// segmentation features, speed features and the pred_mv_sad statistics
// gathered earlier (in set_params_rd_pick_inter_mode).
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
                                       const AV1_COMP *cpi, MACROBLOCK *x,
                                       BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
  REF_SET ref_set = REF_SET_FULL;

  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  // Minimum pred_mv_sad over the references actually considered; used below
  // as the baseline for pruning "poor" references.
  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references (pred_mv_sad more than 4x the
      // best available).
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      // Start with every ALTREF mode disabled except NEAREST/NEAR/GLOBAL...
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // ...then additionally disable NEAR/NEAREST when they duplicate the
      // GLOBALMV candidate.
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (inter_sf->alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      // Fast path: restrict the search to ALTREF-only inter modes.
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (inter_sf->alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
      // sad_thresh = best past-frame SAD * 1.125.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      MV_REFERENCE_FRAME start_frame =
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0) {
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
          // to the relative dist of LAST_FRAME.
          if (inter_sf->alt_ref_search_fp == 1 &&
              (abs(cpi->ref_frame_dist_info
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
               1.5 * abs(cpi->ref_frame_dist_info
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
            continue;
          }
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
        }
      }
    }
  }

  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
    if (x->best_pred_mv_sad[0] < INT_MAX) {
      // sad_thresh = best past-frame SAD * 1.5.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };

      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
        ref_frame = prune_ref_list[ref_idx];
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
  }

  // Intra is disabled entirely above the speed-feature size limit.
  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
    }
  }

  // Disable the intra luma modes that the speed feature's mask does not allow
  // for this transform size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];

  // Prune reference frames which are not the closest to the current
  // frame and with large pred_mv_sad.
  if (inter_sf->prune_single_ref) {
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
    // Level 1 prunes at 1.20x the best SAD in that direction; level 2 is more
    // aggressive at 1.05x.
    const double prune_threshes[2] = { 1.20, 1.05 };

    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefFrameDistanceInfo *const ref_frame_dist_info =
          &cpi->ref_frame_dist_info;
      const int is_closest_ref =
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
          (ref_frame == ref_frame_dist_info->nearest_future_ref);

      if (!is_closest_ref) {
        // dir: 0 = past reference, 1 = future reference.
        const int dir =
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
                ? 0
                : 1;
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
            x->pred_mv_sad[ref_frame] >
                prune_threshes[inter_sf->prune_single_ref - 1] *
                    x->best_pred_mv_sad[dir])
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
      }
    }
  }
}

// Point args->above_pred_buf[]/left_pred_buf[] at the three OBMC scratch
// buffers, applying CONVERT_TO_BYTEPTR and uint16_t-sized offsets for
// high-bit-depth buffers.
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
                                          HandleInterModeArgs *const args,
                                          int is_hbd) {
  if (is_hbd) {
    const int len = sizeof(uint16_t);
    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
                                                 (MAX_SB_SQUARE >> 1) * len);
    args->above_pred_buf[2] =
        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
    args->left_pred_buf[1] =
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
    args->left_pred_buf[2] =
        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
  } else {
    args->above_pred_buf[0] = obmc_buffer->above_pred;
    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
    args->left_pred_buf[0] = obmc_buffer->left_pred;
    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
  }
}

// Returns 1 if 'ref_frame' (single or compound type) should be pruned, either
// by the encoder-level prune mask or by the selective-reference speed feature.
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
                                  MV_REFERENCE_FRAME ref_frame) {
  const AV1_COMMON *const cm = &cpi->common;
  MV_REFERENCE_FRAME rf[2];
  av1_set_ref_frame(rf, ref_frame);

  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;

  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
                                       cm->cur_frame->ref_display_order_hint)) {
    return 1;
  }

  return 0;
}

// Returns 1 if the single reference 'ref_frame' appears in any compound
// reference type that is NOT skipped by 'skip_ref_frame_mask'.
static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
                                                    int skip_ref_frame_mask) {
  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
    if (!(skip_ref_frame_mask & (1 << r))) {
      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
      if (rf[0] == ref_frame || rf[1] == ref_frame) {
        return 1;
      }
    }
  }
  return 0;
}

// Returns 1 if 'ref_frame' (a single reference when < REF_FRAMES, else a
// compound reference type) is needed to re-evaluate the cached mode 'mi_cache'.
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
                                             const MB_MODE_INFO *mi_cache) {
  if (!mi_cache) {
    return 0;
  }

  if (ref_frame < REF_FRAMES) {
    return (ref_frame == mi_cache->ref_frame[0] ||
            ref_frame == mi_cache->ref_frame[1]);
  }

  // if we are here, then the current mode is compound.
  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
  return ref_frame == cached_ref_type;
}

// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
// One-time per-block setup for the inter-mode RD search: reference costs,
// per-reference MV candidate lists and pred_mv_sad, OBMC neighbor predictions,
// and the mode skip mask.
static inline void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // best_pred_mv_sad[0]: best over past refs; [1]: best over future refs.
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      // Also fills x->pred_mv_sad[ref_frame] used below.
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.inter_sf.prune_single_ref ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both constituent references must be available.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  int prune_obmc;
#if CONFIG_FPMT_TEST
  // In the FPMT parallel-simulation unit test, OBMC pruning is driven by the
  // simulated frame probabilities instead of the actual ones.
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      // Precompute the above/left neighbor predictions used by the OBMC
      // motion-mode evaluation.
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}

// Reset the single-reference mode bookkeeping used to prune compound modes
// from single-mode results (see collect/analyze_single_states below).
static inline void init_single_inter_mode_search_state(
    InterModeSearchState *search_state) {
  for (int dir = 0; dir < 2; ++dir) {
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
        SingleInterModeState *state;

        state = &search_state->single_state[dir][mode][ref_frame];
        state->ref_frame = NONE_FRAME;
        state->rd = INT64_MAX;

        state = &search_state->single_state_modelled[dir][mode][ref_frame];
        state->ref_frame = NONE_FRAME;
        state->rd = INT64_MAX;

        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
      }
    }
  }

  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
    search_state->best_single_rd[ref_frame] = INT64_MAX;
    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
  }
  av1_zero(search_state->single_state_cnt);
  av1_zero(search_state->single_state_modelled_cnt);
}

// Initialize the full inter-mode search state: best-so-far trackers, per-mode
// RD thresholds, and (when compound prediction is possible) the compound
// thresholds plus the single-mode state above.
static inline void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // NEWMV modes (indices up to LAST_NEW_MV_INDEX) are never thresholded.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }

  // Compound-mode state is only needed when the frame allows compound refs.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}

// Returns true if the skip mask rules out 'this_mode' with this reference
// pair, either via the per-reference mode mask or the ref-combo mask.
static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
                           const MV_REFERENCE_FRAME *ref_frame,
                           const PREDICTION_MODE this_mode) {
  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
    return true;
  }

  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
}

// Returns 1 if (curr_mode, ref_frames) is incompatible with the current frame
// or block configuration (compound restrictions, inter-intra restrictions).
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  const int comp_pred = ref_frames[1] > INTRA_FRAME;
  if (comp_pred) {
    if (!is_comp_ref_allowed(bsize)) return 1;
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
      return 1;
    }

    const AV1_COMMON *const cm = &cpi->common;
    if (frame_is_intra_only(cm)) return 1;

    const CurrentFrame *const current_frame = &cm->current_frame;
    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;

    const struct segmentation *const seg = &cm->seg;
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // ref_frames[1] == INTRA_FRAME denotes an inter-intra combination.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
    // Mode must be compatible
    if (!is_interintra_allowed_bsize(bsize)) return 1;
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
  }

  return 0;
}

// OR together the picked_ref_frames_mask entries of all mi units covered by
// this block within its superblock (mask rows have a stride of 32 mi units).
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const int sb_size_mask = mib_size - 1;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int mi_row_in_sb = mi_row & sb_size_mask;
  const int mi_col_in_sb = mi_col & sb_size_mask;
  const int mi_w = mi_size_wide[bsize];
  const int mi_h = mi_size_high[bsize];
  int picked_ref_frames_mask = 0;
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
    }
  }
  return picked_ref_frames_mask;
}

// Check if reference frame pair of the current block matches with the given
// block.
// Returns 1 when both references of 'mbmi' equal the given pair, else 0.
static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
                                       const MV_REFERENCE_FRAME *ref_frames) {
  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
          (ref_frames[1] == mbmi->ref_frame[1]));
}

// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Rows: speed-feature level - 1; columns: q-index third of QINDEX_RANGE.
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}

// Reset 'mbmi' to a default state for evaluating (curr_mode, ref_frames):
// zero MVs, no palette/filter-intra, simple translation, default filters.
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
                             const MV_REFERENCE_FRAME *ref_frames,
                             const AV1_COMMON *cm) {
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  mbmi->ref_mv_idx = 0;
  mbmi->mode = curr_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frames[0];
  mbmi->ref_frame[1] = ref_frames[1];
  pmi->palette_size[0] = 0;
  pmi->palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  // II_DC_PRED - 1 marks "no inter-intra mode selected".
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  set_default_interp_filters(mbmi, cm->features.interp_filter);
}

// Record the just-searched single-reference mode's best simple and modelled
// RD (over its ref-MV candidates) into the per-direction sorted state lists
// consumed later by analyze_single_states().
static inline void collect_single_states(MACROBLOCK *x,
                                         InterModeSearchState *search_state,
                                         const MB_MODE_INFO *const mbmi) {
  int i, j;
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
  const PREDICTION_MODE this_mode = mbmi->mode;
  // dir 0: past-side refs (<= GOLDEN), dir 1: future-side refs.
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
  const int mode_offset = INTER_OFFSET(this_mode);
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);

  // Simple rd
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < simple_rd) simple_rd = rd;
  }

  // Insertion sort of single_state
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
  i = search_state->single_state_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
    state_s[j] = state_s[j - 1];
  state_s[j] = this_state_s;
  search_state->single_state_cnt[dir][mode_offset]++;

  // Modelled rd
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < modelled_rd) modelled_rd = rd;
  }

  // Insertion sort of single_state_modelled
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
  SingleInterModeState *state_m =
      search_state->single_state_modelled[dir][mode_offset];
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
    state_m[j] = state_m[j - 1];
  state_m[j] = this_state_m;
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
}

// Invalidate weak single-reference results and build single_rd_order[], the
// per-direction candidate reference ranking used to prune compound modes.
static inline void analyze_single_states(const AV1_COMP *cpi,
                                         InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Entries whose rd exceeds best_rd * prune_factor / 8 are invalidated;
    // higher prune levels use a stricter factor.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Fill remaining slots from the modelled-rd ranking, skipping refs that
      // are already listed or were invalidated by the simple-rd pass.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}

// Returns how many top-ranked single-reference candidates (from
// single_rd_order) a compound mode may pair with in direction 'dir'; higher
// prune levels shrink the allowance.
static int compound_skip_get_candidates(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const int dir, const PREDICTION_MODE mode) {
  const int mode_offset = INTER_OFFSET(mode);
  const SingleInterModeState *state =
      search_state->single_state[dir][mode_offset];
  const SingleInterModeState *state_modelled =
      search_state->single_state_modelled[dir][mode_offset];

  int max_candidates = 0;
  for (int i = 0; i < FWD_REFS; ++i) {
    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
    max_candidates++;
  }

  int candidates = max_candidates;
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
    candidates = AOMMIN(2, max_candidates);
  }
  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
    // If both rankings agree on the best reference, one candidate suffices.
    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
        state[0].ref_frame == state_modelled[0].ref_frame)
      candidates = 1;
    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
  }

  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
    // Limit the number of candidates to 1 in each direction for compound
    // prediction
    candidates = AOMMIN(1, candidates);
  }
  return candidates;
}

static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ?
0 : 1 }; 4956 int ref_searched[2] = { 0, 0 }; 4957 int ref_mv_match[2] = { 1, 1 }; 4958 int i, j; 4959 4960 for (i = 0; i < 2; ++i) { 4961 const SingleInterModeState *state = 4962 search_state->single_state[mode_dir[i]][mode_offset[i]]; 4963 const int state_cnt = 4964 search_state->single_state_cnt[mode_dir[i]][mode_offset[i]]; 4965 for (j = 0; j < state_cnt; ++j) { 4966 if (state[j].ref_frame == refs[i]) { 4967 ref_searched[i] = 1; 4968 break; 4969 } 4970 } 4971 } 4972 4973 const int ref_set = get_drl_refmv_count(x, refs, this_mode); 4974 for (i = 0; i < 2; ++i) { 4975 if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) { 4976 continue; 4977 } 4978 const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME }; 4979 for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) { 4980 int_mv single_mv; 4981 int_mv comp_mv; 4982 get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs, 4983 &x->mbmi_ext); 4984 get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext); 4985 if (single_mv.as_int != comp_mv.as_int) { 4986 ref_mv_match[i] = 0; 4987 break; 4988 } 4989 } 4990 } 4991 4992 for (i = 0; i < 2; ++i) { 4993 if (!ref_searched[i] || !ref_mv_match[i]) continue; 4994 const int candidates = 4995 compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]); 4996 const MV_REFERENCE_FRAME *ref_order = 4997 search_state->single_rd_order[mode_dir[i]][mode_offset[i]]; 4998 int match = 0; 4999 for (j = 0; j < candidates; ++j) { 5000 if (refs[i] == ref_order[j]) { 5001 match = 1; 5002 break; 5003 } 5004 } 5005 if (!match) return 1; 5006 } 5007 5008 return 0; 5009 } 5010 5011 // Check if ref frames of current block matches with given block. 
5012 static inline void match_ref_frame(const MB_MODE_INFO *const mbmi, 5013 const MV_REFERENCE_FRAME *ref_frames, 5014 int *const is_ref_match) { 5015 if (is_inter_block(mbmi)) { 5016 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0]; 5017 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0]; 5018 if (has_second_ref(mbmi)) { 5019 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1]; 5020 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1]; 5021 } 5022 } 5023 } 5024 5025 // Prune compound mode using ref frames of neighbor blocks. 5026 static inline int compound_skip_using_neighbor_refs( 5027 MACROBLOCKD *const xd, const PREDICTION_MODE this_mode, 5028 const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) { 5029 // Exclude non-extended compound modes from pruning 5030 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV || 5031 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV) 5032 return 0; 5033 5034 if (prune_ext_comp_using_neighbors >= 3) return 1; 5035 5036 int is_ref_match[2] = { 0 }; // 0 - match for forward refs 5037 // 1 - match for backward refs 5038 // Check if ref frames of this block matches with left neighbor. 5039 if (xd->left_available) 5040 match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match); 5041 5042 // Check if ref frames of this block matches with above neighbor. 5043 if (xd->up_available) 5044 match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match); 5045 5046 // Combine ref frame match with neighbors in forward and backward refs. 5047 const int track_ref_match = is_ref_match[0] + is_ref_match[1]; 5048 5049 // Pruning based on ref frame match with neighbors. 5050 if (track_ref_match >= prune_ext_comp_using_neighbors) return 0; 5051 return 1; 5052 } 5053 5054 // Update best single mode for the given reference frame based on simple rd. 
5055 static inline void update_best_single_mode(InterModeSearchState *search_state, 5056 const PREDICTION_MODE this_mode, 5057 const MV_REFERENCE_FRAME ref_frame, 5058 int64_t this_rd) { 5059 if (this_rd < search_state->best_single_rd[ref_frame]) { 5060 search_state->best_single_rd[ref_frame] = this_rd; 5061 search_state->best_single_mode[ref_frame] = this_mode; 5062 } 5063 } 5064 5065 // Prune compound mode using best single mode for the same reference. 5066 static inline int skip_compound_using_best_single_mode_ref( 5067 const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames, 5068 const PREDICTION_MODE *best_single_mode, 5069 int prune_comp_using_best_single_mode_ref) { 5070 // Exclude non-extended compound modes from pruning 5071 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV || 5072 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV) 5073 return 0; 5074 5075 assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV); 5076 const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode); 5077 // Get ref frame direction corresponding to NEWMV 5078 // 0 - NEWMV corresponding to forward direction 5079 // 1 - NEWMV corresponding to backward direction 5080 const int newmv_dir = comp_mode_ref0 != NEWMV; 5081 5082 // Avoid pruning the compound mode when ref frame corresponding to NEWMV 5083 // have NEWMV as single mode winner. 
5084 // Example: For an extended-compound mode, 5085 // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}} 5086 // - Ref frame corresponding to NEWMV is ALTREF_FRAME 5087 // - Avoid pruning this mode, if best single mode corresponding to ref frame 5088 // ALTREF_FRAME is NEWMV 5089 const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]]; 5090 if (single_mode == NEWMV) return 0; 5091 5092 // Avoid pruning the compound mode when best single mode is not available 5093 if (prune_comp_using_best_single_mode_ref == 1) 5094 if (single_mode == MB_MODE_COUNT) return 0; 5095 return 1; 5096 } 5097 5098 static int compare_int64(const void *a, const void *b) { 5099 int64_t a64 = *((int64_t *)a); 5100 int64_t b64 = *((int64_t *)b); 5101 if (a64 < b64) { 5102 return -1; 5103 } else if (a64 == b64) { 5104 return 0; 5105 } else { 5106 return 1; 5107 } 5108 } 5109 5110 static inline void update_search_state( 5111 InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst, 5112 PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats, 5113 const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv, 5114 THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) { 5115 const MACROBLOCKD *xd = &x->e_mbd; 5116 const MB_MODE_INFO *mbmi = xd->mi[0]; 5117 const int skip_ctx = av1_get_skip_txfm_context(xd); 5118 const int skip_txfm = 5119 mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode); 5120 const TxfmSearchInfo *txfm_info = &x->txfm_search_info; 5121 5122 search_state->best_rd = new_best_rd_stats->rdcost; 5123 search_state->best_mode_index = new_best_mode; 5124 *best_rd_stats_dst = *new_best_rd_stats; 5125 search_state->best_mbmode = *mbmi; 5126 search_state->best_skip2 = skip_txfm; 5127 search_state->best_mode_skippable = new_best_rd_stats->skip_txfm; 5128 // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and 5129 // rate_uv because av1_txfm_search process is replaced by 
rd estimation. 5130 // Therefore, we should avoid updating best_rate_y and best_rate_uv here. 5131 // These two values will be updated when av1_txfm_search is called. 5132 if (txfm_search_done) { 5133 search_state->best_rate_y = 5134 new_best_rd_stats_y->rate + 5135 x->mode_costs.skip_txfm_cost[skip_ctx] 5136 [new_best_rd_stats->skip_txfm || skip_txfm]; 5137 search_state->best_rate_uv = new_best_rd_stats_uv->rate; 5138 } 5139 search_state->best_y_rdcost = *new_best_rd_stats_y; 5140 memcpy(ctx->blk_skip, txfm_info->blk_skip, 5141 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk); 5142 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); 5143 } 5144 5145 // Find the best RD for a reference frame (among single reference modes) 5146 // and store +10% of it in the 0-th element in ref_frame_rd. 5147 static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) { 5148 assert(ref_frame_rd[0] == INT64_MAX); 5149 int64_t ref_copy[REF_FRAMES - 1]; 5150 memcpy(ref_copy, ref_frame_rd + 1, 5151 sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1)); 5152 qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64); 5153 5154 int64_t cutoff = ref_copy[0]; 5155 // The cut-off is within 10% of the best. 5156 if (cutoff != INT64_MAX) { 5157 assert(cutoff < INT64_MAX / 200); 5158 cutoff = (110 * cutoff) / 100; 5159 } 5160 ref_frame_rd[0] = cutoff; 5161 } 5162 5163 // Check if either frame is within the cutoff. 
// Returns true when frame1 or frame2 has a single-reference rd no worse than
// the cutoff stored in ref_frame_rd[0] by find_top_ref().
static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
                                        MV_REFERENCE_FRAME frame1,
                                        MV_REFERENCE_FRAME frame2) {
  assert(frame2 > 0);
  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
         ref_frame_rd[frame2] <= ref_frame_rd[0];
}

// Re-runs the motion-mode RD search for the stored winner candidates (which
// were only searched with SIMPLE_TRANSLATION) and updates search_state /
// rd_cost when a candidate beats the current best.
static inline void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's saved mode info and rates.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    struct macroblockd_plane *pd = xd->plane;
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);

      // Optionally bias the comparison toward/away from warp modes.
      int64_t best_scaled_rd = search_state->best_rd;
      int64_t this_scaled_rd = rd_stats.rdcost;
      if (search_state->best_mode_index != THR_INVALID)
        increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
                              &this_scaled_rd,
                              cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);

      if (this_scaled_rd < best_scaled_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}

/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Out: set when order-independent skip requests skipping motion modes only.
  int *skip_motion_mode;
  // Masks of modes/refs excluded up front.
  mode_skip_mask_t *mode_skip_mask;
  // Shared inter-mode search state (best rd, single-ref stats, ...).
  InterModeSearchState *search_state;
  int skip_ref_frame_mask;
  // Set once the first compound mode has triggered analyze_single_states().
  int reach_first_comp_mode;
  // Multiplier applied to mode thresholds when the best mode is skippable.
  int mode_thresh_mul_fact;
  int num_single_modes_processed;
  // Set once find_top_ref() has produced the single-ref cutoff.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */

// Applies the speed-feature pruning chain for one entry of the mode order.
// Returns 1 when the mode at av1_default_mode_order[midx] should be skipped.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled elsewhere.
  if (ref_frame == INTRA_FRAME) return 1;

  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  // In low-temporal-variance blocks, only LAST_FRAME and NEARESTMV are worth
  // trying among single-reference modes.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  // ret == 2 means "keep the mode but skip its motion-mode search".
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref)) {
      return 1;
    }
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  // Real-time path: demote GOLDEN_FRAME modes when GOLDEN is stale and the
  // current best winner does not use it.
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}

// Tracks the best rd seen for each reference-mode bucket (single, compound,
// select) so the frame-level reference mode can be chosen afterwards.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

  // When the frame signals REFERENCE_MODE_SELECT, rd_stats->rate already
  // includes compmode_cost; otherwise it must be added for the hybrid rate.
  if (reference_mode == REFERENCE_MODE_SELECT) {
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // Note: single_rd (rate without the compound-mode signaling cost) is used
  // for both the SINGLE_REFERENCE and COMPOUND_REFERENCE buckets.
  if (!comp_pred) {
    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
  } else {
    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
  }
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] =
        hybrid_rd;
}

// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort candidates ascending by estimated rd before the expensive search.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count by the real-time speed-feature limit.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once estimated rd falls more than ~25% behind the best estimate.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Past the per-mode threshold, allow at most one extra search per mode
    // (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // NOTE(review): is_predictor_built is never set to true anywhere in this
    // function, so the "if (!is_predictor_built)" guard below always builds
    // the predictor — the flag appears vestigial; confirm before removing.
    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma-only rd, including the skip-txfm signaling cost.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    int64_t best_scaled_rd = search_state->best_rd;
    int64_t this_scaled_rd = rd_stats.rdcost;
    increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
                          &this_scaled_rd,
                          cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);
    if (this_scaled_rd < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    if (this_scaled_rd < best_scaled_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}

// Indicates number of winner simple translation modes to be used
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };

// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // Find the best location to insert the new motion mode candidate: the list
  // is kept sorted by ascending rd_cost, so insert before the first entry
  // whose rd_cost exceeds this_rd. If none does, the insert position defaults
  // to the current end of the list.
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if the location found is within the list capacity.
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift the tail of the list one slot right to make room. No shift is
    // needed when the list is empty or when inserting into the last slot
    // (the entry there, if any, is simply overwritten). The AOMMIN caps the
    // move so the worst candidate falls off the end instead of overflowing.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    // Clamp the candidate count to the list capacity.
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}

/*!\brief Search intra modes in interframes
 *
 * \ingroup intra_mode_search
 *
 * This function searches for the best intra mode when the current frame is an
 * interframe. This function however does *not* handle luma palette mode.
 * Palette mode is currently handled by \ref av1_search_palette_mode.
 *
 * This function will first iterate through the luma mode candidates to find
 * the best luma intra mode. Once the best luma mode is found, it will then
 * search for the best chroma mode. Because palette mode is currently not
 * handled here, a cache of uv mode is stored in
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
 * av1_search_palette_mode.
 *
 * \param[in,out] search_state Struct keeping track of the prediction mode
 *                             search state in interframe.
 *
 * \param[in]     cpi          Top-level encoder structure.
 * \param[in,out] x            Pointer to struct holding all the data for
 *                             the current prediction block.
 * \param[out]    rd_cost      Stores the best rd_cost among all the
 *                             prediction modes searched.
 * \param[in]     bsize        Current block size.
 * \param[in,out] ctx          Structure holding the number of 4x4 blks to
 *                             copy the tx_type and txfm_skip arrays for the
 *                             Y plane.
 * \param[in]     sf_args      Stores the list of intra mode candidates
 *                             to be searched.
 * \param[in]     intra_ref_frame_cost The entropy cost for signaling that the
 *                                     current ref frame is an intra frame.
 * \param[in]     yrd_threshold The rdcost threshold for a luma intra mode to
 *                              be allowed to proceed to chroma intra mode
 *                              search.
 *
 * \remark If a new best mode is found, search_state and rd_costs are updated
 * correspondingly. While x is also modified, it is only used as a temporary
 * buffer, and the final decisions are stored in search_state.
 */
static inline void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Bookkeeping for the best luma intra mode found so far.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }

  // NOTE(review): fork-specific gate — when sharpness tuning is enabled,
  // intra search in interframes is restricted to blocks of at most 4x4 mi
  // units; larger blocks skip the intra evaluation entirely.
  if (cpi->oxcf.algo_cfg.sharpness) {
    int bh = mi_size_high[bsize];
    int bw = mi_size_wide[bsize];
    if (bh > 4 || bw > 4) return;
  }

  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // Stop the luma search altogether if a speed feature decided intra modes
    // can be skipped for this block (flag may also be set by the pruning in
    // skip_intra_modes_in_interframe()).
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip modes disabled by configuration or speed features.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    // Skip non-zero angle deltas when angle deltas are not permitted for this
    // block size / configuration.
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);

    // NOTE(review): fork-specific RD bias hook (defined elsewhere in this
    // file); applied only when a finite luma rd estimate was produced.
    if (intra_rd_y < INT64_MAX) {
      adjust_cost(cpi, x, &intra_rd_y);
    }

    // Track the best luma mode; only modes beating yrd_threshold qualify to
    // continue on to the chroma search below.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate. Note this is deliberately done *after*
    // intra_rd_stats.rate is formed: it only adjusts the tokenonly stats that
    // are stored/forwarded below, not the merged rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // NOTE(review): fork-specific RD bias hook (defined elsewhere in this file)
  // applied to the merged stats before the final comparison.
  adjust_rdcost(cpi, x, &intra_rd_stats);

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}

#if !CONFIG_REALTIME_ONLY
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
// features in intra mode pruning.
static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
                                                int mi_row, int mi_col,
                                                int64_t *inter_cost,
                                                int64_t *intra_cost) {
  const AV1_COMMON *const cm = &cpi->common;
  // Only consider full SB.
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
  // Number of TPL blocks that cover a full superblock; stats are used only
  // when the whole SB has TPL data.
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
                  (block_size_high[sb_size] / tpl_bsize_1d);
  SuperBlockEnc *sb_enc = &x->sb_enc;
  if (sb_enc->tpl_data_count == len) {
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
    const int tpl_stride = sb_enc->tpl_stride;
    const int tplw = mi_size_wide[tpl_bsize];
    const int tplh = mi_size_high[tpl_bsize];
    // Number of TPL blocks covering the current block, horizontally and
    // vertically.
    const int nw = mi_size_wide[bsize] / tplw;
    const int nh = mi_size_high[bsize] / tplh;
    if (nw >= 1 && nh >= 1) {
      // Offset of the current block within its superblock, in mi units.
      const int of_h = mi_row % mi_size_high[sb_size];
      const int of_w = mi_col % mi_size_wide[sb_size];
      const int start = of_h / tplh * tpl_stride + of_w / tplw;

      // Accumulate the TPL costs over every TPL block covered by this block,
      // then average. Note the outputs are accumulated into, not assigned;
      // callers are expected to have initialized them.
      for (int k = 0; k < nh; k++) {
        for (int l = 0; l < nw; l++) {
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
        }
      }
      *inter_cost /= nw * nh;
      *intra_cost /= nw * nh;
    }
  }
}
#endif  // !CONFIG_REALTIME_ONLY

// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
// intra mode search.
5947 static inline void skip_intra_modes_in_interframe( 5948 AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize, 5949 InterModeSearchState *search_state, const SPEED_FEATURES *const sf, 5950 int64_t inter_cost, int64_t intra_cost) { 5951 MACROBLOCKD *const xd = &x->e_mbd; 5952 const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME; 5953 if (sf->rt_sf.prune_intra_mode_based_on_mv_range && 5954 bsize > sf->part_sf.max_intra_bsize && !comp_pred) { 5955 const MV best_mv = search_state->best_mbmode.mv[0].as_mv; 5956 const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range; 5957 if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh && 5958 x->source_variance > 128) { 5959 search_state->intra_search_state.skip_intra_modes = 1; 5960 return; 5961 } 5962 } 5963 5964 const unsigned int src_var_thresh_intra_skip = 1; 5965 const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe; 5966 if (!(skip_intra_in_interframe && 5967 (x->source_variance > src_var_thresh_intra_skip))) 5968 return; 5969 5970 // Prune intra search based on best inter mode being transfrom skip. 5971 if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) { 5972 const int qindex_thresh[2] = { 200, MAXQ }; 5973 const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0; 5974 if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) && 5975 (x->qindex <= qindex_thresh[ind])) { 5976 search_state->intra_search_state.skip_intra_modes = 1; 5977 return; 5978 } else if ((skip_intra_in_interframe >= 4) && 5979 (inter_cost < 0 || intra_cost < 0)) { 5980 search_state->intra_search_state.skip_intra_modes = 1; 5981 return; 5982 } 5983 } 5984 // Use ML model to prune intra search. 5985 if (inter_cost >= 0 && intra_cost >= 0) { 5986 const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480) 5987 ? 
&av1_intrap_nn_config 5988 : &av1_intrap_hd_nn_config; 5989 float nn_features[6]; 5990 float scores[2] = { 0.0f }; 5991 5992 nn_features[0] = (float)search_state->best_mbmode.skip_txfm; 5993 nn_features[1] = (float)mi_size_wide_log2[bsize]; 5994 nn_features[2] = (float)mi_size_high_log2[bsize]; 5995 nn_features[3] = (float)intra_cost; 5996 nn_features[4] = (float)inter_cost; 5997 const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd); 5998 const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd); 5999 nn_features[5] = (float)(ac_q_max / ac_q); 6000 6001 av1_nn_predict(nn_features, nn_config, 1, scores); 6002 6003 // For two parameters, the max prob returned from av1_nn_softmax equals 6004 // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the 6005 // calling of av1_nn_softmax. 6006 const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f }; 6007 assert(skip_intra_in_interframe <= 5); 6008 if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) { 6009 search_state->intra_search_state.skip_intra_modes = 1; 6010 } 6011 } 6012 } 6013 6014 static inline bool skip_interp_filter_search(const AV1_COMP *cpi, 6015 int is_single_pred) { 6016 const MODE encoding_mode = cpi->oxcf.mode; 6017 if (encoding_mode == REALTIME) { 6018 return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE && 6019 (cpi->sf.interp_sf.skip_interp_filter_search || 6020 cpi->sf.winner_mode_sf.winner_mode_ifs)); 6021 } else if (encoding_mode == GOOD) { 6022 // Skip interpolation filter search for single prediction modes. 
6023 return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred); 6024 } 6025 return false; 6026 } 6027 6028 static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x, 6029 BLOCK_SIZE bsize) { 6030 const AV1_COMMON *const cm = &cpi->common; 6031 const SPEED_FEATURES *const sf = &cpi->sf; 6032 6033 if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION || 6034 !sf->rt_sf.short_circuit_low_temp_var || 6035 !sf->rt_sf.prune_inter_modes_using_temp_var) { 6036 return 0; 6037 } 6038 6039 const int mi_row = x->e_mbd.mi_row; 6040 const int mi_col = x->e_mbd.mi_col; 6041 int is_low_temp_var = 0; 6042 6043 if (cm->seq_params->sb_size == BLOCK_64X64) 6044 is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb( 6045 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize); 6046 else 6047 is_low_temp_var = av1_get_force_skip_low_temp_var( 6048 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize); 6049 6050 return is_low_temp_var; 6051 } 6052 6053 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb. 
6054 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data, 6055 struct macroblock *x, struct RD_STATS *rd_cost, 6056 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, 6057 int64_t best_rd_so_far) { 6058 AV1_COMMON *const cm = &cpi->common; 6059 const FeatureFlags *const features = &cm->features; 6060 const int num_planes = av1_num_planes(cm); 6061 const SPEED_FEATURES *const sf = &cpi->sf; 6062 MACROBLOCKD *const xd = &x->e_mbd; 6063 MB_MODE_INFO *const mbmi = xd->mi[0]; 6064 TxfmSearchInfo *txfm_info = &x->txfm_search_info; 6065 int i; 6066 const ModeCosts *mode_costs = &x->mode_costs; 6067 const int *comp_inter_cost = 6068 mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)]; 6069 6070 InterModeSearchState search_state; 6071 init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far); 6072 INTERINTRA_MODE interintra_modes[REF_FRAMES] = { 6073 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, 6074 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES 6075 }; 6076 HandleInterModeArgs args = { { NULL }, 6077 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }, 6078 { NULL }, 6079 { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, 6080 MAX_SB_SIZE >> 1 }, 6081 NULL, 6082 NULL, 6083 NULL, 6084 search_state.modelled_rd, 6085 INT_MAX, 6086 INT_MAX, 6087 search_state.simple_rd, 6088 0, 6089 false, 6090 interintra_modes, 6091 { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } }, 6092 { { 0, 0 } }, 6093 { 0 }, 6094 0, 6095 0, 6096 -1, 6097 -1, 6098 -1, 6099 { 0 }, 6100 { 0 }, 6101 UINT_MAX }; 6102 // Currently, is_low_temp_var is used in real time encoding. 
6103 const int is_low_temp_var = get_block_temp_var(cpi, x, bsize); 6104 6105 for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1; 6106 // Indicates the appropriate number of simple translation winner modes for 6107 // exhaustive motion mode evaluation 6108 const int max_winner_motion_mode_cand = 6109 num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand]; 6110 assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES); 6111 motion_mode_candidate motion_mode_cand; 6112 motion_mode_best_st_candidate best_motion_mode_cands; 6113 // Initializing the number of motion mode candidates to zero. 6114 best_motion_mode_cands.num_motion_mode_cand = 0; 6115 for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i) 6116 best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX; 6117 6118 for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX; 6119 6120 av1_invalid_rd_stats(rd_cost); 6121 6122 for (i = 0; i < REF_FRAMES; ++i) { 6123 x->warp_sample_info[i].num = -1; 6124 } 6125 6126 // Ref frames that are selected by square partition blocks. 6127 int picked_ref_frames_mask = 0; 6128 if (sf->inter_sf.prune_ref_frame_for_rect_partitions && 6129 mbmi->partition != PARTITION_NONE) { 6130 // prune_ref_frame_for_rect_partitions = 1 implies prune only extended 6131 // partition blocks. prune_ref_frame_for_rect_partitions >=2 6132 // implies prune for vert, horiz and extended partition blocks. 6133 if ((mbmi->partition != PARTITION_VERT && 6134 mbmi->partition != PARTITION_HORZ) || 6135 sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) { 6136 picked_ref_frames_mask = 6137 fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size); 6138 } 6139 } 6140 6141 #if CONFIG_COLLECT_COMPONENT_TIMING 6142 start_timing(cpi, set_params_rd_pick_inter_mode_time); 6143 #endif 6144 // Skip ref frames that never selected by square blocks. 6145 const int skip_ref_frame_mask = 6146 picked_ref_frames_mask ? 
~picked_ref_frames_mask : 0; 6147 mode_skip_mask_t mode_skip_mask; 6148 unsigned int ref_costs_single[REF_FRAMES]; 6149 unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES]; 6150 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]; 6151 // init params, set frame modes, speed features 6152 set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask, 6153 skip_ref_frame_mask, ref_costs_single, 6154 ref_costs_comp, yv12_mb); 6155 #if CONFIG_COLLECT_COMPONENT_TIMING 6156 end_timing(cpi, set_params_rd_pick_inter_mode_time); 6157 #endif 6158 6159 int64_t best_est_rd = INT64_MAX; 6160 const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize]; 6161 // If do_tx_search is 0, only estimated RD should be computed. 6162 // If do_tx_search is 1, all modes have TX search performed. 6163 const int do_tx_search = 6164 !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) || 6165 (sf->inter_sf.inter_mode_rd_model_estimation == 2 && 6166 num_pels_log2_lookup[bsize] > 8)); 6167 InterModesInfo *inter_modes_info = x->inter_modes_info; 6168 inter_modes_info->num = 0; 6169 6170 // Temporary buffers used by handle_inter_mode(). 6171 uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]); 6172 6173 // The best RD found for the reference frame, among single reference modes. 6174 // Note that the 0-th element will contain a cut-off that is later used 6175 // to determine if we should skip a compound mode. 6176 int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX, 6177 INT64_MAX, INT64_MAX, INT64_MAX, 6178 INT64_MAX, INT64_MAX }; 6179 6180 // Prepared stats used later to check if we could skip intra mode eval. 6181 int64_t inter_cost = -1; 6182 int64_t intra_cost = -1; 6183 // Need to tweak the threshold for hdres speed 0 & 1. 
6184 const int mi_row = xd->mi_row; 6185 const int mi_col = xd->mi_col; 6186 6187 // Obtain the relevant tpl stats for pruning inter modes 6188 PruneInfoFromTpl inter_cost_info_from_tpl; 6189 #if !CONFIG_REALTIME_ONLY 6190 if (sf->inter_sf.prune_inter_modes_based_on_tpl) { 6191 // x->tpl_keep_ref_frame[id] = 1 => no pruning in 6192 // prune_ref_by_selective_ref_frame() 6193 // x->tpl_keep_ref_frame[id] = 0 => ref frame can be pruned in 6194 // prune_ref_by_selective_ref_frame() 6195 // Populating valid_refs[idx] = 1 ensures that 6196 // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a 6197 // pruned ref frame. 6198 int valid_refs[INTER_REFS_PER_FRAME]; 6199 for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) { 6200 const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME }; 6201 valid_refs[frame - 1] = 6202 x->tpl_keep_ref_frame[frame] || 6203 !prune_ref_by_selective_ref_frame( 6204 cpi, x, refs, cm->cur_frame->ref_display_order_hint); 6205 } 6206 av1_zero(inter_cost_info_from_tpl); 6207 get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs, 6208 &inter_cost_info_from_tpl); 6209 } 6210 6211 const int do_pruning = 6212 (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1; 6213 if (do_pruning && sf->intra_sf.skip_intra_in_interframe && 6214 cpi->oxcf.algo_cfg.enable_tpl_model) 6215 calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost, 6216 &intra_cost); 6217 #endif // !CONFIG_REALTIME_ONLY 6218 6219 // Initialize best mode stats for winner mode processing. 
6220 const int max_winner_mode_count = 6221 winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type]; 6222 zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats); 6223 x->winner_mode_count = 0; 6224 store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID, 6225 NULL, bsize, best_rd_so_far, 6226 sf->winner_mode_sf.multi_winner_mode_type, 0); 6227 6228 int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS); 6229 if (sf->inter_sf.prune_inter_modes_if_skippable) { 6230 // Higher multiplication factor values for lower quantizers. 6231 mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex]; 6232 } 6233 6234 // Initialize arguments for mode loop speed features 6235 InterModeSFArgs sf_args = { &args.skip_motion_mode, 6236 &mode_skip_mask, 6237 &search_state, 6238 skip_ref_frame_mask, 6239 0, 6240 mode_thresh_mul_fact, 6241 0, 6242 0 }; 6243 int64_t best_inter_yrd = INT64_MAX; 6244 6245 // This is the main loop of this function. It loops over all possible inter 6246 // modes and calls handle_inter_mode() to compute the RD for each. 6247 // Here midx is just an iterator index that should not be used by itself 6248 // except to keep track of the number of modes searched. It should be used 6249 // with av1_default_mode_order to get the enum that defines the mode, which 6250 // can be used with av1_mode_defs to get the prediction mode and the ref 6251 // frames. 6252 // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings 6253 // good speedup for real time case. If we decide to use compound mode in real 6254 // time, maybe we can modify av1_default_mode_order table. 
6255 THR_MODES mode_start = THR_INTER_MODE_START; 6256 THR_MODES mode_end = THR_INTER_MODE_END; 6257 const CurrentFrame *const current_frame = &cm->current_frame; 6258 if (current_frame->reference_mode == SINGLE_REFERENCE) { 6259 mode_start = SINGLE_REF_MODE_START; 6260 mode_end = SINGLE_REF_MODE_END; 6261 } 6262 6263 for (THR_MODES midx = mode_start; midx < mode_end; ++midx) { 6264 // Get the actual prediction mode we are trying in this iteration 6265 const THR_MODES mode_enum = av1_default_mode_order[midx]; 6266 const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum]; 6267 const PREDICTION_MODE this_mode = mode_def->mode; 6268 const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame; 6269 6270 const MV_REFERENCE_FRAME ref_frame = ref_frames[0]; 6271 const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1]; 6272 const int is_single_pred = 6273 ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME; 6274 const int comp_pred = second_ref_frame > INTRA_FRAME; 6275 6276 init_mbmi(mbmi, this_mode, ref_frames, cm); 6277 6278 txfm_info->skip_txfm = 0; 6279 sf_args.num_single_modes_processed += is_single_pred; 6280 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); 6281 #if CONFIG_COLLECT_COMPONENT_TIMING 6282 start_timing(cpi, skip_inter_mode_time); 6283 #endif 6284 // Apply speed features to decide if this inter mode can be skipped 6285 const int is_skip_inter_mode = skip_inter_mode( 6286 cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var); 6287 #if CONFIG_COLLECT_COMPONENT_TIMING 6288 end_timing(cpi, skip_inter_mode_time); 6289 #endif 6290 if (is_skip_inter_mode) continue; 6291 6292 // Select prediction reference frames. 
6293 for (i = 0; i < num_planes; i++) { 6294 xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; 6295 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; 6296 } 6297 6298 mbmi->angle_delta[PLANE_TYPE_Y] = 0; 6299 mbmi->angle_delta[PLANE_TYPE_UV] = 0; 6300 mbmi->filter_intra_mode_info.use_filter_intra = 0; 6301 mbmi->ref_mv_idx = 0; 6302 6303 const int64_t ref_best_rd = search_state.best_rd; 6304 RD_STATS rd_stats, rd_stats_y, rd_stats_uv; 6305 av1_init_rd_stats(&rd_stats); 6306 6307 const int ref_frame_cost = comp_pred 6308 ? ref_costs_comp[ref_frame][second_ref_frame] 6309 : ref_costs_single[ref_frame]; 6310 const int compmode_cost = 6311 is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0; 6312 const int real_compmode_cost = 6313 cm->current_frame.reference_mode == REFERENCE_MODE_SELECT 6314 ? compmode_cost 6315 : 0; 6316 // Point to variables that are maintained between loop iterations 6317 args.single_newmv = search_state.single_newmv; 6318 args.single_newmv_rate = search_state.single_newmv_rate; 6319 args.single_newmv_valid = search_state.single_newmv_valid; 6320 args.single_comp_cost = real_compmode_cost; 6321 args.ref_frame_cost = ref_frame_cost; 6322 args.best_pred_sse = search_state.best_pred_sse; 6323 args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred); 6324 int64_t skip_rd[2] = { search_state.best_skip_rd[0], 6325 search_state.best_skip_rd[1] }; 6326 int64_t this_yrd = INT64_MAX; 6327 #if CONFIG_COLLECT_COMPONENT_TIMING 6328 start_timing(cpi, handle_inter_mode_time); 6329 #endif 6330 int64_t this_rd = handle_inter_mode( 6331 cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args, 6332 ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search, 6333 inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl, 6334 &this_yrd); 6335 #if CONFIG_COLLECT_COMPONENT_TIMING 6336 end_timing(cpi, handle_inter_mode_time); 6337 #endif 6338 if (current_frame->reference_mode != SINGLE_REFERENCE) { 
6339 if (!args.skip_ifs && 6340 sf->inter_sf.prune_comp_search_by_single_result > 0 && 6341 is_inter_singleref_mode(this_mode)) { 6342 collect_single_states(x, &search_state, mbmi); 6343 } 6344 6345 if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 && 6346 is_inter_singleref_mode(this_mode)) 6347 update_best_single_mode(&search_state, this_mode, ref_frame, this_rd); 6348 } 6349 6350 if (this_rd == INT64_MAX) continue; 6351 6352 if (mbmi->skip_txfm) { 6353 rd_stats_y.rate = 0; 6354 rd_stats_uv.rate = 0; 6355 } 6356 6357 if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred && 6358 this_rd < ref_frame_rd[ref_frame]) { 6359 ref_frame_rd[ref_frame] = this_rd; 6360 } 6361 6362 adjust_cost(cpi, x, &this_rd); 6363 adjust_rdcost(cpi, x, &rd_stats); 6364 6365 // Did this mode help, i.e., is it the new best mode 6366 if (this_rd < search_state.best_rd) { 6367 assert(IMPLIES(comp_pred, 6368 cm->current_frame.reference_mode != SINGLE_REFERENCE)); 6369 search_state.best_pred_sse = x->pred_sse[ref_frame]; 6370 best_inter_yrd = this_yrd; 6371 update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y, 6372 &rd_stats_uv, mode_enum, x, do_tx_search); 6373 if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0]; 6374 // skip_rd[0] is the best total rd for a skip mode so far. 6375 // skip_rd[1] is the best total rd for a skip mode so far in luma. 6376 // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated. 6377 // When do_tx_search = 0, skip_rd[1] is updated. 
6378 search_state.best_skip_rd[1] = skip_rd[1]; 6379 } 6380 if (sf->winner_mode_sf.motion_mode_for_winner_cand) { 6381 // Add this mode to motion mode candidate list for motion mode search 6382 // if using motion_mode_for_winner_cand speed feature 6383 handle_winner_cand(mbmi, &best_motion_mode_cands, 6384 max_winner_motion_mode_cand, this_rd, 6385 &motion_mode_cand, args.skip_motion_mode); 6386 } 6387 6388 /* keep record of best compound/single-only prediction */ 6389 record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred, 6390 x->rdmult, &search_state, compmode_cost); 6391 } 6392 6393 #if CONFIG_COLLECT_COMPONENT_TIMING 6394 start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time); 6395 #endif 6396 if (sf->winner_mode_sf.motion_mode_for_winner_cand) { 6397 // For the single ref winner candidates, evaluate other motion modes (non 6398 // simple translation). 6399 evaluate_motion_mode_for_winner_candidates( 6400 cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb, 6401 &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd, 6402 &search_state, &best_inter_yrd); 6403 } 6404 #if CONFIG_COLLECT_COMPONENT_TIMING 6405 end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time); 6406 #endif 6407 6408 #if CONFIG_COLLECT_COMPONENT_TIMING 6409 start_timing(cpi, do_tx_search_time); 6410 #endif 6411 if (do_tx_search != 1) { 6412 // A full tx search has not yet been done, do tx search for 6413 // top mode candidates 6414 tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize, 6415 yv12_mb, mi_row, mi_col, &search_state, 6416 rd_cost, ctx, &best_inter_yrd); 6417 } 6418 #if CONFIG_COLLECT_COMPONENT_TIMING 6419 end_timing(cpi, do_tx_search_time); 6420 #endif 6421 6422 #if CONFIG_COLLECT_COMPONENT_TIMING 6423 start_timing(cpi, handle_intra_mode_time); 6424 #endif 6425 // Gate intra mode evaluation if best of inter is skip except when source 6426 // variance is extremely low and also based on max intra bsize. 
6427 skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost, 6428 intra_cost); 6429 6430 const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME]; 6431 search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx, 6432 &sf_args, intra_ref_frame_cost, 6433 best_inter_yrd); 6434 #if CONFIG_COLLECT_COMPONENT_TIMING 6435 end_timing(cpi, handle_intra_mode_time); 6436 #endif 6437 6438 #if CONFIG_COLLECT_COMPONENT_TIMING 6439 start_timing(cpi, refine_winner_mode_tx_time); 6440 #endif 6441 int winner_mode_count = 6442 sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1; 6443 // In effect only when fast tx search speed features are enabled. 6444 refine_winner_mode_tx( 6445 cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index, 6446 &search_state.best_mbmode, yv12_mb, search_state.best_rate_y, 6447 search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count); 6448 #if CONFIG_COLLECT_COMPONENT_TIMING 6449 end_timing(cpi, refine_winner_mode_tx_time); 6450 #endif 6451 6452 // Initialize default mode evaluation params 6453 set_mode_eval_params(cpi, x, DEFAULT_EVAL); 6454 6455 // Only try palette mode when the best mode so far is an intra mode. 
6456 const int try_palette = 6457 cpi->oxcf.tool_cfg.enable_palette && 6458 av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) && 6459 !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX; 6460 RD_STATS this_rd_cost; 6461 int this_skippable = 0; 6462 if (try_palette) { 6463 #if CONFIG_COLLECT_COMPONENT_TIMING 6464 start_timing(cpi, av1_search_palette_mode_time); 6465 #endif 6466 this_skippable = av1_search_palette_mode( 6467 &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost, 6468 ctx, &this_rd_cost, search_state.best_rd); 6469 #if CONFIG_COLLECT_COMPONENT_TIMING 6470 end_timing(cpi, av1_search_palette_mode_time); 6471 #endif 6472 if (this_rd_cost.rdcost < search_state.best_rd) { 6473 search_state.best_mode_index = THR_DC; 6474 mbmi->mv[0].as_int = 0; 6475 rd_cost->rate = this_rd_cost.rate; 6476 rd_cost->dist = this_rd_cost.dist; 6477 rd_cost->rdcost = this_rd_cost.rdcost; 6478 search_state.best_rd = rd_cost->rdcost; 6479 search_state.best_mbmode = *mbmi; 6480 search_state.best_skip2 = 0; 6481 search_state.best_mode_skippable = this_skippable; 6482 memcpy(ctx->blk_skip, txfm_info->blk_skip, 6483 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk); 6484 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); 6485 } 6486 } 6487 6488 search_state.best_mbmode.skip_mode = 0; 6489 if (cm->current_frame.skip_mode_info.skip_mode_flag && 6490 cpi->oxcf.algo_cfg.sharpness != 3 && is_comp_ref_allowed(bsize)) { 6491 const struct segmentation *const seg = &cm->seg; 6492 unsigned char segment_id = mbmi->segment_id; 6493 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { 6494 rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb); 6495 } 6496 } 6497 6498 // Make sure that the ref_mv_idx is only nonzero when we're 6499 // using a mode which can support ref_mv_idx 6500 if (search_state.best_mbmode.ref_mv_idx != 0 && 6501 !(search_state.best_mbmode.mode == NEWMV || 6502 
search_state.best_mbmode.mode == NEW_NEWMV || 6503 have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) { 6504 search_state.best_mbmode.ref_mv_idx = 0; 6505 } 6506 6507 if (search_state.best_mode_index == THR_INVALID || 6508 search_state.best_rd >= best_rd_so_far) { 6509 rd_cost->rate = INT_MAX; 6510 rd_cost->rdcost = INT64_MAX; 6511 return; 6512 } 6513 6514 const InterpFilter interp_filter = features->interp_filter; 6515 assert((interp_filter == SWITCHABLE) || 6516 (interp_filter == 6517 search_state.best_mbmode.interp_filters.as_filters.y_filter) || 6518 !is_inter_block(&search_state.best_mbmode)); 6519 assert((interp_filter == SWITCHABLE) || 6520 (interp_filter == 6521 search_state.best_mbmode.interp_filters.as_filters.x_filter) || 6522 !is_inter_block(&search_state.best_mbmode)); 6523 6524 if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) { 6525 av1_update_rd_thresh_fact( 6526 cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize, 6527 search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES); 6528 } 6529 6530 // macroblock modes 6531 *mbmi = search_state.best_mbmode; 6532 txfm_info->skip_txfm |= search_state.best_skip2; 6533 6534 // Note: this section is needed since the mode may have been forced to 6535 // GLOBALMV by the all-zero mode handling of ref-mv. 
  // NOTE(review): the code down to the first closing brace below is the tail
  // of av1_rd_pick_inter_mode_sb(), whose beginning lies outside this chunk;
  // it is reproduced unchanged.
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
    // Correct the interp filters for GLOBALMV
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
      int_interpfilters filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      assert(mbmi->interp_filters.as_int == filters.as_int);
      (void)filters;
    }
  }

  txfm_info->skip_txfm |= search_state.best_mode_skippable;

  assert(search_state.best_mode_index != THR_INVALID);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, search_state.best_mode_index,
                       search_state.best_mode_skippable);
#else
  store_coding_context(x, ctx, search_state.best_mode_skippable);
#endif  // CONFIG_INTERNAL_STATS

  if (mbmi->palette_mode_info.palette_size[1] > 0) {
    assert(try_palette);
    av1_restore_uv_color_map(cpi, x);
  }
}

// RD "mode decision" for a block whose segment has SEG_LVL_SKIP active
// (asserted below): no mode search is run. The block is forced to GLOBALMV
// with simple translation, transform coding is skipped
// (x->txfm_search_info.skip_txfm = 1), and the only search performed is for
// the cheapest switchable interpolation filter. rd_cost is filled with the
// resulting rate/distortion, or rate = INT_MAX if the cost does not beat
// best_rd_so_far.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // Compound prediction is never used on this path.
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;  // No residual is coded, so distortion is 0.
  // NOTE(review): mi_row/mi_col are marked unused here but are in fact read
  // by the gm_get_motion_vector() call below; the (void) casts are redundant.
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  // Invalidate cached per-reference SSE / MV-SAD estimates.
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the fixed mode: GLOBALMV, single reference, DC chroma, no palette
  // or filter-intra, and the global motion vector of the chosen reference.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  // NOTE(review): motion_mode is assigned SIMPLE_TRANSLATION a second time
  // here; the earlier assignment above makes this one redundant.
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  // Gather warped-motion projection samples even though SIMPLE_TRANSLATION is
  // used, so num_proj_ref is populated for downstream signaling.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    // Filter is switchable: pick the one with the lowest signaling rate
    // (distortion is irrelevant since the residual is skipped).
    best_filter = EIGHTTAP_REGULAR;
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Signal "no usable mode" if even the skip mode cannot beat the caller's
  // best RD so far.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}

/*!\cond */
// Shared context passed to the per-neighbor OBMC callbacks below.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // Destination wsrc/mask buffers.
  const uint8_t *tmp;             // Neighbor predictor (above or left).
  int tmp_stride;                 // Stride of 'tmp' in pixels.
  int overlap;                    // Overlap extent in pixels.
};
/*!\endcond */

// foreach_overlappable_nb_above() callback: for one overlappable neighbor
// above, initializes wsrc to (MAX_ALPHA - m(row)) * Pabove and mask to m(row)
// over the vertical overlap region, in both low- and high-bitdepth paths.
static inline void calc_target_weighted_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO
    *nb_mi, void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;
  (void)rel_mi_row;
  (void)dir;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int bw = xd->width << MI_SIZE_LOG2;
  // 1-D OBMC blending mask for this overlap size; indexed by row below.
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  // Offset into the current block's buffers by the neighbor's column.
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
  const int is_hbd = is_cur_buf_hbd(xd);

  if (!is_hbd) {
    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = mask1d[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
        // wsrc <- Cv(row) * Pabove, mask <- Mv(row) (see derivation below).
        wsrc[col] = m1 * tmp[col];
        mask[col] = m0;
      }
      wsrc += bw;
      mask += bw;
      tmp += ctxt->tmp_stride;
    }
  } else {
    // High bitdepth: same computation on 16-bit predictor samples.
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);

    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = mask1d[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
        wsrc[col] = m1 * tmp16[col];
        mask[col] = m0;
      }
      wsrc += bw;
      mask += bw;
      tmp16 += ctxt->tmp_stride;
    }
  }
}

// foreach_overlappable_nb_left() callback: for one overlappable neighbor to
// the left, blends the left predictor into the already-initialized wsrc/mask
// over the horizontal overlap region, using the column-indexed 1-D mask.
// Unlike the 'above' pass, this one accumulates on top of existing values
// (note the >> AOM_BLEND_A64_ROUND_BITS of the previous contents).
static inline void calc_target_weighted_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;
  (void)rel_mi_col;
  (void)dir;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int bw = xd->width << MI_SIZE_LOG2;
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  // Offset into the current block's buffers by the neighbor's row.
  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
  const int is_hbd = is_cur_buf_hbd(xd);

  if (!is_hbd) {
    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = mask1d[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        // Fold the left predictor in: keep Mh(col) of the existing value,
        // add Ch(col) of PLeft (both rescaled by the blend round bits).
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc += bw;
      mask += bw;
      tmp += ctxt->tmp_stride;
    }
  } else {
    // High bitdepth: same computation on 16-bit predictor samples.
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);

    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = mask1d[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc += bw;
      mask += bw;
      tmp16 += ctxt->tmp_stride;
    }
  }
}

// This function has a structure similar to av1_build_obmc_inter_prediction
//
// The OBMC predictor is computed as:
//
//  PObmc(x,y) =
//    AOM_BLEND_A64(Mh(x),
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
//                  PLeft(x, y))
//
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
// rounding, this can be written as:
//
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
//    Mh(x) * Mv(y) * P(x,y) +
//      Mh(x) * Cv(y) * Pabove(x,y) +
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// Where :
//
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
//
// This function computes 'wsrc' and 'mask' as:
//
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      Mh(x) * Cv(y) * Pabove(x,y) +
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// mask(x, y) = Mh(x) * Mv(y)
//
// These can then be used to efficiently approximate the error for any
// predictor P in the context of the provided neighbouring predictors by
// computing:
//
//  error(x, y) =
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
//
static inline void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int bw = xd->width << MI_SIZE_LOG2;
  const int bh = xd->height << MI_SIZE_LOG2;
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
  int32_t *mask_buf = obmc_buffer->mask;
  int32_t *wsrc_buf = obmc_buffer->wsrc;

  const int is_hbd = is_cur_buf_hbd(xd);
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;

  // plane 0 should not be sub-sampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  // Start from wsrc = 0, mask = MAX_ALPHA (i.e. no neighbor contribution).
  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
                                                   above_stride, overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  // Promote both buffers by one MAX_ALPHA factor so the left pass (and the
  // final formula) operate at the MAX_ALPHA**2 scale described above.
  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
                                                   left_stride, overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  // Finally fold in the source: wsrc = MAX_ALPHA**2 * src - wsrc.
  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  } else {
    // High bitdepth: identical arithmetic on 16-bit source samples.
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  }
}