tpl_model.c (101531B)
1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 #include <float.h> 14 #include <stdint.h> 15 16 #include "config/aom_config.h" 17 18 #if CONFIG_THREE_PASS 19 #include "av1/encoder/thirdpass.h" 20 #endif 21 #include "config/aom_dsp_rtcd.h" 22 #include "config/aom_scale_rtcd.h" 23 24 #include "aom/aom_codec.h" 25 #include "aom_util/aom_pthread.h" 26 27 #include "av1/common/av1_common_int.h" 28 #include "av1/common/enums.h" 29 #include "av1/common/idct.h" 30 #include "av1/common/reconintra.h" 31 32 #include "av1/encoder/encoder.h" 33 #include "av1/encoder/ethread.h" 34 #include "av1/encoder/encodeframe_utils.h" 35 #include "av1/encoder/encode_strategy.h" 36 #include "av1/encoder/hybrid_fwd_txfm.h" 37 #include "av1/encoder/motion_search_facade.h" 38 #include "av1/encoder/rd.h" 39 #include "av1/encoder/rdopt.h" 40 #include "av1/encoder/reconinter_enc.h" 41 #include "av1/encoder/tpl_model.h" 42 43 static inline double exp_bounded(double v) { 44 // When v > 700 or <-700, the exp function will be close to overflow 45 // For details, see the "Notes" in the following link. 46 // https://en.cppreference.com/w/c/numeric/math/exp 47 if (v > 700) { 48 return DBL_MAX; 49 } else if (v < -700) { 50 return 0; 51 } 52 return exp(v); 53 } 54 55 void av1_init_tpl_txfm_stats(TplTxfmStats *tpl_txfm_stats) { 56 tpl_txfm_stats->ready = 0; 57 tpl_txfm_stats->coeff_num = 256; 58 tpl_txfm_stats->txfm_block_count = 0; 59 memset(tpl_txfm_stats->abs_coeff_sum, 0, 60 sizeof(tpl_txfm_stats->abs_coeff_sum[0]) * tpl_txfm_stats->coeff_num); 61 memset(tpl_txfm_stats->abs_coeff_mean, 0, 62 sizeof(tpl_txfm_stats->abs_coeff_mean[0]) * tpl_txfm_stats->coeff_num); 63 } 64 65 #if CONFIG_BITRATE_ACCURACY 66 void av1_accumulate_tpl_txfm_stats(const TplTxfmStats *sub_stats, 67 TplTxfmStats *accumulated_stats) { 68 accumulated_stats->txfm_block_count += sub_stats->txfm_block_count; 69 for (int i = 0; i < accumulated_stats->coeff_num; ++i) { 70 accumulated_stats->abs_coeff_sum[i] += sub_stats->abs_coeff_sum[i]; 71 } 72 } 73 74 void av1_record_tpl_txfm_block(TplTxfmStats *tpl_txfm_stats, 75 const tran_low_t *coeff) { 76 // For transform larger than 16x16, the scale of coeff need to be adjusted. 77 // It's not LOSSLESS_Q_STEP. 78 assert(tpl_txfm_stats->coeff_num <= 256); 79 for (int i = 0; i < tpl_txfm_stats->coeff_num; ++i) { 80 tpl_txfm_stats->abs_coeff_sum[i] += abs(coeff[i]) / (double)LOSSLESS_Q_STEP; 81 } 82 ++tpl_txfm_stats->txfm_block_count; 83 } 84 85 void av1_tpl_txfm_stats_update_abs_coeff_mean(TplTxfmStats *txfm_stats) { 86 if (txfm_stats->txfm_block_count > 0) { 87 for (int j = 0; j < txfm_stats->coeff_num; j++) { 88 txfm_stats->abs_coeff_mean[j] = 89 txfm_stats->abs_coeff_sum[j] / txfm_stats->txfm_block_count; 90 } 91 txfm_stats->ready = 1; 92 } else { 93 txfm_stats->ready = 0; 94 } 95 } 96 97 static inline void av1_tpl_store_txfm_stats(TplParams *tpl_data, 98 const TplTxfmStats *tpl_txfm_stats, 99 const int frame_index) { 100 tpl_data->txfm_stats_list[frame_index] = *tpl_txfm_stats; 101 } 102 #endif // CONFIG_BITRATE_ACCURACY 103 104 static inline void get_quantize_error(const MACROBLOCK *x, int plane, 105 const tran_low_t *coeff, 106 tran_low_t *qcoeff, tran_low_t *dqcoeff, 107 TX_SIZE tx_size, uint16_t *eob, 108 int64_t *recon_error, int64_t *sse) { 109 const struct macroblock_plane *const p = &x->plane[plane]; 110 const MACROBLOCKD *xd = &x->e_mbd; 111 const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; 112 int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; 113 const int shift = tx_size == TX_32X32 ? 0 : 2; 114 115 QUANT_PARAM quant_param; 116 av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param); 117 118 #if CONFIG_AV1_HIGHBITDEPTH 119 if (is_cur_buf_hbd(xd)) { 120 av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, eob, 121 scan_order, &quant_param); 122 *recon_error = 123 av1_highbd_block_error(coeff, dqcoeff, pix_num, sse, xd->bd) >> shift; 124 } else { 125 av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, eob, scan_order, 126 &quant_param); 127 *recon_error = av1_block_error(coeff, dqcoeff, pix_num, sse) >> shift; 128 } 129 #else 130 (void)xd; 131 av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, eob, scan_order, 132 &quant_param); 133 *recon_error = av1_block_error(coeff, dqcoeff, pix_num, sse) >> shift; 134 #endif // CONFIG_AV1_HIGHBITDEPTH 135 136 *recon_error = AOMMAX(*recon_error, 1); 137 138 *sse = (*sse) >> shift; 139 *sse = AOMMAX(*sse, 1); 140 } 141 142 static inline void set_tpl_stats_block_size(uint8_t *block_mis_log2, 143 uint8_t *tpl_bsize_1d) { 144 // tpl stats bsize: 2 means 16x16 145 *block_mis_log2 = 2; 146 // Block size used in tpl motion estimation 147 *tpl_bsize_1d = 16; 148 // MIN_TPL_BSIZE_1D = 16; 149 assert(*tpl_bsize_1d >= 16); 150 } 151 152 void av1_setup_tpl_buffers(AV1_PRIMARY *const ppi, 153 CommonModeInfoParams *const mi_params, int width, 154 int height, int byte_alignment, int lag_in_frames) { 155 SequenceHeader *const seq_params = &ppi->seq_params; 156 TplParams *const tpl_data = &ppi->tpl_data; 157 set_tpl_stats_block_size(&tpl_data->tpl_stats_block_mis_log2, 158 &tpl_data->tpl_bsize_1d); 159 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; 160 tpl_data->border_in_pixels = 161 ALIGN_POWER_OF_TWO(tpl_data->tpl_bsize_1d + 2 * AOM_INTERP_EXTEND, 5); 162 163 const int alloc_y_plane_only = 164 ppi->cpi->sf.tpl_sf.use_y_only_rate_distortion ? 1 : 0; 165 for (int frame = 0; frame < MAX_LENGTH_TPL_FRAME_STATS; ++frame) { 166 const int mi_cols = 167 ALIGN_POWER_OF_TWO(mi_params->mi_cols, MAX_MIB_SIZE_LOG2); 168 const int mi_rows = 169 ALIGN_POWER_OF_TWO(mi_params->mi_rows, MAX_MIB_SIZE_LOG2); 170 TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame]; 171 tpl_frame->is_valid = 0; 172 tpl_frame->width = mi_cols >> block_mis_log2; 173 tpl_frame->height = mi_rows >> block_mis_log2; 174 tpl_frame->stride = tpl_data->tpl_stats_buffer[frame].width; 175 tpl_frame->mi_rows = mi_params->mi_rows; 176 tpl_frame->mi_cols = mi_params->mi_cols; 177 } 178 tpl_data->tpl_frame = &tpl_data->tpl_stats_buffer[REF_FRAMES + 1]; 179 180 // If lag_in_frames <= 1, TPL module is not invoked. Hence dynamic memory 181 // allocations are avoided for buffers in tpl_data. 182 if (lag_in_frames <= 1) return; 183 184 AOM_CHECK_MEM_ERROR(&ppi->error, tpl_data->txfm_stats_list, 185 aom_calloc(MAX_LENGTH_TPL_FRAME_STATS, 186 sizeof(*tpl_data->txfm_stats_list))); 187 188 for (int frame = 0; frame < lag_in_frames; ++frame) { 189 AOM_CHECK_MEM_ERROR( 190 &ppi->error, tpl_data->tpl_stats_pool[frame], 191 aom_calloc(tpl_data->tpl_stats_buffer[frame].width * 192 tpl_data->tpl_stats_buffer[frame].height, 193 sizeof(*tpl_data->tpl_stats_buffer[frame].tpl_stats_ptr))); 194 195 if (aom_alloc_frame_buffer( 196 &tpl_data->tpl_rec_pool[frame], width, height, 197 seq_params->subsampling_x, seq_params->subsampling_y, 198 seq_params->use_highbitdepth, tpl_data->border_in_pixels, 199 byte_alignment, false, alloc_y_plane_only)) 200 aom_internal_error(&ppi->error, AOM_CODEC_MEM_ERROR, 201 "Failed to allocate frame buffer"); 202 } 203 } 204 205 static inline int32_t tpl_get_satd_cost(BitDepthInfo bd_info, int16_t *src_diff, 206 int diff_stride, const uint8_t *src, 207 int src_stride, const uint8_t *dst, 208 int dst_stride, tran_low_t *coeff, 209 int bw, int bh, TX_SIZE tx_size) { 210 const int pix_num = bw * bh; 211 212 av1_subtract_block(bd_info, bh, bw, src_diff, diff_stride, src, src_stride, 213 dst, dst_stride); 214 av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, bw, coeff); 215 return aom_satd(coeff, pix_num); 216 } 217 218 static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) { 219 const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; 220 221 assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob); 222 int rate_cost = 1; 223 224 for (int idx = 0; idx < eob; ++idx) { 225 unsigned int abs_level = abs(qcoeff[scan_order->scan[idx]]); 226 rate_cost += get_msb(abs_level + 1) + 1 + (abs_level > 0); 227 } 228 229 return (rate_cost << AV1_PROB_COST_SHIFT); 230 } 231 232 static inline void txfm_quant_rdcost( 233 const MACROBLOCK *x, int16_t *src_diff, int diff_stride, uint8_t *src, 234 int src_stride, uint8_t *dst, int dst_stride, tran_low_t *coeff, 235 tran_low_t *qcoeff, tran_low_t *dqcoeff, int bw, int bh, TX_SIZE tx_size, 236 int do_recon, int *rate_cost, int64_t *recon_error, int64_t *sse) { 237 const MACROBLOCKD *xd = &x->e_mbd; 238 const BitDepthInfo bd_info = get_bit_depth_info(xd); 239 uint16_t eob; 240 av1_subtract_block(bd_info, bh, bw, src_diff, diff_stride, src, src_stride, 241 dst, dst_stride); 242 av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, bw, coeff); 243 244 get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, &eob, recon_error, 245 sse); 246 247 *rate_cost = rate_estimator(qcoeff, eob, tx_size); 248 249 if (do_recon) 250 av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst, 251 dst_stride, eob, 0); 252 } 253 254 static uint32_t motion_estimation(AV1_COMP *cpi, MACROBLOCK *x, 255 uint8_t *cur_frame_buf, 256 uint8_t *ref_frame_buf, int stride, 257 int ref_stride, int width, int ref_width, 258 BLOCK_SIZE bsize, MV center_mv, 259 int_mv *best_mv) { 260 AV1_COMMON *cm = &cpi->common; 261 MACROBLOCKD *const xd = &x->e_mbd; 262 TPL_SPEED_FEATURES *tpl_sf = &cpi->sf.tpl_sf; 263 int step_param; 264 uint32_t bestsme = UINT_MAX; 265 FULLPEL_MV_STATS best_mv_stats; 266 int distortion; 267 uint32_t sse; 268 int cost_list[5]; 269 FULLPEL_MV start_mv = get_fullmv_from_mv(¢er_mv); 270 271 // Setup frame pointers 272 x->plane[0].src.buf = cur_frame_buf; 273 x->plane[0].src.stride = stride; 274 x->plane[0].src.width = width; 275 xd->plane[0].pre[0].buf = ref_frame_buf; 276 xd->plane[0].pre[0].stride = ref_stride; 277 xd->plane[0].pre[0].width = ref_width; 278 279 step_param = tpl_sf->reduce_first_step_size; 280 step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2); 281 282 const search_site_config *search_site_cfg = 283 cpi->mv_search_params.search_site_cfg[SS_CFG_SRC]; 284 if (search_site_cfg->stride != ref_stride) 285 search_site_cfg = cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD]; 286 assert(search_site_cfg->stride == ref_stride); 287 288 FULLPEL_MOTION_SEARCH_PARAMS full_ms_params; 289 av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, ¢er_mv, 290 start_mv, search_site_cfg, 291 tpl_sf->search_method, 292 /*fine_search_interval=*/0); 293 294 bestsme = av1_full_pixel_search(start_mv, &full_ms_params, step_param, 295 cond_cost_list(cpi, cost_list), 296 &best_mv->as_fullmv, &best_mv_stats, NULL); 297 298 // When sub-pel motion search is skipped, populate sub-pel precision MV and 299 // return. 300 if (tpl_sf->subpel_force_stop == FULL_PEL) { 301 best_mv->as_mv = get_mv_from_fullmv(&best_mv->as_fullmv); 302 return bestsme; 303 } 304 305 SUBPEL_MOTION_SEARCH_PARAMS ms_params; 306 av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, ¢er_mv, 307 cost_list); 308 ms_params.forced_stop = tpl_sf->subpel_force_stop; 309 ms_params.var_params.subpel_search_type = USE_2_TAPS; 310 ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE; 311 best_mv_stats.err_cost = 0; 312 MV subpel_start_mv = get_mv_from_fullmv(&best_mv->as_fullmv); 313 assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); 314 bestsme = cpi->mv_search_params.find_fractional_mv_step( 315 xd, cm, &ms_params, subpel_start_mv, &best_mv_stats, &best_mv->as_mv, 316 &distortion, &sse, NULL); 317 318 return bestsme; 319 } 320 321 typedef struct { 322 int_mv mv; 323 int sad; 324 } center_mv_t; 325 326 static int compare_sad(const void *a, const void *b) { 327 const int diff = ((center_mv_t *)a)->sad - ((center_mv_t *)b)->sad; 328 if (diff < 0) 329 return -1; 330 else if (diff > 0) 331 return 1; 332 return 0; 333 } 334 335 static int is_alike_mv(int_mv candidate_mv, center_mv_t *center_mvs, 336 int center_mvs_count, int skip_alike_starting_mv) { 337 // MV difference threshold is in 1/8 precision. 338 const int mv_diff_thr[3] = { 1, (8 << 3), (16 << 3) }; 339 int thr = mv_diff_thr[skip_alike_starting_mv]; 340 int i; 341 342 for (i = 0; i < center_mvs_count; i++) { 343 if (abs(center_mvs[i].mv.as_mv.col - candidate_mv.as_mv.col) < thr && 344 abs(center_mvs[i].mv.as_mv.row - candidate_mv.as_mv.row) < thr) 345 return 1; 346 } 347 348 return 0; 349 } 350 351 static void get_rate_distortion( 352 int *rate_cost, int64_t *recon_error, int64_t *pred_error, 353 int16_t *src_diff, tran_low_t *coeff, tran_low_t *qcoeff, 354 tran_low_t *dqcoeff, AV1_COMMON *cm, MACROBLOCK *x, 355 const YV12_BUFFER_CONFIG *ref_frame_ptr[2], uint8_t *rec_buffer_pool[3], 356 const int rec_stride_pool[3], TX_SIZE tx_size, PREDICTION_MODE best_mode, 357 int mi_row, int mi_col, int use_y_only_rate_distortion, int do_recon, 358 TplTxfmStats *tpl_txfm_stats) { 359 const SequenceHeader *seq_params = cm->seq_params; 360 *rate_cost = 0; 361 *recon_error = 1; 362 *pred_error = 1; 363 364 (void)tpl_txfm_stats; 365 366 MACROBLOCKD *xd = &x->e_mbd; 367 int is_compound = (best_mode == NEW_NEWMV); 368 int num_planes = use_y_only_rate_distortion ? 1 : MAX_MB_PLANE; 369 370 uint8_t *src_buffer_pool[MAX_MB_PLANE] = { 371 xd->cur_buf->y_buffer, 372 xd->cur_buf->u_buffer, 373 xd->cur_buf->v_buffer, 374 }; 375 const int src_stride_pool[MAX_MB_PLANE] = { 376 xd->cur_buf->y_stride, 377 xd->cur_buf->uv_stride, 378 xd->cur_buf->uv_stride, 379 }; 380 381 const int_interpfilters kernel = 382 av1_broadcast_interp_filter(EIGHTTAP_REGULAR); 383 384 for (int plane = 0; plane < num_planes; ++plane) { 385 struct macroblockd_plane *pd = &xd->plane[plane]; 386 BLOCK_SIZE bsize_plane = 387 av1_ss_size_lookup[txsize_to_bsize[tx_size]][pd->subsampling_x] 388 [pd->subsampling_y]; 389 390 int dst_buffer_stride = rec_stride_pool[plane]; 391 int dst_mb_offset = 392 ((mi_row * MI_SIZE * dst_buffer_stride) >> pd->subsampling_y) + 393 ((mi_col * MI_SIZE) >> pd->subsampling_x); 394 uint8_t *dst_buffer = rec_buffer_pool[plane] + dst_mb_offset; 395 for (int ref = 0; ref < 1 + is_compound; ++ref) { 396 if (!is_inter_mode(best_mode)) { 397 av1_predict_intra_block( 398 xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, 399 block_size_wide[bsize_plane], block_size_high[bsize_plane], 400 max_txsize_rect_lookup[bsize_plane], best_mode, 0, 0, 401 FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride, dst_buffer, 402 dst_buffer_stride, 0, 0, plane); 403 } else { 404 int_mv best_mv = xd->mi[0]->mv[ref]; 405 uint8_t *ref_buffer_pool[MAX_MB_PLANE] = { 406 ref_frame_ptr[ref]->y_buffer, 407 ref_frame_ptr[ref]->u_buffer, 408 ref_frame_ptr[ref]->v_buffer, 409 }; 410 InterPredParams inter_pred_params; 411 struct buf_2d ref_buf = { 412 NULL, ref_buffer_pool[plane], 413 plane ? ref_frame_ptr[ref]->uv_width : ref_frame_ptr[ref]->y_width, 414 plane ? ref_frame_ptr[ref]->uv_height : ref_frame_ptr[ref]->y_height, 415 plane ? ref_frame_ptr[ref]->uv_stride : ref_frame_ptr[ref]->y_stride 416 }; 417 av1_init_inter_params(&inter_pred_params, block_size_wide[bsize_plane], 418 block_size_high[bsize_plane], 419 (mi_row * MI_SIZE) >> pd->subsampling_y, 420 (mi_col * MI_SIZE) >> pd->subsampling_x, 421 pd->subsampling_x, pd->subsampling_y, xd->bd, 422 is_cur_buf_hbd(xd), 0, 423 xd->block_ref_scale_factors[0], &ref_buf, kernel); 424 if (is_compound) av1_init_comp_mode(&inter_pred_params); 425 inter_pred_params.conv_params = get_conv_params_no_round( 426 ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd); 427 428 av1_enc_build_one_inter_predictor(dst_buffer, dst_buffer_stride, 429 &best_mv.as_mv, &inter_pred_params); 430 } 431 } 432 433 int src_stride = src_stride_pool[plane]; 434 int src_mb_offset = ((mi_row * MI_SIZE * src_stride) >> pd->subsampling_y) + 435 ((mi_col * MI_SIZE) >> pd->subsampling_x); 436 437 int this_rate = 1; 438 int64_t this_recon_error = 1; 439 int64_t sse; 440 txfm_quant_rdcost( 441 x, src_diff, block_size_wide[bsize_plane], 442 src_buffer_pool[plane] + src_mb_offset, src_stride, dst_buffer, 443 dst_buffer_stride, coeff, qcoeff, dqcoeff, block_size_wide[bsize_plane], 444 block_size_high[bsize_plane], max_txsize_rect_lookup[bsize_plane], 445 do_recon, &this_rate, &this_recon_error, &sse); 446 447 #if CONFIG_BITRATE_ACCURACY 448 if (plane == 0 && tpl_txfm_stats) { 449 // We only collect Y plane's transform coefficient 450 av1_record_tpl_txfm_block(tpl_txfm_stats, coeff); 451 } 452 #endif // CONFIG_BITRATE_ACCURACY 453 454 *recon_error += this_recon_error; 455 *pred_error += sse; 456 *rate_cost += this_rate; 457 } 458 } 459 460 static inline int32_t get_inter_cost(const AV1_COMP *cpi, MACROBLOCKD *xd, 461 const uint8_t *src_mb_buffer, 462 int src_stride, 463 TplBuffers *tpl_tmp_buffers, 464 BLOCK_SIZE bsize, TX_SIZE tx_size, 465 int mi_row, int mi_col, int rf_idx, 466 MV *rfidx_mv, int use_pred_sad) { 467 const BitDepthInfo bd_info = get_bit_depth_info(xd); 468 TplParams *tpl_data = &cpi->ppi->tpl_data; 469 const YV12_BUFFER_CONFIG *const ref_frame_ptr = 470 tpl_data->src_ref_frame[rf_idx]; 471 int16_t *src_diff = tpl_tmp_buffers->src_diff; 472 tran_low_t *coeff = tpl_tmp_buffers->coeff; 473 const int bw = 4 << mi_size_wide_log2[bsize]; 474 const int bh = 4 << mi_size_high_log2[bsize]; 475 int32_t inter_cost; 476 477 if (cpi->sf.tpl_sf.subpel_force_stop != FULL_PEL) { 478 const int_interpfilters kernel = 479 av1_broadcast_interp_filter(EIGHTTAP_REGULAR); 480 uint8_t *predictor8 = tpl_tmp_buffers->predictor8; 481 uint8_t *predictor = 482 is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8; 483 struct buf_2d ref_buf = { NULL, ref_frame_ptr->y_buffer, 484 ref_frame_ptr->y_width, ref_frame_ptr->y_height, 485 ref_frame_ptr->y_stride }; 486 InterPredParams inter_pred_params; 487 av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE, 488 mi_col * MI_SIZE, 0, 0, xd->bd, is_cur_buf_hbd(xd), 0, 489 &tpl_data->sf, &ref_buf, kernel); 490 inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd); 491 492 av1_enc_build_one_inter_predictor(predictor, bw, rfidx_mv, 493 &inter_pred_params); 494 495 if (use_pred_sad) { 496 inter_cost = (int)cpi->ppi->fn_ptr[bsize].sdf(src_mb_buffer, src_stride, 497 predictor, bw); 498 } else { 499 inter_cost = 500 tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, 501 predictor, bw, coeff, bw, bh, tx_size); 502 } 503 } else { 504 int ref_mb_offset = 505 mi_row * MI_SIZE * ref_frame_ptr->y_stride + mi_col * MI_SIZE; 506 uint8_t *ref_mb = ref_frame_ptr->y_buffer + ref_mb_offset; 507 int ref_stride = ref_frame_ptr->y_stride; 508 const FULLPEL_MV fullmv = get_fullmv_from_mv(rfidx_mv); 509 // Since sub-pel motion search is not performed, use the prediction pixels 510 // directly from the reference block ref_mb 511 if (use_pred_sad) { 512 inter_cost = (int)cpi->ppi->fn_ptr[bsize].sdf( 513 src_mb_buffer, src_stride, 514 &ref_mb[fullmv.row * ref_stride + fullmv.col], ref_stride); 515 } else { 516 inter_cost = 517 tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, 518 &ref_mb[fullmv.row * ref_stride + fullmv.col], 519 ref_stride, coeff, bw, bh, tx_size); 520 } 521 } 522 return inter_cost; 523 } 524 525 static inline void mode_estimation(AV1_COMP *cpi, TplTxfmStats *tpl_txfm_stats, 526 TplBuffers *tpl_tmp_buffers, MACROBLOCK *x, 527 int mi_row, int mi_col, BLOCK_SIZE bsize, 528 TX_SIZE tx_size, TplDepStats *tpl_stats) { 529 AV1_COMMON *cm = &cpi->common; 530 const GF_GROUP *gf_group = &cpi->ppi->gf_group; 531 TPL_SPEED_FEATURES *tpl_sf = &cpi->sf.tpl_sf; 532 533 (void)gf_group; 534 535 MACROBLOCKD *xd = &x->e_mbd; 536 const BitDepthInfo bd_info = get_bit_depth_info(xd); 537 TplParams *tpl_data = &cpi->ppi->tpl_data; 538 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx]; 539 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; 540 541 const int bw = 4 << mi_size_wide_log2[bsize]; 542 const int bh = 4 << mi_size_high_log2[bsize]; 543 544 int32_t best_intra_cost = INT32_MAX; 545 int32_t intra_cost; 546 PREDICTION_MODE best_mode = DC_PRED; 547 548 const int mb_y_offset = 549 mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; 550 uint8_t *src_mb_buffer = xd->cur_buf->y_buffer + mb_y_offset; 551 const int src_stride = xd->cur_buf->y_stride; 552 const int src_width = xd->cur_buf->y_width; 553 554 int dst_mb_offset = 555 mi_row * MI_SIZE * tpl_frame->rec_picture->y_stride + mi_col * MI_SIZE; 556 uint8_t *dst_buffer = tpl_frame->rec_picture->y_buffer + dst_mb_offset; 557 int dst_buffer_stride = tpl_frame->rec_picture->y_stride; 558 int use_y_only_rate_distortion = tpl_sf->use_y_only_rate_distortion; 559 560 uint8_t *rec_buffer_pool[3] = { 561 tpl_frame->rec_picture->y_buffer, 562 tpl_frame->rec_picture->u_buffer, 563 tpl_frame->rec_picture->v_buffer, 564 }; 565 566 const int rec_stride_pool[3] = { 567 tpl_frame->rec_picture->y_stride, 568 tpl_frame->rec_picture->uv_stride, 569 tpl_frame->rec_picture->uv_stride, 570 }; 571 572 for (int plane = 1; plane < MAX_MB_PLANE; ++plane) { 573 struct macroblockd_plane *pd = &xd->plane[plane]; 574 pd->subsampling_x = xd->cur_buf->subsampling_x; 575 pd->subsampling_y = xd->cur_buf->subsampling_y; 576 } 577 578 uint8_t *predictor8 = tpl_tmp_buffers->predictor8; 579 int16_t *src_diff = tpl_tmp_buffers->src_diff; 580 tran_low_t *coeff = tpl_tmp_buffers->coeff; 581 tran_low_t *qcoeff = tpl_tmp_buffers->qcoeff; 582 tran_low_t *dqcoeff = tpl_tmp_buffers->dqcoeff; 583 uint8_t *predictor = 584 is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8; 585 int64_t recon_error = 1; 586 int64_t pred_error = 1; 587 588 memset(tpl_stats, 0, sizeof(*tpl_stats)); 589 tpl_stats->ref_frame_index[0] = -1; 590 tpl_stats->ref_frame_index[1] = -1; 591 592 const int mi_width = mi_size_wide[bsize]; 593 const int mi_height = mi_size_high[bsize]; 594 set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd, 595 mi_row, mi_col); 596 set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width, 597 cm->mi_params.mi_rows, cm->mi_params.mi_cols); 598 set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], 599 av1_num_planes(cm)); 600 xd->mi[0]->bsize = bsize; 601 xd->mi[0]->motion_mode = SIMPLE_TRANSLATION; 602 603 // Intra prediction search 604 xd->mi[0]->ref_frame[0] = INTRA_FRAME; 605 606 // Pre-load the bottom left line. 607 if (xd->left_available && 608 mi_row + tx_size_high_unit[tx_size] < xd->tile.mi_row_end) { 609 if (is_cur_buf_hbd(xd)) { 610 uint16_t *dst = CONVERT_TO_SHORTPTR(dst_buffer); 611 for (int i = 0; i < bw; ++i) 612 dst[(bw + i) * dst_buffer_stride - 1] = 613 dst[(bw - 1) * dst_buffer_stride - 1]; 614 } else { 615 for (int i = 0; i < bw; ++i) 616 dst_buffer[(bw + i) * dst_buffer_stride - 1] = 617 dst_buffer[(bw - 1) * dst_buffer_stride - 1]; 618 } 619 } 620 621 // if cpi->sf.tpl_sf.prune_intra_modes is on, then search only DC_PRED, 622 // H_PRED, and V_PRED 623 const PREDICTION_MODE last_intra_mode = 624 tpl_sf->prune_intra_modes ? D45_PRED : INTRA_MODE_END; 625 const SequenceHeader *seq_params = cm->seq_params; 626 for (PREDICTION_MODE mode = INTRA_MODE_START; mode < last_intra_mode; 627 ++mode) { 628 av1_predict_intra_block(xd, seq_params->sb_size, 629 seq_params->enable_intra_edge_filter, 630 block_size_wide[bsize], block_size_high[bsize], 631 tx_size, mode, 0, 0, FILTER_INTRA_MODES, dst_buffer, 632 dst_buffer_stride, predictor, bw, 0, 0, 0); 633 634 if (tpl_frame->use_pred_sad) { 635 intra_cost = (int32_t)cpi->ppi->fn_ptr[bsize].sdf( 636 src_mb_buffer, src_stride, predictor, bw); 637 } else { 638 intra_cost = 639 tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, 640 predictor, bw, coeff, bw, bh, tx_size); 641 } 642 643 if (intra_cost < best_intra_cost) { 644 best_intra_cost = intra_cost; 645 best_mode = mode; 646 } 647 } 648 // Calculate SATD of the best intra mode if SAD was used for mode decision 649 // as best_intra_cost is used in ML model to skip intra mode evaluation. 650 if (tpl_frame->use_pred_sad) { 651 av1_predict_intra_block( 652 xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, 653 block_size_wide[bsize], block_size_high[bsize], tx_size, best_mode, 0, 654 0, FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride, predictor, bw, 0, 655 0, 0); 656 best_intra_cost = 657 tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, 658 predictor, bw, coeff, bw, bh, tx_size); 659 } 660 661 int rate_cost = 1; 662 663 if (cpi->use_ducky_encode) { 664 get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, 665 qcoeff, dqcoeff, cm, x, NULL, rec_buffer_pool, 666 rec_stride_pool, tx_size, best_mode, mi_row, mi_col, 667 use_y_only_rate_distortion, 1 /*do_recon*/, NULL); 668 669 tpl_stats->intra_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; 670 tpl_stats->intra_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; 671 tpl_stats->intra_rate = rate_cost; 672 } 673 674 #if CONFIG_THREE_PASS 675 const int frame_offset = tpl_data->frame_idx - cpi->gf_frame_index; 676 677 if (cpi->third_pass_ctx && 678 frame_offset < cpi->third_pass_ctx->frame_info_count && 679 tpl_data->frame_idx < gf_group->size) { 680 double ratio_h, ratio_w; 681 av1_get_third_pass_ratio(cpi->third_pass_ctx, frame_offset, cm->height, 682 cm->width, &ratio_h, &ratio_w); 683 THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( 684 cpi->third_pass_ctx, frame_offset, mi_row, mi_col, ratio_h, ratio_w); 685 686 PREDICTION_MODE third_pass_mode = this_mi->pred_mode; 687 688 if (third_pass_mode >= last_intra_mode && 689 third_pass_mode < INTRA_MODE_END) { 690 av1_predict_intra_block( 691 xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, 692 block_size_wide[bsize], block_size_high[bsize], tx_size, 693 third_pass_mode, 0, 0, FILTER_INTRA_MODES, dst_buffer, 694 dst_buffer_stride, predictor, bw, 0, 0, 0); 695 696 intra_cost = 697 tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, 698 predictor, bw, coeff, bw, bh, tx_size); 699 700 if (intra_cost < best_intra_cost) { 701 best_intra_cost = intra_cost; 702 best_mode = third_pass_mode; 703 } 704 } 705 } 706 #endif // CONFIG_THREE_PASS 707 708 // Motion compensated prediction 709 xd->mi[0]->ref_frame[0] = INTRA_FRAME; 710 xd->mi[0]->ref_frame[1] = NONE_FRAME; 711 xd->mi[0]->compound_idx = 1; 712 713 int best_rf_idx = -1; 714 int_mv best_mv[2]; 715 int32_t inter_cost; 716 int32_t best_inter_cost = INT32_MAX; 717 int rf_idx; 718 int_mv single_mv[INTER_REFS_PER_FRAME]; 719 720 best_mv[0].as_int = INVALID_MV; 721 best_mv[1].as_int = INVALID_MV; 722 723 for (rf_idx = 0; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx) { 724 single_mv[rf_idx].as_int = INVALID_MV; 725 if (tpl_data->ref_frame[rf_idx] == NULL || 726 tpl_data->src_ref_frame[rf_idx] == NULL) { 727 tpl_stats->mv[rf_idx].as_int = INVALID_MV; 728 continue; 729 } 730 731 const YV12_BUFFER_CONFIG *ref_frame_ptr = tpl_data->src_ref_frame[rf_idx]; 732 const int ref_mb_offset = 733 mi_row * MI_SIZE * ref_frame_ptr->y_stride + mi_col * MI_SIZE; 734 uint8_t *ref_mb = ref_frame_ptr->y_buffer + ref_mb_offset; 735 const int ref_stride = ref_frame_ptr->y_stride; 736 const int ref_width = ref_frame_ptr->y_width; 737 738 int_mv best_rfidx_mv = { 0 }; 739 uint32_t bestsme = UINT32_MAX; 740 741 center_mv_t center_mvs[4] = { { { 0 }, INT_MAX }, 742 { { 0 }, INT_MAX }, 743 { { 0 }, INT_MAX }, 744 { { 0 }, INT_MAX } }; 745 int refmv_count = 1; 746 int idx; 747 748 if (xd->up_available) { 749 TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( 750 mi_row - mi_height, mi_col, tpl_frame->stride, block_mis_log2)]; 751 if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count, 752 tpl_sf->skip_alike_starting_mv)) { 753 center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int; 754 ++refmv_count; 755 } 756 } 757 758 if (xd->left_available) { 759 TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( 760 mi_row, mi_col - mi_width, tpl_frame->stride, block_mis_log2)]; 761 if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count, 762 tpl_sf->skip_alike_starting_mv)) { 763 center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int; 764 ++refmv_count; 765 } 766 } 767 768 if (xd->up_available && mi_col + mi_width < xd->tile.mi_col_end) { 769 TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( 770 mi_row - mi_height, mi_col + mi_width, tpl_frame->stride, 771 block_mis_log2)]; 772 if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count, 773 tpl_sf->skip_alike_starting_mv)) { 774 center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int; 775 ++refmv_count; 776 } 777 } 778 779 #if CONFIG_THREE_PASS 780 if (cpi->third_pass_ctx && 781 frame_offset < cpi->third_pass_ctx->frame_info_count && 782 tpl_data->frame_idx < gf_group->size) { 783 double ratio_h, ratio_w; 784 av1_get_third_pass_ratio(cpi->third_pass_ctx, frame_offset, cm->height, 785 cm->width, &ratio_h, &ratio_w); 786 THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( 787 cpi->third_pass_ctx, frame_offset, mi_row, mi_col, ratio_h, ratio_w); 788 789 int_mv tp_mv = av1_get_third_pass_adjusted_mv(this_mi, ratio_h, ratio_w, 790 rf_idx + LAST_FRAME); 791 if (tp_mv.as_int != INVALID_MV && 792 !is_alike_mv(tp_mv, center_mvs + 1, refmv_count - 1, 793 tpl_sf->skip_alike_starting_mv)) { 794 center_mvs[0].mv = tp_mv; 795 } 796 } 797 #endif // CONFIG_THREE_PASS 798 799 // Prune starting mvs 800 if (tpl_sf->prune_starting_mv && refmv_count > 1) { 801 // Get each center mv's sad. 802 for (idx = 0; idx < refmv_count; ++idx) { 803 FULLPEL_MV mv = get_fullmv_from_mv(¢er_mvs[idx].mv.as_mv); 804 clamp_fullmv(&mv, &x->mv_limits); 805 center_mvs[idx].sad = (int)cpi->ppi->fn_ptr[bsize].sdf( 806 src_mb_buffer, src_stride, &ref_mb[mv.row * ref_stride + mv.col], 807 ref_stride); 808 } 809 810 // Rank center_mv using sad. 811 qsort(center_mvs, refmv_count, sizeof(center_mvs[0]), compare_sad); 812 813 refmv_count = AOMMIN(4 - tpl_sf->prune_starting_mv, refmv_count); 814 // Further reduce number of refmv based on sad difference. 815 if (refmv_count > 1) { 816 int last_sad = center_mvs[refmv_count - 1].sad; 817 int second_to_last_sad = center_mvs[refmv_count - 2].sad; 818 if ((last_sad - second_to_last_sad) * 5 > second_to_last_sad) 819 refmv_count--; 820 } 821 } 822 823 for (idx = 0; idx < refmv_count; ++idx) { 824 int_mv this_mv; 825 uint32_t thissme = motion_estimation( 826 cpi, x, src_mb_buffer, ref_mb, src_stride, ref_stride, src_width, 827 ref_width, bsize, center_mvs[idx].mv.as_mv, &this_mv); 828 829 if (thissme < bestsme) { 830 bestsme = thissme; 831 best_rfidx_mv = this_mv; 832 } 833 } 834 835 tpl_stats->mv[rf_idx].as_int = best_rfidx_mv.as_int; 836 single_mv[rf_idx] = best_rfidx_mv; 837 838 inter_cost = get_inter_cost( 839 cpi, xd, src_mb_buffer, src_stride, tpl_tmp_buffers, bsize, tx_size, 840 mi_row, mi_col, rf_idx, &best_rfidx_mv.as_mv, tpl_frame->use_pred_sad); 841 // Store inter cost for each ref frame. This is used to prune inter modes. 842 tpl_stats->pred_error[rf_idx] = AOMMAX(1, inter_cost); 843 844 if (inter_cost < best_inter_cost) { 845 best_rf_idx = rf_idx; 846 847 best_inter_cost = inter_cost; 848 best_mv[0].as_int = best_rfidx_mv.as_int; 849 } 850 } 851 // Calculate SATD of the best inter mode if SAD was used for mode decision 852 // as best_inter_cost is used in ML model to skip intra mode evaluation. 853 if (best_inter_cost < INT32_MAX && tpl_frame->use_pred_sad) { 854 assert(best_rf_idx != -1); 855 best_inter_cost = get_inter_cost( 856 cpi, xd, src_mb_buffer, src_stride, tpl_tmp_buffers, bsize, tx_size, 857 mi_row, mi_col, best_rf_idx, &best_mv[0].as_mv, 0 /* use_pred_sad */); 858 } 859 860 if (best_rf_idx != -1 && best_inter_cost < best_intra_cost) { 861 best_mode = NEWMV; 862 xd->mi[0]->ref_frame[0] = best_rf_idx + LAST_FRAME; 863 xd->mi[0]->mv[0].as_int = best_mv[0].as_int; 864 } 865 866 // Start compound predition search. 867 int comp_ref_frames[3][2] = { 868 { 0, 4 }, 869 { 0, 6 }, 870 { 3, 6 }, 871 }; 872 873 int start_rf = 0; 874 int end_rf = 3; 875 if (!tpl_sf->allow_compound_pred) end_rf = 0; 876 #if CONFIG_THREE_PASS 877 if (cpi->third_pass_ctx && 878 frame_offset < cpi->third_pass_ctx->frame_info_count && 879 tpl_data->frame_idx < gf_group->size) { 880 double ratio_h, ratio_w; 881 av1_get_third_pass_ratio(cpi->third_pass_ctx, frame_offset, cm->height, 882 cm->width, &ratio_h, &ratio_w); 883 THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( 884 cpi->third_pass_ctx, frame_offset, mi_row, mi_col, ratio_h, ratio_w); 885 886 if (this_mi->ref_frame[0] >= LAST_FRAME && 887 this_mi->ref_frame[1] >= LAST_FRAME) { 888 int found = 0; 889 for (int i = 0; i < 3; i++) { 890 if (comp_ref_frames[i][0] + LAST_FRAME == this_mi->ref_frame[0] && 891 comp_ref_frames[i][1] + LAST_FRAME == this_mi->ref_frame[1]) { 892 found = 1; 893 break; 894 } 895 } 896 if (!found || !tpl_sf->allow_compound_pred) { 897 comp_ref_frames[2][0] = this_mi->ref_frame[0] - LAST_FRAME; 898 comp_ref_frames[2][1] = this_mi->ref_frame[1] - LAST_FRAME; 899 if (!tpl_sf->allow_compound_pred) { 900 start_rf = 2; 901 end_rf = 3; 902 } 903 } 904 } 905 } 906 #endif // CONFIG_THREE_PASS 907 908 xd->mi_row = mi_row; 909 xd->mi_col = mi_col; 910 int best_cmp_rf_idx = -1; 911 const int_interpfilters kernel = 912 av1_broadcast_interp_filter(EIGHTTAP_REGULAR); 913 for (int cmp_rf_idx = start_rf; cmp_rf_idx < end_rf; ++cmp_rf_idx) { 914 int rf_idx0 = comp_ref_frames[cmp_rf_idx][0]; 915 int rf_idx1 = comp_ref_frames[cmp_rf_idx][1]; 916 917 if (tpl_data->ref_frame[rf_idx0] == NULL || 918 tpl_data->src_ref_frame[rf_idx0] == NULL || 919 tpl_data->ref_frame[rf_idx1] == NULL || 920 tpl_data->src_ref_frame[rf_idx1] == NULL) { 921 continue; 922 } 923 924 const YV12_BUFFER_CONFIG *ref_frame_ptr[2] = { 925 tpl_data->src_ref_frame[rf_idx0], 926 tpl_data->src_ref_frame[rf_idx1], 927 }; 928 929 xd->mi[0]->ref_frame[0] = rf_idx0 + LAST_FRAME; 930 xd->mi[0]->ref_frame[1] = rf_idx1 + LAST_FRAME; 931 xd->mi[0]->mode = NEW_NEWMV; 932 const int8_t ref_frame_type = av1_ref_frame_type(xd->mi[0]->ref_frame); 933 // Set up ref_mv for av1_joint_motion_search(). 934 CANDIDATE_MV *this_ref_mv_stack = x->mbmi_ext.ref_mv_stack[ref_frame_type]; 935 this_ref_mv_stack[xd->mi[0]->ref_mv_idx].this_mv = single_mv[rf_idx0]; 936 this_ref_mv_stack[xd->mi[0]->ref_mv_idx].comp_mv = single_mv[rf_idx1]; 937 938 struct buf_2d yv12_mb[2][MAX_MB_PLANE]; 939 for (int i = 0; i < 2; ++i) { 940 av1_setup_pred_block(xd, yv12_mb[i], ref_frame_ptr[i], 941 xd->block_ref_scale_factors[i], 942 xd->block_ref_scale_factors[i], MAX_MB_PLANE); 943 for (int plane = 0; plane < MAX_MB_PLANE; ++plane) { 944 xd->plane[plane].pre[i] = yv12_mb[i][plane]; 945 } 946 } 947 948 int_mv tmp_mv[2] = { single_mv[rf_idx0], single_mv[rf_idx1] }; 949 int rate_mv; 950 av1_joint_motion_search(cpi, x, bsize, tmp_mv, NULL, 0, &rate_mv, 951 !cpi->sf.mv_sf.disable_second_mv, 952 NUM_JOINT_ME_REFINE_ITER); 953 954 for (int ref = 0; ref < 2; ++ref) { 955 struct buf_2d ref_buf = { NULL, ref_frame_ptr[ref]->y_buffer, 956 ref_frame_ptr[ref]->y_width, 957 ref_frame_ptr[ref]->y_height, 958 ref_frame_ptr[ref]->y_stride }; 959 InterPredParams inter_pred_params; 960 av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE, 961 mi_col * MI_SIZE, 0, 0, xd->bd, is_cur_buf_hbd(xd), 962 0, &tpl_data->sf, &ref_buf, kernel); 963 av1_init_comp_mode(&inter_pred_params); 964 965 inter_pred_params.conv_params = get_conv_params_no_round( 966 ref, 0, xd->tmp_conv_dst, MAX_SB_SIZE, 1, xd->bd); 967 968 av1_enc_build_one_inter_predictor(predictor, bw, &tmp_mv[ref].as_mv, 969 &inter_pred_params); 970 } 971 inter_cost = 972 tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, 973 predictor, bw, coeff, bw, bh, tx_size); 974 if (inter_cost < best_inter_cost) { 975 best_cmp_rf_idx = cmp_rf_idx; 976 best_inter_cost = inter_cost; 977 best_mv[0] = tmp_mv[0]; 978 best_mv[1] = tmp_mv[1]; 979 } 980 } 981 982 if (best_cmp_rf_idx != -1 && best_inter_cost < best_intra_cost) { 983 best_mode = NEW_NEWMV; 984 const int best_rf_idx0 = comp_ref_frames[best_cmp_rf_idx][0]; 985 const int best_rf_idx1 = comp_ref_frames[best_cmp_rf_idx][1]; 986 xd->mi[0]->ref_frame[0] = best_rf_idx0 + LAST_FRAME; 987 xd->mi[0]->ref_frame[1] = best_rf_idx1 + LAST_FRAME; 988 } 989 990 if (best_inter_cost < INT32_MAX && is_inter_mode(best_mode)) { 991 xd->mi[0]->mv[0].as_int = best_mv[0].as_int; 992 xd->mi[0]->mv[1].as_int = best_mv[1].as_int; 993 const YV12_BUFFER_CONFIG *ref_frame_ptr[2] = { 994 best_cmp_rf_idx >= 0 995 ? tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][0]] 996 : tpl_data->src_ref_frame[best_rf_idx], 997 best_cmp_rf_idx >= 0 998 ? tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][1]] 999 : NULL, 1000 }; 1001 rate_cost = 1; 1002 get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, 1003 qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, 1004 rec_stride_pool, tx_size, best_mode, mi_row, mi_col, 1005 use_y_only_rate_distortion, 0 /*do_recon*/, NULL); 1006 tpl_stats->srcrf_rate = rate_cost; 1007 } 1008 1009 best_intra_cost = AOMMAX(best_intra_cost, 1); 1010 best_inter_cost = AOMMIN(best_intra_cost, best_inter_cost); 1011 tpl_stats->inter_cost = best_inter_cost; 1012 tpl_stats->intra_cost = best_intra_cost; 1013 1014 tpl_stats->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; 1015 tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; 1016 1017 // Final encode 1018 rate_cost = 0; 1019 const YV12_BUFFER_CONFIG *ref_frame_ptr[2]; 1020 1021 ref_frame_ptr[0] = 1022 best_mode == NEW_NEWMV 1023 ? tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][0]] 1024 : best_rf_idx >= 0 ? tpl_data->ref_frame[best_rf_idx] 1025 : NULL; 1026 ref_frame_ptr[1] = 1027 best_mode == NEW_NEWMV 1028 ? tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]] 1029 : NULL; 1030 get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, 1031 qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, 1032 rec_stride_pool, tx_size, best_mode, mi_row, mi_col, 1033 use_y_only_rate_distortion, 1 /*do_recon*/, 1034 tpl_txfm_stats); 1035 1036 tpl_stats->recrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; 1037 tpl_stats->recrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; 1038 tpl_stats->recrf_rate = rate_cost; 1039 1040 if (!is_inter_mode(best_mode)) { 1041 tpl_stats->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; 1042 tpl_stats->srcrf_rate = rate_cost; 1043 tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; 1044 } 1045 1046 tpl_stats->recrf_dist = AOMMAX(tpl_stats->srcrf_dist, tpl_stats->recrf_dist); 1047 tpl_stats->recrf_rate = AOMMAX(tpl_stats->srcrf_rate, tpl_stats->recrf_rate); 1048 1049 if (best_mode == NEW_NEWMV) { 1050 ref_frame_ptr[0] = tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][0]]; 1051 ref_frame_ptr[1] = 1052 tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][1]]; 1053 get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, 1054 qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, 1055 rec_stride_pool, tx_size, best_mode, mi_row, mi_col, 1056 use_y_only_rate_distortion, 1 /*do_recon*/, NULL); 1057 tpl_stats->cmp_recrf_dist[0] = recon_error << TPL_DEP_COST_SCALE_LOG2; 1058 tpl_stats->cmp_recrf_rate[0] = rate_cost; 1059 1060 tpl_stats->cmp_recrf_dist[0] = 1061 AOMMAX(tpl_stats->srcrf_dist, tpl_stats->cmp_recrf_dist[0]); 1062 tpl_stats->cmp_recrf_rate[0] = 1063 AOMMAX(tpl_stats->srcrf_rate, tpl_stats->cmp_recrf_rate[0]); 1064 1065 tpl_stats->cmp_recrf_dist[0] = 1066 AOMMIN(tpl_stats->recrf_dist, tpl_stats->cmp_recrf_dist[0]); 1067 tpl_stats->cmp_recrf_rate[0] = 1068 AOMMIN(tpl_stats->recrf_rate, tpl_stats->cmp_recrf_rate[0]); 1069 1070 rate_cost = 0; 1071 ref_frame_ptr[0] = 1072 tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][0]]; 1073 ref_frame_ptr[1] = tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]]; 1074 get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, 1075 qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, 1076 rec_stride_pool, tx_size, best_mode, mi_row, mi_col, 1077 use_y_only_rate_distortion, 1 /*do_recon*/, NULL); 1078 tpl_stats->cmp_recrf_dist[1] = recon_error << TPL_DEP_COST_SCALE_LOG2; 1079 tpl_stats->cmp_recrf_rate[1] = rate_cost; 1080 1081 tpl_stats->cmp_recrf_dist[1] = 1082 AOMMAX(tpl_stats->srcrf_dist, tpl_stats->cmp_recrf_dist[1]); 1083 tpl_stats->cmp_recrf_rate[1] = 1084 AOMMAX(tpl_stats->srcrf_rate, tpl_stats->cmp_recrf_rate[1]); 1085 1086 tpl_stats->cmp_recrf_dist[1] = 1087 AOMMIN(tpl_stats->recrf_dist, tpl_stats->cmp_recrf_dist[1]); 1088 tpl_stats->cmp_recrf_rate[1] = 1089 AOMMIN(tpl_stats->recrf_rate, tpl_stats->cmp_recrf_rate[1]); 1090 } 1091 1092 if (best_mode == NEWMV) { 1093 tpl_stats->mv[best_rf_idx] = best_mv[0]; 1094 tpl_stats->ref_frame_index[0] = best_rf_idx; 1095 tpl_stats->ref_frame_index[1] = NONE_FRAME; 1096 } else if (best_mode == NEW_NEWMV) { 1097 tpl_stats->ref_frame_index[0] = comp_ref_frames[best_cmp_rf_idx][0]; 1098 tpl_stats->ref_frame_index[1] = comp_ref_frames[best_cmp_rf_idx][1]; 1099 tpl_stats->mv[tpl_stats->ref_frame_index[0]] = best_mv[0]; 1100 tpl_stats->mv[tpl_stats->ref_frame_index[1]] = best_mv[1]; 1101 } 1102 1103 for (int idy = 0; idy < mi_height; ++idy) { 1104 for (int idx = 0; idx < mi_width; ++idx) { 1105 if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > idx && 1106 (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > idy) { 1107 xd->mi[idx + idy * cm->mi_params.mi_stride] = xd->mi[0]; 1108 } 1109 } 1110 } 1111 } 1112 1113 static int round_floor(int ref_pos, int bsize_pix) { 1114 int round; 1115 if (ref_pos < 0) 1116 round = -(1 + (-ref_pos - 1) / bsize_pix); 1117 else 1118 round = ref_pos / bsize_pix; 1119 1120 return round; 1121 } 1122 1123 int av1_get_overlap_area(int row_a, int col_a, int row_b, int col_b, int width, 1124 int height) { 1125 int min_row = AOMMAX(row_a, row_b); 1126 int max_row = AOMMIN(row_a + height, row_b + height); 1127 int min_col = AOMMAX(col_a, col_b); 1128 int max_col = AOMMIN(col_a + width, col_b + width); 1129 if (min_row < max_row && min_col < max_col) { 1130 return (max_row - min_row) * (max_col - min_col); 1131 } 1132 return 0; 1133 } 1134 1135 int av1_tpl_ptr_pos(int mi_row, int mi_col, int stride, uint8_t right_shift) { 1136 return (mi_row >> right_shift) * stride + (mi_col >> right_shift); 1137 } 1138 1139 int64_t av1_delta_rate_cost(int64_t delta_rate, int64_t recrf_dist, 1140 int64_t srcrf_dist, int pix_num) { 1141 double beta = (double)srcrf_dist / recrf_dist; 1142 int64_t rate_cost = delta_rate; 1143 1144 if (srcrf_dist <= 128) return rate_cost; 1145 1146 double dr = 1147 (double)(delta_rate >> (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT)) / 1148 pix_num; 1149 1150 double log_den = log(beta) / log(2.0) + 2.0 * dr; 1151 1152 if (log_den > log(10.0) / log(2.0)) { 1153 rate_cost = (int64_t)((log(1.0 / beta) * pix_num) / log(2.0) / 2.0); 1154 rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT); 1155 return rate_cost; 1156 } 1157 1158 double num = pow(2.0, log_den); 1159 double den = num * beta + (1 - beta) * beta; 1160 1161 rate_cost = (int64_t)((pix_num * log(num / den)) / log(2.0) / 2.0); 1162 1163 rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT); 1164 1165 return rate_cost; 1166 } 1167 1168 static inline void tpl_model_update_b(TplParams *const tpl_data, int mi_row, 1169 int mi_col, const BLOCK_SIZE bsize, 1170 int frame_idx, int ref) { 1171 TplDepFrame *tpl_frame_ptr = &tpl_data->tpl_frame[frame_idx]; 1172 TplDepStats *tpl_ptr = tpl_frame_ptr->tpl_stats_ptr; 1173 TplDepFrame *tpl_frame = tpl_data->tpl_frame; 1174 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; 1175 TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos( 1176 mi_row, mi_col, tpl_frame->stride, block_mis_log2)]; 1177 1178 int is_compound = tpl_stats_ptr->ref_frame_index[1] >= 0; 1179 1180 if (tpl_stats_ptr->ref_frame_index[ref] < 0) return; 1181 const int ref_frame_index = tpl_stats_ptr->ref_frame_index[ref]; 1182 TplDepFrame *ref_tpl_frame = 1183 &tpl_frame[tpl_frame[frame_idx].ref_map_index[ref_frame_index]]; 1184 TplDepStats *ref_stats_ptr = ref_tpl_frame->tpl_stats_ptr; 1185 1186 if (tpl_frame[frame_idx].ref_map_index[ref_frame_index] < 0) return; 1187 1188 const FULLPEL_MV full_mv = 1189 get_fullmv_from_mv(&tpl_stats_ptr->mv[ref_frame_index].as_mv); 1190 const int ref_pos_row = mi_row * MI_SIZE + full_mv.row; 1191 const int ref_pos_col = mi_col * MI_SIZE + full_mv.col; 1192 1193 const int bw = 4 << mi_size_wide_log2[bsize]; 1194 const int bh = 4 << mi_size_high_log2[bsize]; 1195 const int mi_height = mi_size_high[bsize]; 1196 const int mi_width = mi_size_wide[bsize]; 1197 const int pix_num = bw * bh; 1198 1199 // top-left on grid block location in pixel 1200 int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh; 1201 int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw; 1202 int block; 1203 1204 int64_t srcrf_dist = is_compound ? tpl_stats_ptr->cmp_recrf_dist[!ref] 1205 : tpl_stats_ptr->srcrf_dist; 1206 int64_t srcrf_rate = 1207 is_compound 1208 ? (tpl_stats_ptr->cmp_recrf_rate[!ref] << TPL_DEP_COST_SCALE_LOG2) 1209 : (tpl_stats_ptr->srcrf_rate << TPL_DEP_COST_SCALE_LOG2); 1210 1211 int64_t cur_dep_dist = tpl_stats_ptr->recrf_dist - srcrf_dist; 1212 int64_t mc_dep_dist = 1213 (int64_t)(tpl_stats_ptr->mc_dep_dist * 1214 ((double)(tpl_stats_ptr->recrf_dist - srcrf_dist) / 1215 tpl_stats_ptr->recrf_dist)); 1216 int64_t delta_rate = 1217 (tpl_stats_ptr->recrf_rate << TPL_DEP_COST_SCALE_LOG2) - srcrf_rate; 1218 int64_t mc_dep_rate = 1219 av1_delta_rate_cost(tpl_stats_ptr->mc_dep_rate, tpl_stats_ptr->recrf_dist, 1220 srcrf_dist, pix_num); 1221 1222 for (block = 0; block < 4; ++block) { 1223 int grid_pos_row = grid_pos_row_base + bh * (block >> 1); 1224 int grid_pos_col = grid_pos_col_base + bw * (block & 0x01); 1225 1226 if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE && 1227 grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) { 1228 int overlap_area = av1_get_overlap_area(grid_pos_row, grid_pos_col, 1229 ref_pos_row, ref_pos_col, bw, bh); 1230 int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height; 1231 int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width; 1232 assert((1 << block_mis_log2) == mi_height); 1233 assert((1 << block_mis_log2) == mi_width); 1234 TplDepStats *des_stats = &ref_stats_ptr[av1_tpl_ptr_pos( 1235 ref_mi_row, ref_mi_col, ref_tpl_frame->stride, block_mis_log2)]; 1236 des_stats->mc_dep_dist += 1237 ((cur_dep_dist + mc_dep_dist) * overlap_area) / pix_num; 1238 des_stats->mc_dep_rate += 1239 ((delta_rate + mc_dep_rate) * overlap_area) / pix_num; 1240 } 1241 } 1242 } 1243 1244 static inline void tpl_model_update(TplParams *const tpl_data, int mi_row, 1245 int mi_col, int frame_idx) { 1246 const BLOCK_SIZE tpl_stats_block_size = 1247 convert_length_to_bsize(MI_SIZE << tpl_data->tpl_stats_block_mis_log2); 1248 tpl_model_update_b(tpl_data, mi_row, mi_col, tpl_stats_block_size, frame_idx, 1249 0); 1250 tpl_model_update_b(tpl_data, mi_row, mi_col, tpl_stats_block_size, frame_idx, 1251 1); 1252 } 1253 1254 static inline void tpl_model_store(TplDepStats *tpl_stats_ptr, int mi_row, 1255 int mi_col, int stride, 1256 const TplDepStats *src_stats, 1257 uint8_t block_mis_log2) { 1258 int index = av1_tpl_ptr_pos(mi_row, mi_col, stride, block_mis_log2); 1259 TplDepStats *tpl_ptr = &tpl_stats_ptr[index]; 1260 *tpl_ptr = *src_stats; 1261 tpl_ptr->intra_cost = AOMMAX(1, tpl_ptr->intra_cost); 1262 tpl_ptr->inter_cost = AOMMAX(1, tpl_ptr->inter_cost); 1263 tpl_ptr->srcrf_dist = AOMMAX(1, tpl_ptr->srcrf_dist); 1264 tpl_ptr->srcrf_sse = AOMMAX(1, tpl_ptr->srcrf_sse); 1265 tpl_ptr->recrf_dist = AOMMAX(1, tpl_ptr->recrf_dist); 1266 tpl_ptr->srcrf_rate = AOMMAX(1, tpl_ptr->srcrf_rate); 1267 tpl_ptr->recrf_rate = AOMMAX(1, tpl_ptr->recrf_rate); 1268 tpl_ptr->cmp_recrf_dist[0] = AOMMAX(1, tpl_ptr->cmp_recrf_dist[0]); 1269 tpl_ptr->cmp_recrf_dist[1] = AOMMAX(1, tpl_ptr->cmp_recrf_dist[1]); 1270 tpl_ptr->cmp_recrf_rate[0] = AOMMAX(1, tpl_ptr->cmp_recrf_rate[0]); 1271 tpl_ptr->cmp_recrf_rate[1] = AOMMAX(1, tpl_ptr->cmp_recrf_rate[1]); 1272 } 1273 1274 // Reset the ref and source frame pointers of tpl_data. 1275 static inline void tpl_reset_src_ref_frames(TplParams *tpl_data) { 1276 for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { 1277 tpl_data->ref_frame[i] = NULL; 1278 tpl_data->src_ref_frame[i] = NULL; 1279 } 1280 } 1281 1282 static inline int get_gop_length(const GF_GROUP *gf_group) { 1283 int gop_length = AOMMIN(gf_group->size, MAX_TPL_FRAME_IDX - 1); 1284 return gop_length; 1285 } 1286 1287 // Initialize the mc_flow parameters used in computing tpl data. 1288 static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx, 1289 int pframe_qindex) { 1290 TplParams *const tpl_data = &cpi->ppi->tpl_data; 1291 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx]; 1292 const YV12_BUFFER_CONFIG *this_frame = tpl_frame->gf_picture; 1293 const YV12_BUFFER_CONFIG *ref_frames_ordered[INTER_REFS_PER_FRAME]; 1294 uint32_t ref_frame_display_indices[INTER_REFS_PER_FRAME]; 1295 const GF_GROUP *gf_group = &cpi->ppi->gf_group; 1296 TPL_SPEED_FEATURES *tpl_sf = &cpi->sf.tpl_sf; 1297 int ref_pruning_enabled = is_frame_eligible_for_ref_pruning( 1298 gf_group, cpi->sf.inter_sf.selective_ref_frame, 1299 tpl_sf->prune_ref_frames_in_tpl, frame_idx); 1300 int gop_length = get_gop_length(gf_group); 1301 int ref_frame_flags; 1302 AV1_COMMON *cm = &cpi->common; 1303 int rdmult, idx; 1304 ThreadData *td = &cpi->td; 1305 MACROBLOCK *x = &td->mb; 1306 MACROBLOCKD *xd = &x->e_mbd; 1307 TplTxfmStats *tpl_txfm_stats = &td->tpl_txfm_stats; 1308 tpl_data->frame_idx = frame_idx; 1309 tpl_reset_src_ref_frames(tpl_data); 1310 av1_tile_init(&xd->tile, cm, 0, 0); 1311 1312 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100)); 1313 const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6); 1314 const FRAME_TYPE frame_type = cm->current_frame.frame_type; 1315 1316 // Setup scaling factor 1317 av1_setup_scale_factors_for_frame( 1318 &tpl_data->sf, this_frame->y_crop_width, this_frame->y_crop_height, 1319 this_frame->y_crop_width, this_frame->y_crop_height); 1320 1321 xd->cur_buf = this_frame; 1322 1323 for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) { 1324 TplDepFrame *tpl_ref_frame = 1325 &tpl_data->tpl_frame[tpl_frame->ref_map_index[idx]]; 1326 tpl_data->ref_frame[idx] = tpl_ref_frame->rec_picture; 1327 tpl_data->src_ref_frame[idx] = tpl_ref_frame->gf_picture; 1328 ref_frame_display_indices[idx] = tpl_ref_frame->frame_display_index; 1329 } 1330 1331 // Store the reference frames based on priority order 1332 for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { 1333 ref_frames_ordered[i] = 1334 tpl_data->ref_frame[ref_frame_priority_order[i] - 1]; 1335 } 1336 1337 // Work out which reference frame slots may be used. 1338 ref_frame_flags = 1339 get_ref_frame_flags(&cpi->sf, is_one_pass_rt_params(cpi), 1340 ref_frames_ordered, cpi->ext_flags.ref_frame_flags); 1341 1342 enforce_max_ref_frames(cpi, &ref_frame_flags, ref_frame_display_indices, 1343 tpl_frame->frame_display_index); 1344 1345 // Prune reference frames 1346 for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) { 1347 if ((ref_frame_flags & (1 << idx)) == 0) { 1348 tpl_data->ref_frame[idx] = NULL; 1349 } 1350 } 1351 1352 // Skip motion estimation w.r.t. reference frames which are not 1353 // considered in RD search, using "selective_ref_frame" speed feature. 1354 // The reference frame pruning is not enabled for frames beyond the gop 1355 // length, as there are fewer reference frames and the reference frames 1356 // differ from the frames considered during RD search. 1357 if (ref_pruning_enabled && (frame_idx < gop_length)) { 1358 for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) { 1359 const MV_REFERENCE_FRAME refs[2] = { idx + 1, NONE_FRAME }; 1360 if (prune_ref_by_selective_ref_frame(cpi, NULL, refs, 1361 ref_frame_display_indices)) { 1362 tpl_data->ref_frame[idx] = NULL; 1363 } 1364 } 1365 } 1366 1367 // Make a temporary mbmi for tpl model 1368 MB_MODE_INFO mbmi; 1369 memset(&mbmi, 0, sizeof(mbmi)); 1370 MB_MODE_INFO *mbmi_ptr = &mbmi; 1371 xd->mi = &mbmi_ptr; 1372 1373 xd->block_ref_scale_factors[0] = &tpl_data->sf; 1374 xd->block_ref_scale_factors[1] = &tpl_data->sf; 1375 1376 const int base_qindex = 1377 cpi->use_ducky_encode ? gf_group->q_val[frame_idx] : pframe_qindex; 1378 // The TPL model is only meant to be run in inter mode, so ensure that we are 1379 // not running in all intra mode, which implies we are not tuning for image 1380 // quality (IQ) or SSIMULACRA2. 1381 assert(cpi->oxcf.tune_cfg.tuning != AOM_TUNE_IQ && 1382 cpi->oxcf.tune_cfg.tuning != AOM_TUNE_SSIMULACRA2 && 1383 cpi->oxcf.mode != ALLINTRA); 1384 // Get rd multiplier set up. 1385 rdmult = av1_compute_rd_mult( 1386 base_qindex, cm->seq_params->bit_depth, 1387 cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, 1388 boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, 1389 is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); 1390 1391 if (rdmult < 1) rdmult = 1; 1392 av1_set_error_per_bit(&x->errorperbit, rdmult); 1393 av1_set_sad_per_bit(cpi, &x->sadperbit, base_qindex); 1394 1395 tpl_frame->is_valid = 1; 1396 1397 cm->quant_params.base_qindex = base_qindex; 1398 av1_frame_init_quantizer(cpi); 1399 1400 const BitDepthInfo bd_info = get_bit_depth_info(xd); 1401 const FRAME_UPDATE_TYPE update_type = 1402 gf_group->update_type[cpi->gf_frame_index]; 1403 tpl_frame->base_rdmult = av1_compute_rd_mult_based_on_qindex( 1404 bd_info.bit_depth, update_type, base_qindex, 1405 cpi->oxcf.tune_cfg.tuning) / 1406 6; 1407 1408 if (cpi->use_ducky_encode) 1409 tpl_frame->base_rdmult = gf_group->rdmult_val[frame_idx]; 1410 1411 av1_init_tpl_txfm_stats(tpl_txfm_stats); 1412 1413 // Initialize x->mbmi_ext when compound predictions are enabled. 1414 if (tpl_sf->allow_compound_pred) av1_zero(x->mbmi_ext); 1415 1416 // Set the pointer to null since mbmi is only allocated inside this function. 1417 assert(xd->mi == &mbmi_ptr); 1418 xd->mi = NULL; 1419 1420 // Tpl module is called before the setting of speed features at frame level. 1421 // Thus, turning off this speed feature for key frame is done here and not 1422 // integrated into the speed feature setting itself. 1423 const int layer_depth_th = (tpl_sf->use_sad_for_mode_decision == 1) ? 5 : 0; 1424 tpl_frame->use_pred_sad = 1425 tpl_sf->use_sad_for_mode_decision && 1426 gf_group->update_type[cpi->gf_frame_index] != KF_UPDATE && 1427 gf_group->layer_depth[frame_idx] >= layer_depth_th; 1428 } 1429 1430 // This function stores the motion estimation dependencies of all the blocks in 1431 // a row 1432 void av1_mc_flow_dispenser_row(AV1_COMP *cpi, TplTxfmStats *tpl_txfm_stats, 1433 TplBuffers *tpl_tmp_buffers, MACROBLOCK *x, 1434 int mi_row, BLOCK_SIZE bsize, TX_SIZE tx_size) { 1435 AV1_COMMON *const cm = &cpi->common; 1436 MultiThreadInfo *const mt_info = &cpi->mt_info; 1437 AV1TplRowMultiThreadInfo *const tpl_row_mt = &mt_info->tpl_row_mt; 1438 const CommonModeInfoParams *const mi_params = &cm->mi_params; 1439 const int mi_width = mi_size_wide[bsize]; 1440 TplParams *const tpl_data = &cpi->ppi->tpl_data; 1441 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx]; 1442 MACROBLOCKD *xd = &x->e_mbd; 1443 1444 const int tplb_cols_in_tile = 1445 ROUND_POWER_OF_TWO(mi_params->mi_cols, mi_size_wide_log2[bsize]); 1446 const int tplb_row = ROUND_POWER_OF_TWO(mi_row, mi_size_high_log2[bsize]); 1447 assert(mi_size_high[bsize] == (1 << tpl_data->tpl_stats_block_mis_log2)); 1448 assert(mi_size_wide[bsize] == (1 << tpl_data->tpl_stats_block_mis_log2)); 1449 1450 for (int mi_col = 0, tplb_col_in_tile = 0; mi_col < mi_params->mi_cols; 1451 mi_col += mi_width, tplb_col_in_tile++) { 1452 (*tpl_row_mt->sync_read_ptr)(&tpl_data->tpl_mt_sync, tplb_row, 1453 tplb_col_in_tile); 1454 1455 #if CONFIG_MULTITHREAD 1456 if (mt_info->num_workers > 1) { 1457 pthread_mutex_lock(tpl_row_mt->mutex_); 1458 const bool tpl_mt_exit = tpl_row_mt->tpl_mt_exit; 1459 pthread_mutex_unlock(tpl_row_mt->mutex_); 1460 // Exit in case any worker has encountered an error. 1461 if (tpl_mt_exit) return; 1462 } 1463 #endif 1464 1465 TplDepStats tpl_stats; 1466 1467 // Motion estimation column boundary 1468 av1_set_mv_col_limits(mi_params, &x->mv_limits, mi_col, mi_width, 1469 tpl_data->border_in_pixels); 1470 xd->mb_to_left_edge = -GET_MV_SUBPEL(mi_col * MI_SIZE); 1471 xd->mb_to_right_edge = 1472 GET_MV_SUBPEL(mi_params->mi_cols - mi_width - mi_col); 1473 mode_estimation(cpi, tpl_txfm_stats, tpl_tmp_buffers, x, mi_row, mi_col, 1474 bsize, tx_size, &tpl_stats); 1475 1476 // Motion flow dependency dispenser. 1477 tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, tpl_frame->stride, 1478 &tpl_stats, tpl_data->tpl_stats_block_mis_log2); 1479 (*tpl_row_mt->sync_write_ptr)(&tpl_data->tpl_mt_sync, tplb_row, 1480 tplb_col_in_tile, tplb_cols_in_tile); 1481 } 1482 } 1483 1484 static inline void mc_flow_dispenser(AV1_COMP *cpi) { 1485 AV1_COMMON *cm = &cpi->common; 1486 const CommonModeInfoParams *const mi_params = &cm->mi_params; 1487 ThreadData *td = &cpi->td; 1488 MACROBLOCK *x = &td->mb; 1489 MACROBLOCKD *xd = &x->e_mbd; 1490 const BLOCK_SIZE bsize = 1491 convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d); 1492 const TX_SIZE tx_size = max_txsize_lookup[bsize]; 1493 const int mi_height = mi_size_high[bsize]; 1494 for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += mi_height) { 1495 // Motion estimation row boundary 1496 av1_set_mv_row_limits(mi_params, &x->mv_limits, mi_row, mi_height, 1497 cpi->ppi->tpl_data.border_in_pixels); 1498 xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE); 1499 xd->mb_to_bottom_edge = 1500 GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE); 1501 av1_mc_flow_dispenser_row(cpi, &td->tpl_txfm_stats, &td->tpl_tmp_buffers, x, 1502 mi_row, bsize, tx_size); 1503 } 1504 } 1505 1506 static void mc_flow_synthesizer(TplParams *tpl_data, int frame_idx, int mi_rows, 1507 int mi_cols) { 1508 if (!frame_idx) { 1509 return; 1510 } 1511 const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d); 1512 const int mi_height = mi_size_high[bsize]; 1513 const int mi_width = mi_size_wide[bsize]; 1514 assert(mi_height == (1 << tpl_data->tpl_stats_block_mis_log2)); 1515 assert(mi_width == (1 << tpl_data->tpl_stats_block_mis_log2)); 1516 1517 for (int mi_row = 0; mi_row < mi_rows; mi_row += mi_height) { 1518 for (int mi_col = 0; mi_col < mi_cols; mi_col += mi_width) { 1519 tpl_model_update(tpl_data, mi_row, mi_col, frame_idx); 1520 } 1521 } 1522 } 1523 1524 static inline void init_gop_frames_for_tpl( 1525 AV1_COMP *cpi, const EncodeFrameParams *const init_frame_params, 1526 GF_GROUP *gf_group, int *tpl_group_frames, int *pframe_qindex) { 1527 AV1_COMMON *cm = &cpi->common; 1528 assert(cpi->gf_frame_index == 0); 1529 *pframe_qindex = 0; 1530 1531 RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]; 1532 init_ref_map_pair(cpi, ref_frame_map_pairs); 1533 1534 int remapped_ref_idx[REF_FRAMES]; 1535 1536 EncodeFrameParams frame_params = *init_frame_params; 1537 TplParams *const tpl_data = &cpi->ppi->tpl_data; 1538 1539 int ref_picture_map[REF_FRAMES]; 1540 1541 for (int i = 0; i < REF_FRAMES; ++i) { 1542 if (frame_params.frame_type == KEY_FRAME) { 1543 tpl_data->tpl_frame[-i - 1].gf_picture = NULL; 1544 tpl_data->tpl_frame[-i - 1].rec_picture = NULL; 1545 tpl_data->tpl_frame[-i - 1].frame_display_index = 0; 1546 } else { 1547 tpl_data->tpl_frame[-i - 1].gf_picture = &cm->ref_frame_map[i]->buf; 1548 tpl_data->tpl_frame[-i - 1].rec_picture = &cm->ref_frame_map[i]->buf; 1549 tpl_data->tpl_frame[-i - 1].frame_display_index = 1550 cm->ref_frame_map[i]->display_order_hint; 1551 } 1552 1553 ref_picture_map[i] = -i - 1; 1554 } 1555 1556 *tpl_group_frames = 0; 1557 1558 int gf_index; 1559 int process_frame_count = 0; 1560 const int gop_length = get_gop_length(gf_group); 1561 1562 for (gf_index = 0; gf_index < gop_length; ++gf_index) { 1563 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index]; 1564 FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index]; 1565 int lookahead_index = 1566 gf_group->cur_frame_idx[gf_index] + gf_group->arf_src_offset[gf_index]; 1567 frame_params.show_frame = frame_update_type != ARF_UPDATE && 1568 frame_update_type != INTNL_ARF_UPDATE; 1569 frame_params.show_existing_frame = 1570 frame_update_type == INTNL_OVERLAY_UPDATE || 1571 frame_update_type == OVERLAY_UPDATE; 1572 frame_params.frame_type = gf_group->frame_type[gf_index]; 1573 1574 if (frame_update_type == LF_UPDATE) 1575 *pframe_qindex = gf_group->q_val[gf_index]; 1576 1577 const struct lookahead_entry *buf = av1_lookahead_peek( 1578 cpi->ppi->lookahead, lookahead_index, cpi->compressor_stage); 1579 if (buf == NULL) break; 1580 tpl_frame->gf_picture = &buf->img; 1581 1582 // Use filtered frame buffer if available. This will make tpl stats more 1583 // precise. 1584 FRAME_DIFF frame_diff; 1585 const YV12_BUFFER_CONFIG *tf_buf = 1586 av1_tf_info_get_filtered_buf(&cpi->ppi->tf_info, gf_index, &frame_diff); 1587 if (tf_buf != NULL) { 1588 tpl_frame->gf_picture = tf_buf; 1589 } 1590 1591 // 'cm->current_frame.frame_number' is the display number 1592 // of the current frame. 1593 // 'lookahead_index' is frame offset within the gf group. 1594 // 'lookahead_index + cm->current_frame.frame_number' 1595 // is the display index of the frame. 1596 tpl_frame->frame_display_index = 1597 lookahead_index + cm->current_frame.frame_number; 1598 assert(buf->display_idx == 1599 cpi->frame_index_set.show_frame_count + lookahead_index); 1600 1601 if (frame_update_type != OVERLAY_UPDATE && 1602 frame_update_type != INTNL_OVERLAY_UPDATE) { 1603 tpl_frame->rec_picture = &tpl_data->tpl_rec_pool[process_frame_count]; 1604 tpl_frame->tpl_stats_ptr = tpl_data->tpl_stats_pool[process_frame_count]; 1605 ++process_frame_count; 1606 } 1607 const int true_disp = (int)(tpl_frame->frame_display_index); 1608 1609 av1_get_ref_frames(ref_frame_map_pairs, true_disp, cpi, gf_index, 0, 1610 remapped_ref_idx); 1611 1612 int refresh_mask = 1613 av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type, 1614 gf_index, true_disp, ref_frame_map_pairs); 1615 1616 // Make the frames marked as is_frame_non_ref to non-reference frames. 1617 if (cpi->ppi->gf_group.is_frame_non_ref[gf_index]) refresh_mask = 0; 1618 1619 int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask); 1620 1621 if (refresh_frame_map_index < REF_FRAMES && 1622 refresh_frame_map_index != INVALID_IDX) { 1623 ref_frame_map_pairs[refresh_frame_map_index].disp_order = 1624 AOMMAX(0, true_disp); 1625 ref_frame_map_pairs[refresh_frame_map_index].pyr_level = 1626 get_true_pyr_level(gf_group->layer_depth[gf_index], true_disp, 1627 cpi->ppi->gf_group.max_layer_depth); 1628 } 1629 1630 for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) 1631 tpl_frame->ref_map_index[i - LAST_FRAME] = 1632 ref_picture_map[remapped_ref_idx[i - LAST_FRAME]]; 1633 1634 if (refresh_mask) ref_picture_map[refresh_frame_map_index] = gf_index; 1635 1636 ++*tpl_group_frames; 1637 } 1638 1639 const int tpl_extend = cpi->oxcf.gf_cfg.lag_in_frames - MAX_GF_INTERVAL; 1640 int extend_frame_count = 0; 1641 int extend_frame_length = AOMMIN( 1642 tpl_extend, cpi->rc.frames_to_key - cpi->ppi->p_rc.baseline_gf_interval); 1643 1644 int frame_display_index = gf_group->cur_frame_idx[gop_length - 1] + 1645 gf_group->arf_src_offset[gop_length - 1] + 1; 1646 1647 for (; 1648 gf_index < MAX_TPL_FRAME_IDX && extend_frame_count < extend_frame_length; 1649 ++gf_index) { 1650 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index]; 1651 FRAME_UPDATE_TYPE frame_update_type = LF_UPDATE; 1652 frame_params.show_frame = frame_update_type != ARF_UPDATE && 1653 frame_update_type != INTNL_ARF_UPDATE; 1654 frame_params.show_existing_frame = 1655 frame_update_type == INTNL_OVERLAY_UPDATE; 1656 frame_params.frame_type = INTER_FRAME; 1657 1658 int lookahead_index = frame_display_index; 1659 struct lookahead_entry *buf = av1_lookahead_peek( 1660 cpi->ppi->lookahead, lookahead_index, cpi->compressor_stage); 1661 1662 if (buf == NULL) break; 1663 1664 tpl_frame->gf_picture = &buf->img; 1665 tpl_frame->rec_picture = &tpl_data->tpl_rec_pool[process_frame_count]; 1666 tpl_frame->tpl_stats_ptr = tpl_data->tpl_stats_pool[process_frame_count]; 1667 // 'cm->current_frame.frame_number' is the display number 1668 // of the current frame. 1669 // 'frame_display_index' is frame offset within the gf group. 1670 // 'frame_display_index + cm->current_frame.frame_number' 1671 // is the display index of the frame. 1672 tpl_frame->frame_display_index = 1673 frame_display_index + cm->current_frame.frame_number; 1674 1675 ++process_frame_count; 1676 1677 gf_group->update_type[gf_index] = LF_UPDATE; 1678 1679 #if CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS 1680 if (cpi->oxcf.pass == AOM_RC_SECOND_PASS) { 1681 if (cpi->oxcf.rc_cfg.mode == AOM_Q) { 1682 *pframe_qindex = cpi->oxcf.rc_cfg.cq_level; 1683 } else if (cpi->oxcf.rc_cfg.mode == AOM_VBR) { 1684 // TODO(angiebird): Find a more adaptive method to decide pframe_qindex 1685 // override the pframe_qindex in the second pass when bitrate accuracy 1686 // is on. We found that setting this pframe_qindex make the tpl stats 1687 // more stable. 1688 *pframe_qindex = 128; 1689 } 1690 } 1691 #endif // CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS 1692 gf_group->q_val[gf_index] = *pframe_qindex; 1693 const int true_disp = (int)(tpl_frame->frame_display_index); 1694 av1_get_ref_frames(ref_frame_map_pairs, true_disp, cpi, gf_index, 0, 1695 remapped_ref_idx); 1696 int refresh_mask = 1697 av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type, 1698 gf_index, true_disp, ref_frame_map_pairs); 1699 int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask); 1700 1701 if (refresh_frame_map_index < REF_FRAMES && 1702 refresh_frame_map_index != INVALID_IDX) { 1703 ref_frame_map_pairs[refresh_frame_map_index].disp_order = 1704 AOMMAX(0, true_disp); 1705 ref_frame_map_pairs[refresh_frame_map_index].pyr_level = 1706 get_true_pyr_level(gf_group->layer_depth[gf_index], true_disp, 1707 cpi->ppi->gf_group.max_layer_depth); 1708 } 1709 1710 for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) 1711 tpl_frame->ref_map_index[i - LAST_FRAME] = 1712 ref_picture_map[remapped_ref_idx[i - LAST_FRAME]]; 1713 1714 tpl_frame->ref_map_index[ALTREF_FRAME - LAST_FRAME] = -1; 1715 tpl_frame->ref_map_index[LAST3_FRAME - LAST_FRAME] = -1; 1716 tpl_frame->ref_map_index[BWDREF_FRAME - LAST_FRAME] = -1; 1717 tpl_frame->ref_map_index[ALTREF2_FRAME - LAST_FRAME] = -1; 1718 1719 if (refresh_mask) ref_picture_map[refresh_frame_map_index] = gf_index; 1720 1721 ++*tpl_group_frames; 1722 ++extend_frame_count; 1723 ++frame_display_index; 1724 } 1725 } 1726 1727 void av1_init_tpl_stats(TplParams *const tpl_data) { 1728 tpl_data->ready = 0; 1729 set_tpl_stats_block_size(&tpl_data->tpl_stats_block_mis_log2, 1730 &tpl_data->tpl_bsize_1d); 1731 for (int frame_idx = 0; frame_idx < MAX_LENGTH_TPL_FRAME_STATS; ++frame_idx) { 1732 TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame_idx]; 1733 tpl_frame->is_valid = 0; 1734 } 1735 for (int frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) { 1736 TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame_idx]; 1737 if (tpl_data->tpl_stats_pool[frame_idx] == NULL) continue; 1738 memset(tpl_data->tpl_stats_pool[frame_idx], 0, 1739 tpl_frame->height * tpl_frame->width * 1740 sizeof(*tpl_frame->tpl_stats_ptr)); 1741 } 1742 } 1743 1744 int av1_tpl_stats_ready(const TplParams *tpl_data, int gf_frame_index) { 1745 if (tpl_data->ready == 0) { 1746 return 0; 1747 } 1748 if (gf_frame_index >= MAX_TPL_FRAME_IDX) { 1749 // The sub-GOP length exceeds the TPL buffer capacity. 1750 // Hence the TPL related functions are disabled hereafter. 1751 return 0; 1752 } 1753 return tpl_data->tpl_frame[gf_frame_index].is_valid; 1754 } 1755 1756 static inline int eval_gop_length(double *beta, int gop_eval) { 1757 switch (gop_eval) { 1758 case 1: 1759 // Allow larger GOP size if the base layer ARF has higher dependency 1760 // factor than the intermediate ARF and both ARFs have reasonably high 1761 // dependency factors. 1762 return (beta[0] >= beta[1] + 0.7) && beta[0] > 3.0; 1763 case 2: 1764 if ((beta[0] >= beta[1] + 0.4) && beta[0] > 1.6) 1765 return 1; // Don't shorten the gf interval 1766 else if ((beta[0] < beta[1] + 0.1) || beta[0] <= 1.4) 1767 return 0; // Shorten the gf interval 1768 else 1769 return 2; // Cannot decide the gf interval, so redo the 1770 // tpl stats calculation. 1771 case 3: return beta[0] > 1.1; 1772 default: return 2; 1773 } 1774 } 1775 1776 // TODO(jingning): Restructure av1_rc_pick_q_and_bounds() to narrow down 1777 // the scope of input arguments. 1778 void av1_tpl_preload_rc_estimate(AV1_COMP *cpi, 1779 const EncodeFrameParams *const frame_params) { 1780 AV1_COMMON *cm = &cpi->common; 1781 GF_GROUP *gf_group = &cpi->ppi->gf_group; 1782 int bottom_index, top_index; 1783 if (cpi->use_ducky_encode) return; 1784 1785 cm->current_frame.frame_type = frame_params->frame_type; 1786 for (int gf_index = cpi->gf_frame_index; gf_index < gf_group->size; 1787 ++gf_index) { 1788 cm->current_frame.frame_type = gf_group->frame_type[gf_index]; 1789 cm->show_frame = gf_group->update_type[gf_index] != ARF_UPDATE && 1790 gf_group->update_type[gf_index] != INTNL_ARF_UPDATE; 1791 gf_group->q_val[gf_index] = av1_rc_pick_q_and_bounds( 1792 cpi, cm->width, cm->height, gf_index, &bottom_index, &top_index); 1793 } 1794 } 1795 1796 static inline int skip_tpl_for_frame(const GF_GROUP *gf_group, int frame_idx, 1797 int gop_eval, int approx_gop_eval, 1798 int reduce_num_frames) { 1799 // When gop_eval is set to 2, tpl stats calculation is done for ARFs from base 1800 // layer, (base+1) layer and (base+2) layer. When gop_eval is set to 3, 1801 // tpl stats calculation is limited to ARFs from base layer and (base+1) 1802 // layer. 1803 const int num_arf_layers = (gop_eval == 2) ? 3 : 2; 1804 const int gop_length = get_gop_length(gf_group); 1805 1806 if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE || 1807 gf_group->update_type[frame_idx] == OVERLAY_UPDATE) 1808 return 1; 1809 1810 // When approx_gop_eval = 1, skip tpl stats calculation for higher layer 1811 // frames and for frames beyond gop length. 1812 if (approx_gop_eval && (gf_group->layer_depth[frame_idx] > num_arf_layers || 1813 frame_idx >= gop_length)) 1814 return 1; 1815 1816 if (reduce_num_frames && gf_group->update_type[frame_idx] == LF_UPDATE && 1817 frame_idx < gop_length) 1818 return 1; 1819 1820 return 0; 1821 } 1822 1823 /*!\brief Compute the frame importance from TPL stats 1824 * 1825 * \param[in] tpl_data TPL struct 1826 * \param[in] gf_frame_index current frame index in the GOP 1827 * 1828 * \return frame_importance 1829 */ 1830 static double get_frame_importance(const TplParams *tpl_data, 1831 int gf_frame_index) { 1832 const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_frame_index]; 1833 const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 1834 1835 const int tpl_stride = tpl_frame->stride; 1836 double intra_cost_base = 0; 1837 double mc_dep_cost_base = 0; 1838 double cbcmp_base = 1; 1839 const int step = 1 << tpl_data->tpl_stats_block_mis_log2; 1840 1841 for (int row = 0; row < tpl_frame->mi_rows; row += step) { 1842 for (int col = 0; col < tpl_frame->mi_cols; col += step) { 1843 const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos( 1844 row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)]; 1845 double cbcmp = (double)this_stats->srcrf_dist; 1846 const int64_t mc_dep_delta = 1847 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 1848 this_stats->mc_dep_dist); 1849 double dist_scaled = (double)(this_stats->recrf_dist << RDDIV_BITS); 1850 dist_scaled = AOMMAX(dist_scaled, 1); 1851 intra_cost_base += log(dist_scaled) * cbcmp; 1852 mc_dep_cost_base += log(dist_scaled + mc_dep_delta) * cbcmp; 1853 cbcmp_base += cbcmp; 1854 } 1855 } 1856 return exp((mc_dep_cost_base - intra_cost_base) / cbcmp_base); 1857 } 1858 1859 int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval, 1860 const EncodeFrameParams *const frame_params) { 1861 #if CONFIG_COLLECT_COMPONENT_TIMING 1862 start_timing(cpi, av1_tpl_setup_stats_time); 1863 #endif 1864 assert(cpi->gf_frame_index == 0); 1865 AV1_COMMON *cm = &cpi->common; 1866 MultiThreadInfo *const mt_info = &cpi->mt_info; 1867 AV1TplRowMultiThreadInfo *const tpl_row_mt = &mt_info->tpl_row_mt; 1868 GF_GROUP *gf_group = &cpi->ppi->gf_group; 1869 EncodeFrameParams this_frame_params = *frame_params; 1870 TplParams *const tpl_data = &cpi->ppi->tpl_data; 1871 int approx_gop_eval = (gop_eval > 1); 1872 1873 if (cpi->superres_mode != AOM_SUPERRES_NONE) { 1874 assert(cpi->superres_mode != AOM_SUPERRES_AUTO); 1875 av1_init_tpl_stats(tpl_data); 1876 return 0; 1877 } 1878 1879 cm->current_frame.frame_type = frame_params->frame_type; 1880 for (int gf_index = cpi->gf_frame_index; gf_index < gf_group->size; 1881 ++gf_index) { 1882 cm->current_frame.frame_type = gf_group->frame_type[gf_index]; 1883 av1_configure_buffer_updates(cpi, &this_frame_params.refresh_frame, 1884 gf_group->update_type[gf_index], 1885 gf_group->refbuf_state[gf_index], 0); 1886 1887 memcpy(&cpi->refresh_frame, &this_frame_params.refresh_frame, 1888 sizeof(cpi->refresh_frame)); 1889 } 1890 1891 int pframe_qindex; 1892 int tpl_gf_group_frames; 1893 init_gop_frames_for_tpl(cpi, frame_params, gf_group, &tpl_gf_group_frames, 1894 &pframe_qindex); 1895 1896 cpi->ppi->p_rc.base_layer_qp = pframe_qindex; 1897 1898 av1_init_tpl_stats(tpl_data); 1899 1900 TplBuffers *tpl_tmp_buffers = &cpi->td.tpl_tmp_buffers; 1901 if (!tpl_alloc_temp_buffers(tpl_tmp_buffers, tpl_data->tpl_bsize_1d)) { 1902 aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR, 1903 "Error allocating tpl data"); 1904 } 1905 1906 tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read_dummy; 1907 tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write_dummy; 1908 1909 av1_setup_scale_factors_for_frame(&cm->sf_identity, cm->width, cm->height, 1910 cm->width, cm->height); 1911 1912 if (frame_params->frame_type == KEY_FRAME) { 1913 av1_init_mv_probs(cm); 1914 } 1915 av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv, 1916 cm->features.allow_high_precision_mv, cpi->td.mb.mv_costs); 1917 1918 const int num_planes = 1919 cpi->sf.tpl_sf.use_y_only_rate_distortion ? 1 : av1_num_planes(cm); 1920 // As tpl module is called before the setting of speed features at frame 1921 // level, turning off this speed feature for the first GF group of the 1922 // key-frame interval is done here. 1923 int reduce_num_frames = 1924 cpi->sf.tpl_sf.reduce_num_frames && 1925 gf_group->update_type[cpi->gf_frame_index] != KF_UPDATE && 1926 gf_group->max_layer_depth > 2; 1927 // TPL processing is skipped for frames of type LF_UPDATE when 1928 // 'reduce_num_frames' is 1, which affects the r0 calcuation. Thus, a factor 1929 // to adjust r0 is used. The value of 1.6 corresponds to using ~60% of the 1930 // frames in the gf group on an average. 1931 tpl_data->r0_adjust_factor = reduce_num_frames ? 1.6 : 1.0; 1932 1933 // Backward propagation from tpl_group_frames to 1. 1934 for (int frame_idx = cpi->gf_frame_index; frame_idx < tpl_gf_group_frames; 1935 ++frame_idx) { 1936 if (skip_tpl_for_frame(gf_group, frame_idx, gop_eval, approx_gop_eval, 1937 reduce_num_frames)) 1938 continue; 1939 1940 init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex); 1941 if (mt_info->num_workers > 1) { 1942 tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read; 1943 tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write; 1944 av1_mc_flow_dispenser_mt(cpi); 1945 } else { 1946 mc_flow_dispenser(cpi); 1947 } 1948 #if CONFIG_BITRATE_ACCURACY 1949 av1_tpl_txfm_stats_update_abs_coeff_mean(&cpi->td.tpl_txfm_stats); 1950 av1_tpl_store_txfm_stats(tpl_data, &cpi->td.tpl_txfm_stats, frame_idx); 1951 #endif // CONFIG_BITRATE_ACCURACY 1952 #if CONFIG_RATECTRL_LOG && CONFIG_THREE_PASS && CONFIG_BITRATE_ACCURACY 1953 if (cpi->oxcf.pass == AOM_RC_THIRD_PASS) { 1954 int frame_coding_idx = 1955 av1_vbr_rc_frame_coding_idx(&cpi->vbr_rc_info, frame_idx); 1956 rc_log_frame_stats(&cpi->rc_log, frame_coding_idx, 1957 &cpi->td.tpl_txfm_stats); 1958 } 1959 #endif // CONFIG_RATECTRL_LOG 1960 1961 aom_extend_frame_borders(tpl_data->tpl_frame[frame_idx].rec_picture, 1962 num_planes); 1963 } 1964 1965 for (int frame_idx = tpl_gf_group_frames - 1; 1966 frame_idx >= cpi->gf_frame_index; --frame_idx) { 1967 if (skip_tpl_for_frame(gf_group, frame_idx, gop_eval, approx_gop_eval, 1968 reduce_num_frames)) 1969 continue; 1970 1971 mc_flow_synthesizer(tpl_data, frame_idx, cm->mi_params.mi_rows, 1972 cm->mi_params.mi_cols); 1973 } 1974 1975 av1_configure_buffer_updates(cpi, &this_frame_params.refresh_frame, 1976 gf_group->update_type[cpi->gf_frame_index], 1977 gf_group->update_type[cpi->gf_frame_index], 0); 1978 cm->current_frame.frame_type = frame_params->frame_type; 1979 cm->show_frame = frame_params->show_frame; 1980 1981 #if CONFIG_COLLECT_COMPONENT_TIMING 1982 // Record the time if the function returns. 1983 if (cpi->common.tiles.large_scale || gf_group->max_layer_depth_allowed == 0 || 1984 !gop_eval) 1985 end_timing(cpi, av1_tpl_setup_stats_time); 1986 #endif 1987 1988 tpl_dealloc_temp_buffers(tpl_tmp_buffers); 1989 1990 if (!approx_gop_eval) { 1991 tpl_data->ready = 1; 1992 } 1993 if (cpi->common.tiles.large_scale) return 0; 1994 if (gf_group->max_layer_depth_allowed == 0) return 1; 1995 if (!gop_eval) return 0; 1996 assert(gf_group->arf_index >= 0); 1997 1998 double beta[2] = { 0.0 }; 1999 const int frame_idx_0 = gf_group->arf_index; 2000 const int frame_idx_1 = 2001 AOMMIN(tpl_gf_group_frames - 1, gf_group->arf_index + 1); 2002 beta[0] = get_frame_importance(tpl_data, frame_idx_0); 2003 beta[1] = get_frame_importance(tpl_data, frame_idx_1); 2004 #if CONFIG_COLLECT_COMPONENT_TIMING 2005 end_timing(cpi, av1_tpl_setup_stats_time); 2006 #endif 2007 return eval_gop_length(beta, gop_eval); 2008 } 2009 2010 void av1_tpl_rdmult_setup(AV1_COMP *cpi) { 2011 const AV1_COMMON *const cm = &cpi->common; 2012 const int tpl_idx = cpi->gf_frame_index; 2013 2014 assert( 2015 IMPLIES(cpi->ppi->gf_group.size > 0, tpl_idx < cpi->ppi->gf_group.size)); 2016 2017 TplParams *const tpl_data = &cpi->ppi->tpl_data; 2018 const TplDepFrame *const tpl_frame = &tpl_data->tpl_frame[tpl_idx]; 2019 2020 if (!tpl_frame->is_valid) return; 2021 2022 const TplDepStats *const tpl_stats = tpl_frame->tpl_stats_ptr; 2023 const int tpl_stride = tpl_frame->stride; 2024 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); 2025 2026 const int block_size = BLOCK_16X16; 2027 const int num_mi_w = mi_size_wide[block_size]; 2028 const int num_mi_h = mi_size_high[block_size]; 2029 const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w; 2030 const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h; 2031 const double c = 1.2; 2032 const int step = 1 << tpl_data->tpl_stats_block_mis_log2; 2033 2034 // Loop through each 'block_size' X 'block_size' block. 2035 for (int row = 0; row < num_rows; row++) { 2036 for (int col = 0; col < num_cols; col++) { 2037 double intra_cost = 0.0, mc_dep_cost = 0.0; 2038 // Loop through each mi block. 2039 for (int mi_row = row * num_mi_h; mi_row < (row + 1) * num_mi_h; 2040 mi_row += step) { 2041 for (int mi_col = col * num_mi_w; mi_col < (col + 1) * num_mi_w; 2042 mi_col += step) { 2043 if (mi_row >= cm->mi_params.mi_rows || mi_col >= mi_cols_sr) continue; 2044 const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos( 2045 mi_row, mi_col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)]; 2046 int64_t mc_dep_delta = 2047 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 2048 this_stats->mc_dep_dist); 2049 intra_cost += (double)(this_stats->recrf_dist << RDDIV_BITS); 2050 mc_dep_cost += 2051 (double)(this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta; 2052 } 2053 } 2054 const double rk = intra_cost / mc_dep_cost; 2055 const int index = row * num_cols + col; 2056 cpi->tpl_rdmult_scaling_factors[index] = rk / cpi->rd.r0 + c; 2057 } 2058 } 2059 } 2060 2061 void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x, 2062 BLOCK_SIZE sb_size, int mi_row, int mi_col) { 2063 AV1_COMMON *const cm = &cpi->common; 2064 GF_GROUP *gf_group = &cpi->ppi->gf_group; 2065 assert(IMPLIES(cpi->ppi->gf_group.size > 0, 2066 cpi->gf_frame_index < cpi->ppi->gf_group.size)); 2067 const int tpl_idx = cpi->gf_frame_index; 2068 2069 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100)); 2070 const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6); 2071 const FRAME_TYPE frame_type = cm->current_frame.frame_type; 2072 2073 if (tpl_idx >= MAX_TPL_FRAME_IDX) return; 2074 TplDepFrame *tpl_frame = &cpi->ppi->tpl_data.tpl_frame[tpl_idx]; 2075 if (!tpl_frame->is_valid) return; 2076 if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return; 2077 if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return; 2078 2079 const int mi_col_sr = 2080 coded_to_superres_mi(mi_col, cm->superres_scale_denominator); 2081 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); 2082 const int sb_mi_width_sr = coded_to_superres_mi( 2083 mi_size_wide[sb_size], cm->superres_scale_denominator); 2084 2085 const int bsize_base = BLOCK_16X16; 2086 const int num_mi_w = mi_size_wide[bsize_base]; 2087 const int num_mi_h = mi_size_high[bsize_base]; 2088 const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w; 2089 const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h; 2090 const int num_bcols = (sb_mi_width_sr + num_mi_w - 1) / num_mi_w; 2091 const int num_brows = (mi_size_high[sb_size] + num_mi_h - 1) / num_mi_h; 2092 int row, col; 2093 2094 double base_block_count = 0.0; 2095 double log_sum = 0.0; 2096 2097 for (row = mi_row / num_mi_w; 2098 row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { 2099 for (col = mi_col_sr / num_mi_h; 2100 col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { 2101 const int index = row * num_cols + col; 2102 log_sum += log(cpi->tpl_rdmult_scaling_factors[index]); 2103 base_block_count += 1.0; 2104 } 2105 } 2106 2107 const CommonQuantParams *quant_params = &cm->quant_params; 2108 2109 const int orig_qindex_rdmult = 2110 quant_params->base_qindex + quant_params->y_dc_delta_q; 2111 const int orig_rdmult = av1_compute_rd_mult( 2112 orig_qindex_rdmult, cm->seq_params->bit_depth, 2113 cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, 2114 boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, 2115 is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); 2116 2117 const int new_qindex_rdmult = quant_params->base_qindex + 2118 x->rdmult_delta_qindex + 2119 quant_params->y_dc_delta_q; 2120 const int new_rdmult = av1_compute_rd_mult( 2121 new_qindex_rdmult, cm->seq_params->bit_depth, 2122 cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, 2123 boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, 2124 is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); 2125 2126 const double scaling_factor = (double)new_rdmult / (double)orig_rdmult; 2127 2128 double scale_adj = log(scaling_factor) - log_sum / base_block_count; 2129 scale_adj = exp_bounded(scale_adj); 2130 2131 for (row = mi_row / num_mi_w; 2132 row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { 2133 for (col = mi_col_sr / num_mi_h; 2134 col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { 2135 const int index = row * num_cols + col; 2136 cpi->ppi->tpl_sb_rdmult_scaling_factors[index] = 2137 scale_adj * cpi->tpl_rdmult_scaling_factors[index]; 2138 } 2139 } 2140 } 2141 2142 double av1_exponential_entropy(double q_step, double b) { 2143 b = AOMMAX(b, TPL_EPSILON); 2144 double z = fmax(exp_bounded(-q_step / b), TPL_EPSILON); 2145 return -log2(1 - z) - z * log2(z) / (1 - z); 2146 } 2147 2148 double av1_laplace_entropy(double q_step, double b, double zero_bin_ratio) { 2149 // zero bin's size is zero_bin_ratio * q_step 2150 // non-zero bin's size is q_step 2151 b = AOMMAX(b, TPL_EPSILON); 2152 double z = fmax(exp_bounded(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON); 2153 double h = av1_exponential_entropy(q_step, b); 2154 double r = -(1 - z) * log2(1 - z) - z * log2(z) + z * (h + 1); 2155 return r; 2156 } 2157 2158 #if CONFIG_BITRATE_ACCURACY 2159 double av1_laplace_estimate_frame_rate(int q_index, int block_count, 2160 const double *abs_coeff_mean, 2161 int coeff_num) { 2162 double zero_bin_ratio = 2; 2163 double dc_q_step = av1_dc_quant_QTX(q_index, 0, AOM_BITS_8) / 4.; 2164 double ac_q_step = av1_ac_quant_QTX(q_index, 0, AOM_BITS_8) / 4.; 2165 double est_rate = 0; 2166 // dc coeff 2167 est_rate += av1_laplace_entropy(dc_q_step, abs_coeff_mean[0], zero_bin_ratio); 2168 // ac coeff 2169 for (int i = 1; i < coeff_num; ++i) { 2170 est_rate += 2171 av1_laplace_entropy(ac_q_step, abs_coeff_mean[i], zero_bin_ratio); 2172 } 2173 est_rate *= block_count; 2174 return est_rate; 2175 } 2176 #endif // CONFIG_BITRATE_ACCURACY 2177 2178 double av1_estimate_coeff_entropy(double q_step, double b, 2179 double zero_bin_ratio, int qcoeff) { 2180 b = AOMMAX(b, TPL_EPSILON); 2181 int abs_qcoeff = abs(qcoeff); 2182 double z0 = fmax(exp_bounded(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON); 2183 if (abs_qcoeff == 0) { 2184 double r = -log2(1 - z0); 2185 return r; 2186 } else { 2187 double z = fmax(exp_bounded(-q_step / b), TPL_EPSILON); 2188 double r = 1 - log2(z0) - log2(1 - z) - (abs_qcoeff - 1) * log2(z); 2189 return r; 2190 } 2191 } 2192 2193 #if CONFIG_RD_COMMAND 2194 void av1_read_rd_command(const char *filepath, RD_COMMAND *rd_command) { 2195 FILE *fptr = fopen(filepath, "r"); 2196 fscanf(fptr, "%d", &rd_command->frame_count); 2197 rd_command->frame_index = 0; 2198 for (int i = 0; i < rd_command->frame_count; ++i) { 2199 int option; 2200 fscanf(fptr, "%d", &option); 2201 rd_command->option_ls[i] = (RD_OPTION)option; 2202 if (option == RD_OPTION_SET_Q) { 2203 fscanf(fptr, "%d", &rd_command->q_index_ls[i]); 2204 } else if (option == RD_OPTION_SET_Q_RDMULT) { 2205 fscanf(fptr, "%d", &rd_command->q_index_ls[i]); 2206 fscanf(fptr, "%d", &rd_command->rdmult_ls[i]); 2207 } 2208 } 2209 fclose(fptr); 2210 } 2211 #endif // CONFIG_RD_COMMAND 2212 2213 double av1_tpl_get_qstep_ratio(const TplParams *tpl_data, int gf_frame_index) { 2214 if (!av1_tpl_stats_ready(tpl_data, gf_frame_index)) { 2215 return 1; 2216 } 2217 const double frame_importance = 2218 get_frame_importance(tpl_data, gf_frame_index); 2219 return sqrt(1 / frame_importance); 2220 } 2221 2222 int av1_get_q_index_from_qstep_ratio(int leaf_qindex, double qstep_ratio, 2223 aom_bit_depth_t bit_depth) { 2224 const double leaf_qstep = av1_dc_quant_QTX(leaf_qindex, 0, bit_depth); 2225 const double target_qstep = leaf_qstep * qstep_ratio; 2226 int qindex = leaf_qindex; 2227 if (qstep_ratio < 1.0) { 2228 for (qindex = leaf_qindex; qindex > 0; --qindex) { 2229 const double qstep = av1_dc_quant_QTX(qindex, 0, bit_depth); 2230 if (qstep <= target_qstep) break; 2231 } 2232 } else { 2233 for (qindex = leaf_qindex; qindex <= MAXQ; ++qindex) { 2234 const double qstep = av1_dc_quant_QTX(qindex, 0, bit_depth); 2235 if (qstep >= target_qstep) break; 2236 } 2237 } 2238 return qindex; 2239 } 2240 2241 int av1_tpl_get_q_index(const TplParams *tpl_data, int gf_frame_index, 2242 int leaf_qindex, aom_bit_depth_t bit_depth) { 2243 const double qstep_ratio = av1_tpl_get_qstep_ratio(tpl_data, gf_frame_index); 2244 return av1_get_q_index_from_qstep_ratio(leaf_qindex, qstep_ratio, bit_depth); 2245 } 2246 2247 #if CONFIG_BITRATE_ACCURACY 2248 void av1_vbr_rc_init(VBR_RATECTRL_INFO *vbr_rc_info, double total_bit_budget, 2249 int show_frame_count) { 2250 av1_zero(*vbr_rc_info); 2251 vbr_rc_info->ready = 0; 2252 vbr_rc_info->total_bit_budget = total_bit_budget; 2253 vbr_rc_info->show_frame_count = show_frame_count; 2254 const double scale_factors[FRAME_UPDATE_TYPES] = { 0.94559, 0.94559, 1, 2255 0.94559, 1, 1, 2256 0.94559 }; 2257 2258 // TODO(angiebird): Based on the previous code, only the scale factor 0.94559 2259 // will be used in most of the cases with --limi=17. Figure out if the 2260 // following scale factors works better. 2261 // const double scale_factors[FRAME_UPDATE_TYPES] = { 0.94559, 0.12040, 1, 2262 // 1.10199, 1, 1, 2263 // 0.16393 }; 2264 2265 const double mv_scale_factors[FRAME_UPDATE_TYPES] = { 3, 3, 3, 3, 3, 3, 3 }; 2266 memcpy(vbr_rc_info->scale_factors, scale_factors, 2267 sizeof(scale_factors[0]) * FRAME_UPDATE_TYPES); 2268 memcpy(vbr_rc_info->mv_scale_factors, mv_scale_factors, 2269 sizeof(mv_scale_factors[0]) * FRAME_UPDATE_TYPES); 2270 2271 vbr_rc_reset_gop_data(vbr_rc_info); 2272 #if CONFIG_THREE_PASS 2273 // TODO(angiebird): Explain why we use -1 here 2274 vbr_rc_info->cur_gop_idx = -1; 2275 vbr_rc_info->gop_count = 0; 2276 vbr_rc_info->total_frame_count = 0; 2277 #endif // CONFIG_THREE_PASS 2278 } 2279 2280 #if CONFIG_THREE_PASS 2281 int av1_vbr_rc_frame_coding_idx(const VBR_RATECTRL_INFO *vbr_rc_info, 2282 int gf_frame_index) { 2283 int gop_idx = vbr_rc_info->cur_gop_idx; 2284 int gop_start_idx = vbr_rc_info->gop_start_idx_list[gop_idx]; 2285 return gop_start_idx + gf_frame_index; 2286 } 2287 2288 void av1_vbr_rc_append_tpl_info(VBR_RATECTRL_INFO *vbr_rc_info, 2289 const TPL_INFO *tpl_info) { 2290 int gop_start_idx = vbr_rc_info->total_frame_count; 2291 vbr_rc_info->gop_start_idx_list[vbr_rc_info->gop_count] = gop_start_idx; 2292 vbr_rc_info->gop_length_list[vbr_rc_info->gop_count] = tpl_info->gf_length; 2293 assert(gop_start_idx + tpl_info->gf_length <= VBR_RC_INFO_MAX_FRAMES); 2294 for (int i = 0; i < tpl_info->gf_length; ++i) { 2295 vbr_rc_info->txfm_stats_list[gop_start_idx + i] = 2296 tpl_info->txfm_stats_list[i]; 2297 vbr_rc_info->qstep_ratio_list[gop_start_idx + i] = 2298 tpl_info->qstep_ratio_ls[i]; 2299 vbr_rc_info->update_type_list[gop_start_idx + i] = 2300 tpl_info->update_type_list[i]; 2301 } 2302 vbr_rc_info->total_frame_count += tpl_info->gf_length; 2303 vbr_rc_info->gop_count++; 2304 } 2305 #endif // CONFIG_THREE_PASS 2306 2307 void av1_vbr_rc_set_gop_bit_budget(VBR_RATECTRL_INFO *vbr_rc_info, 2308 int gop_showframe_count) { 2309 vbr_rc_info->gop_showframe_count = gop_showframe_count; 2310 vbr_rc_info->gop_bit_budget = vbr_rc_info->total_bit_budget * 2311 gop_showframe_count / 2312 vbr_rc_info->show_frame_count; 2313 } 2314 2315 void av1_vbr_rc_compute_q_indices(int base_q_index, int frame_count, 2316 const double *qstep_ratio_list, 2317 aom_bit_depth_t bit_depth, 2318 int *q_index_list) { 2319 for (int i = 0; i < frame_count; ++i) { 2320 q_index_list[i] = av1_get_q_index_from_qstep_ratio( 2321 base_q_index, qstep_ratio_list[i], bit_depth); 2322 } 2323 } 2324 2325 double av1_vbr_rc_info_estimate_gop_bitrate( 2326 int base_q_index, aom_bit_depth_t bit_depth, 2327 const double *update_type_scale_factors, int frame_count, 2328 const FRAME_UPDATE_TYPE *update_type_list, const double *qstep_ratio_list, 2329 const TplTxfmStats *stats_list, int *q_index_list, 2330 double *estimated_bitrate_byframe) { 2331 av1_vbr_rc_compute_q_indices(base_q_index, frame_count, qstep_ratio_list, 2332 bit_depth, q_index_list); 2333 double estimated_gop_bitrate = 0; 2334 for (int frame_index = 0; frame_index < frame_count; frame_index++) { 2335 const TplTxfmStats *frame_stats = &stats_list[frame_index]; 2336 double frame_bitrate = 0; 2337 if (frame_stats->ready) { 2338 int q_index = q_index_list[frame_index]; 2339 2340 frame_bitrate = av1_laplace_estimate_frame_rate( 2341 q_index, frame_stats->txfm_block_count, frame_stats->abs_coeff_mean, 2342 frame_stats->coeff_num); 2343 } 2344 FRAME_UPDATE_TYPE update_type = update_type_list[frame_index]; 2345 estimated_gop_bitrate += 2346 frame_bitrate * update_type_scale_factors[update_type]; 2347 if (estimated_bitrate_byframe != NULL) { 2348 estimated_bitrate_byframe[frame_index] = frame_bitrate; 2349 } 2350 } 2351 return estimated_gop_bitrate; 2352 } 2353 2354 int av1_vbr_rc_info_estimate_base_q( 2355 double bit_budget, aom_bit_depth_t bit_depth, 2356 const double *update_type_scale_factors, int frame_count, 2357 const FRAME_UPDATE_TYPE *update_type_list, const double *qstep_ratio_list, 2358 const TplTxfmStats *stats_list, int *q_index_list, 2359 double *estimated_bitrate_byframe) { 2360 int q_max = 255; // Maximum q value. 2361 int q_min = 0; // Minimum q value. 2362 int q = (q_max + q_min) / 2; 2363 2364 double q_max_estimate = av1_vbr_rc_info_estimate_gop_bitrate( 2365 q_max, bit_depth, update_type_scale_factors, frame_count, 2366 update_type_list, qstep_ratio_list, stats_list, q_index_list, 2367 estimated_bitrate_byframe); 2368 2369 double q_min_estimate = av1_vbr_rc_info_estimate_gop_bitrate( 2370 q_min, bit_depth, update_type_scale_factors, frame_count, 2371 update_type_list, qstep_ratio_list, stats_list, q_index_list, 2372 estimated_bitrate_byframe); 2373 while (q_min + 1 < q_max) { 2374 double estimate = av1_vbr_rc_info_estimate_gop_bitrate( 2375 q, bit_depth, update_type_scale_factors, frame_count, update_type_list, 2376 qstep_ratio_list, stats_list, q_index_list, estimated_bitrate_byframe); 2377 if (estimate > bit_budget) { 2378 q_min = q; 2379 q_min_estimate = estimate; 2380 } else { 2381 q_max = q; 2382 q_max_estimate = estimate; 2383 } 2384 q = (q_max + q_min) / 2; 2385 } 2386 // Pick the estimate that lands closest to the budget. 2387 if (fabs(q_max_estimate - bit_budget) < fabs(q_min_estimate - bit_budget)) { 2388 q = q_max; 2389 } else { 2390 q = q_min; 2391 } 2392 // Update q_index_list and vbr_rc_info. 2393 av1_vbr_rc_info_estimate_gop_bitrate( 2394 q, bit_depth, update_type_scale_factors, frame_count, update_type_list, 2395 qstep_ratio_list, stats_list, q_index_list, estimated_bitrate_byframe); 2396 return q; 2397 } 2398 void av1_vbr_rc_update_q_index_list(VBR_RATECTRL_INFO *vbr_rc_info, 2399 const TplParams *tpl_data, 2400 const GF_GROUP *gf_group, 2401 aom_bit_depth_t bit_depth) { 2402 vbr_rc_info->q_index_list_ready = 1; 2403 double gop_bit_budget = vbr_rc_info->gop_bit_budget; 2404 2405 for (int i = 0; i < gf_group->size; i++) { 2406 vbr_rc_info->qstep_ratio_list[i] = av1_tpl_get_qstep_ratio(tpl_data, i); 2407 } 2408 2409 double mv_bits = 0; 2410 for (int i = 0; i < gf_group->size; i++) { 2411 double frame_mv_bits = 0; 2412 if (av1_tpl_stats_ready(tpl_data, i)) { 2413 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[i]; 2414 frame_mv_bits = av1_tpl_compute_frame_mv_entropy( 2415 tpl_frame, tpl_data->tpl_stats_block_mis_log2); 2416 FRAME_UPDATE_TYPE updae_type = gf_group->update_type[i]; 2417 mv_bits += frame_mv_bits * vbr_rc_info->mv_scale_factors[updae_type]; 2418 } 2419 } 2420 2421 mv_bits = AOMMIN(mv_bits, 0.6 * gop_bit_budget); 2422 gop_bit_budget -= mv_bits; 2423 2424 vbr_rc_info->base_q_index = av1_vbr_rc_info_estimate_base_q( 2425 gop_bit_budget, bit_depth, vbr_rc_info->scale_factors, gf_group->size, 2426 gf_group->update_type, vbr_rc_info->qstep_ratio_list, 2427 tpl_data->txfm_stats_list, vbr_rc_info->q_index_list, NULL); 2428 } 2429 2430 #endif // CONFIG_BITRATE_ACCURACY 2431 2432 // Use upper and left neighbor block as the reference MVs. 2433 // Compute the minimum difference between current MV and reference MV. 2434 int_mv av1_compute_mv_difference(const TplDepFrame *tpl_frame, int row, int col, 2435 int step, int tpl_stride, int right_shift) { 2436 const TplDepStats *tpl_stats = 2437 &tpl_frame 2438 ->tpl_stats_ptr[av1_tpl_ptr_pos(row, col, tpl_stride, right_shift)]; 2439 int_mv current_mv = tpl_stats->mv[tpl_stats->ref_frame_index[0]]; 2440 int current_mv_magnitude = 2441 abs(current_mv.as_mv.row) + abs(current_mv.as_mv.col); 2442 2443 // Retrieve the up and left neighbors. 2444 int up_error = INT_MAX; 2445 int_mv up_mv_diff; 2446 if (row - step >= 0) { 2447 tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( 2448 row - step, col, tpl_stride, right_shift)]; 2449 up_mv_diff = tpl_stats->mv[tpl_stats->ref_frame_index[0]]; 2450 up_mv_diff.as_mv.row = current_mv.as_mv.row - up_mv_diff.as_mv.row; 2451 up_mv_diff.as_mv.col = current_mv.as_mv.col - up_mv_diff.as_mv.col; 2452 up_error = abs(up_mv_diff.as_mv.row) + abs(up_mv_diff.as_mv.col); 2453 } 2454 2455 int left_error = INT_MAX; 2456 int_mv left_mv_diff; 2457 if (col - step >= 0) { 2458 tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( 2459 row, col - step, tpl_stride, right_shift)]; 2460 left_mv_diff = tpl_stats->mv[tpl_stats->ref_frame_index[0]]; 2461 left_mv_diff.as_mv.row = current_mv.as_mv.row - left_mv_diff.as_mv.row; 2462 left_mv_diff.as_mv.col = current_mv.as_mv.col - left_mv_diff.as_mv.col; 2463 left_error = abs(left_mv_diff.as_mv.row) + abs(left_mv_diff.as_mv.col); 2464 } 2465 2466 // Return the MV with the minimum distance from current. 2467 if (up_error < left_error && up_error < current_mv_magnitude) { 2468 return up_mv_diff; 2469 } else if (left_error < up_error && left_error < current_mv_magnitude) { 2470 return left_mv_diff; 2471 } 2472 return current_mv; 2473 } 2474 2475 /* Compute the entropy of motion vectors for a single frame. */ 2476 double av1_tpl_compute_frame_mv_entropy(const TplDepFrame *tpl_frame, 2477 uint8_t right_shift) { 2478 if (!tpl_frame->is_valid) { 2479 return 0; 2480 } 2481 2482 int count_row[500] = { 0 }; 2483 int count_col[500] = { 0 }; 2484 int n = 0; // number of MVs to process 2485 2486 const int tpl_stride = tpl_frame->stride; 2487 const int step = 1 << right_shift; 2488 2489 for (int row = 0; row < tpl_frame->mi_rows; row += step) { 2490 for (int col = 0; col < tpl_frame->mi_cols; col += step) { 2491 int_mv mv = av1_compute_mv_difference(tpl_frame, row, col, step, 2492 tpl_stride, right_shift); 2493 count_row[clamp(mv.as_mv.row, 0, 499)] += 1; 2494 count_col[clamp(mv.as_mv.row, 0, 499)] += 1; 2495 n += 1; 2496 } 2497 } 2498 2499 // Estimate the bits used using the entropy formula. 2500 double rate_row = 0; 2501 double rate_col = 0; 2502 for (int i = 0; i < 500; i++) { 2503 if (count_row[i] != 0) { 2504 double p = count_row[i] / (double)n; 2505 rate_row += count_row[i] * -log2(p); 2506 } 2507 if (count_col[i] != 0) { 2508 double p = count_col[i] / (double)n; 2509 rate_col += count_col[i] * -log2(p); 2510 } 2511 } 2512 2513 return rate_row + rate_col; 2514 }