encodeframe_utils.c (71612B)
1 /* 2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include "av1/common/common_data.h" 13 #include "av1/common/quant_common.h" 14 #include "av1/common/reconintra.h" 15 16 #include "av1/encoder/encoder.h" 17 #include "av1/encoder/encodeframe_utils.h" 18 #include "av1/encoder/encoder_utils.h" 19 #include "av1/encoder/rdopt.h" 20 21 void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit, 22 const BLOCK_SIZE bsize, const int mi_row, 23 const int mi_col, int *const rdmult) { 24 const AV1_COMMON *const cm = &cpi->common; 25 26 const BLOCK_SIZE bsize_base = BLOCK_16X16; 27 const int num_mi_w = mi_size_wide[bsize_base]; 28 const int num_mi_h = mi_size_high[bsize_base]; 29 const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w; 30 const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h; 31 const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w; 32 const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h; 33 int row, col; 34 double num_of_mi = 0.0; 35 double geom_mean_of_scale = 1.0; 36 37 // To avoid overflow of 'geom_mean_of_scale', bsize_base must be at least 38 // BLOCK_8X8. 39 // 40 // For bsize=BLOCK_128X128 and bsize_base=BLOCK_8X8, the loop below would 41 // iterate 256 times. Considering the maximum value of 42 // cpi->ssim_rdmult_scaling_factors (see av1_set_mb_ssim_rdmult_scaling()), 43 // geom_mean_of_scale can go up to 4.8323^256, which is within DBL_MAX 44 // (maximum value a double data type can hold). If bsize_base is modified to 45 // BLOCK_4X4 (minimum possible block size), geom_mean_of_scale can go up 46 // to 4.8323^1024 and exceed DBL_MAX, resulting in data overflow. 47 assert(bsize_base >= BLOCK_8X8); 48 assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM || 49 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ || 50 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2); 51 52 for (row = mi_row / num_mi_w; 53 row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { 54 for (col = mi_col / num_mi_h; 55 col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) { 56 const int index = row * num_cols + col; 57 assert(cpi->ssim_rdmult_scaling_factors[index] != 0.0); 58 geom_mean_of_scale *= cpi->ssim_rdmult_scaling_factors[index]; 59 num_of_mi += 1.0; 60 } 61 } 62 geom_mean_of_scale = pow(geom_mean_of_scale, (1.0 / num_of_mi)); 63 64 *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5); 65 *rdmult = AOMMAX(*rdmult, 0); 66 av1_set_error_per_bit(errorperbit, *rdmult); 67 } 68 69 #if CONFIG_SALIENCY_MAP 70 void av1_set_saliency_map_vmaf_rdmult(const AV1_COMP *const cpi, 71 int *errorperbit, const BLOCK_SIZE bsize, 72 const int mi_row, const int mi_col, 73 int *const rdmult) { 74 const AV1_COMMON *const cm = &cpi->common; 75 const int num_mi_w = mi_size_wide[bsize]; 76 const int num_mi_h = mi_size_high[bsize]; 77 const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w; 78 79 *rdmult = 80 (int)(*rdmult * cpi->sm_scaling_factor[(mi_row / num_mi_h) * num_cols + 81 (mi_col / num_mi_w)]); 82 83 *rdmult = AOMMAX(*rdmult, 0); 84 av1_set_error_per_bit(errorperbit, *rdmult); 85 } 86 #endif 87 88 // TODO(angiebird): Move this function to tpl_model.c 89 #if !CONFIG_REALTIME_ONLY 90 int av1_get_cb_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, 91 const BLOCK_SIZE bsize, const int mi_row, 92 const int mi_col) { 93 const AV1_COMMON *const cm = &cpi->common; 94 assert(IMPLIES(cpi->ppi->gf_group.size > 0, 95 cpi->gf_frame_index < cpi->ppi->gf_group.size)); 96 const int tpl_idx = cpi->gf_frame_index; 97 int deltaq_rdmult = set_rdmult(cpi, x, -1); 98 if (!av1_tpl_stats_ready(&cpi->ppi->tpl_data, tpl_idx)) return deltaq_rdmult; 99 if (cm->superres_scale_denominator != SCALE_NUMERATOR) return deltaq_rdmult; 100 if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return deltaq_rdmult; 101 if (x->rb == 0) return deltaq_rdmult; 102 103 TplParams *const tpl_data = &cpi->ppi->tpl_data; 104 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx]; 105 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 106 107 const int mi_wide = mi_size_wide[bsize]; 108 const int mi_high = mi_size_high[bsize]; 109 110 int tpl_stride = tpl_frame->stride; 111 double intra_cost_base = 0; 112 double mc_dep_cost_base = 0; 113 double cbcmp_base = 0; 114 const int step = 1 << tpl_data->tpl_stats_block_mis_log2; 115 116 for (int row = mi_row; row < mi_row + mi_high; row += step) { 117 for (int col = mi_col; col < mi_col + mi_wide; col += step) { 118 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) 119 continue; 120 121 TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos( 122 row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)]; 123 124 double cbcmp = (double)this_stats->srcrf_dist; 125 int64_t mc_dep_delta = 126 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 127 this_stats->mc_dep_dist); 128 double dist_scaled = (double)(this_stats->recrf_dist << RDDIV_BITS); 129 intra_cost_base += log(dist_scaled) * cbcmp; 130 mc_dep_cost_base += log(3 * dist_scaled + mc_dep_delta) * cbcmp; 131 cbcmp_base += cbcmp; 132 } 133 } 134 135 if (cbcmp_base == 0) return deltaq_rdmult; 136 137 double rk = exp((intra_cost_base - mc_dep_cost_base) / cbcmp_base); 138 deltaq_rdmult = (int)(deltaq_rdmult * (rk / x->rb)); 139 140 return AOMMAX(deltaq_rdmult, 1); 141 } 142 #endif // !CONFIG_REALTIME_ONLY 143 144 static inline void update_filter_type_count(FRAME_COUNTS *counts, 145 const MACROBLOCKD *xd, 146 const MB_MODE_INFO *mbmi) { 147 int dir; 148 for (dir = 0; dir < 2; ++dir) { 149 const int ctx = av1_get_pred_context_switchable_interp(xd, dir); 150 InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir); 151 152 // Only allow the 3 valid SWITCHABLE_FILTERS. 153 assert(filter < SWITCHABLE_FILTERS); 154 ++counts->switchable_interp[ctx][filter]; 155 } 156 } 157 158 // This function will copy the best reference mode information from 159 // MB_MODE_INFO_EXT_FRAME to MB_MODE_INFO_EXT. 160 static inline void copy_mbmi_ext_frame_to_mbmi_ext( 161 MB_MODE_INFO_EXT *mbmi_ext, 162 const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type) { 163 memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack, 164 sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); 165 memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight, 166 sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); 167 mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context; 168 mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count; 169 memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs, 170 sizeof(mbmi_ext->global_mvs)); 171 } 172 173 void av1_update_state(const AV1_COMP *const cpi, ThreadData *td, 174 const PICK_MODE_CONTEXT *const ctx, int mi_row, 175 int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) { 176 int i, x_idx, y; 177 const AV1_COMMON *const cm = &cpi->common; 178 const CommonModeInfoParams *const mi_params = &cm->mi_params; 179 const int num_planes = av1_num_planes(cm); 180 MACROBLOCK *const x = &td->mb; 181 MACROBLOCKD *const xd = &x->e_mbd; 182 struct macroblock_plane *const p = x->plane; 183 struct macroblockd_plane *const pd = xd->plane; 184 const MB_MODE_INFO *const mi = &ctx->mic; 185 MB_MODE_INFO *const mi_addr = xd->mi[0]; 186 const struct segmentation *const seg = &cm->seg; 187 assert(bsize < BLOCK_SIZES_ALL); 188 const int bw = mi_size_wide[mi->bsize]; 189 const int bh = mi_size_high[mi->bsize]; 190 const int mis = mi_params->mi_stride; 191 const int mi_width = mi_size_wide[bsize]; 192 const int mi_height = mi_size_high[bsize]; 193 TxfmSearchInfo *txfm_info = &x->txfm_search_info; 194 195 assert(mi->bsize == bsize); 196 197 *mi_addr = *mi; 198 copy_mbmi_ext_frame_to_mbmi_ext(&x->mbmi_ext, &ctx->mbmi_ext_best, 199 av1_ref_frame_type(ctx->mic.ref_frame)); 200 201 memcpy(txfm_info->blk_skip, ctx->blk_skip, 202 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk); 203 204 txfm_info->skip_txfm = ctx->rd_stats.skip_txfm; 205 206 xd->tx_type_map = ctx->tx_type_map; 207 xd->tx_type_map_stride = mi_size_wide[bsize]; 208 // If not dry_run, copy the transform type data into the frame level buffer. 209 // Encoder will fetch tx types when writing bitstream. 210 if (!dry_run) { 211 const int grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col); 212 uint8_t *const tx_type_map = mi_params->tx_type_map + grid_idx; 213 const int mi_stride = mi_params->mi_stride; 214 for (int blk_row = 0; blk_row < bh; ++blk_row) { 215 av1_copy_array(tx_type_map + blk_row * mi_stride, 216 xd->tx_type_map + blk_row * xd->tx_type_map_stride, bw); 217 } 218 xd->tx_type_map = tx_type_map; 219 xd->tx_type_map_stride = mi_stride; 220 } 221 222 // If segmentation in use 223 if (seg->enabled) { 224 // For in frame complexity AQ or ROI copy the segment id from the 225 // segment map. 226 if (cpi->oxcf.q_cfg.aq_mode == COMPLEXITY_AQ || cpi->roi.enabled) { 227 const uint8_t *const map = 228 seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map; 229 mi_addr->segment_id = 230 map ? get_segment_id(mi_params, map, bsize, mi_row, mi_col) : 0; 231 } 232 // Else for cyclic refresh mode update the segment map, set the segment id 233 // and then update the quantizer. 234 if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && !cpi->roi.enabled && 235 mi_addr->segment_id != AM_SEGMENT_ID_INACTIVE && 236 !cpi->rc.rtc_external_ratectrl) { 237 av1_cyclic_refresh_update_segment(cpi, x, mi_row, mi_col, bsize, 238 ctx->rd_stats.rate, ctx->rd_stats.dist, 239 txfm_info->skip_txfm, dry_run); 240 } 241 if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd)) 242 mi_addr->uv_mode = UV_DC_PRED; 243 244 if (!dry_run && !mi_addr->skip_txfm) { 245 int cdf_num; 246 const uint8_t spatial_pred = av1_get_spatial_seg_pred( 247 cm, xd, &cdf_num, cpi->cyclic_refresh->skip_over4x4); 248 const uint8_t coded_id = av1_neg_interleave( 249 mi_addr->segment_id, spatial_pred, seg->last_active_segid + 1); 250 int64_t spatial_cost = x->mode_costs.spatial_pred_cost[cdf_num][coded_id]; 251 td->rd_counts.seg_tmp_pred_cost[0] += spatial_cost; 252 253 const int pred_segment_id = 254 cm->last_frame_seg_map 255 ? get_segment_id(mi_params, cm->last_frame_seg_map, bsize, mi_row, 256 mi_col) 257 : 0; 258 const int use_tmp_pred = pred_segment_id == mi_addr->segment_id; 259 const uint8_t tmp_pred_ctx = av1_get_pred_context_seg_id(xd); 260 td->rd_counts.seg_tmp_pred_cost[1] += 261 x->mode_costs.tmp_pred_cost[tmp_pred_ctx][use_tmp_pred]; 262 if (!use_tmp_pred) { 263 td->rd_counts.seg_tmp_pred_cost[1] += spatial_cost; 264 } 265 } 266 } 267 268 // Count zero motion vector. 269 if (!dry_run && !frame_is_intra_only(cm)) { 270 const MV mv = mi->mv[0].as_mv; 271 if (is_inter_block(mi) && mi->ref_frame[0] == LAST_FRAME && 272 abs(mv.row) < 8 && abs(mv.col) < 8) { 273 const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, bh); 274 // Accumulate low_content_frame. 275 for (int mi_y = 0; mi_y < ymis; mi_y += 2) x->cnt_zeromv += bw << 1; 276 } 277 } 278 279 for (i = 0; i < num_planes; ++i) { 280 p[i].coeff = ctx->coeff[i]; 281 p[i].qcoeff = ctx->qcoeff[i]; 282 p[i].dqcoeff = ctx->dqcoeff[i]; 283 p[i].eobs = ctx->eobs[i]; 284 p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; 285 } 286 for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; 287 // Restore the coding context of the MB to that that was in place 288 // when the mode was picked for it 289 290 const int cols = 291 AOMMIN((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width, mi_width); 292 const int rows = AOMMIN( 293 (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height, mi_height); 294 for (y = 0; y < rows; y++) { 295 for (x_idx = 0; x_idx < cols; x_idx++) xd->mi[x_idx + y * mis] = mi_addr; 296 } 297 298 if (cpi->oxcf.q_cfg.aq_mode || 299 (cpi->roi.enabled && cpi->roi.delta_qp_enabled)) 300 av1_init_plane_quantizers(cpi, x, mi_addr->segment_id, 0); 301 302 if (dry_run) return; 303 304 #if CONFIG_INTERNAL_STATS 305 { 306 unsigned int *const mode_chosen_counts = 307 (unsigned int *)cpi->mode_chosen_counts; // Cast const away. 308 if (frame_is_intra_only(cm)) { 309 static const int kf_mode_index[] = { 310 THR_DC /*DC_PRED*/, 311 THR_V_PRED /*V_PRED*/, 312 THR_H_PRED /*H_PRED*/, 313 THR_D45_PRED /*D45_PRED*/, 314 THR_D135_PRED /*D135_PRED*/, 315 THR_D113_PRED /*D113_PRED*/, 316 THR_D157_PRED /*D157_PRED*/, 317 THR_D203_PRED /*D203_PRED*/, 318 THR_D67_PRED /*D67_PRED*/, 319 THR_SMOOTH, /*SMOOTH_PRED*/ 320 THR_SMOOTH_V, /*SMOOTH_V_PRED*/ 321 THR_SMOOTH_H, /*SMOOTH_H_PRED*/ 322 THR_PAETH /*PAETH_PRED*/, 323 }; 324 ++mode_chosen_counts[kf_mode_index[mi_addr->mode]]; 325 } else { 326 // Note how often each mode chosen as best 327 ++mode_chosen_counts[ctx->best_mode_index]; 328 } 329 } 330 #endif 331 if (!frame_is_intra_only(cm)) { 332 if (is_inter_block(mi) && cm->features.interp_filter == SWITCHABLE) { 333 // When the frame interp filter is SWITCHABLE, several cases that always 334 // use the default type (EIGHTTAP_REGULAR) are described in 335 // av1_is_interp_needed(). Here, we should keep the counts for all 336 // applicable blocks, so the frame filter resetting decision in 337 // fix_interp_filter() is made correctly. 338 update_filter_type_count(td->counts, xd, mi_addr); 339 } 340 } 341 342 const int x_mis = AOMMIN(bw, mi_params->mi_cols - mi_col); 343 const int y_mis = AOMMIN(bh, mi_params->mi_rows - mi_row); 344 if (cm->seq_params->order_hint_info.enable_ref_frame_mvs) 345 av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis); 346 } 347 348 void av1_update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts, 349 PREDICTION_MODE mode, int16_t mode_context) { 350 (void)counts; 351 352 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; 353 if (mode == NEWMV) { 354 #if CONFIG_ENTROPY_STATS 355 ++counts->newmv_mode[mode_ctx][0]; 356 #endif 357 update_cdf(fc->newmv_cdf[mode_ctx], 0, 2); 358 return; 359 } 360 361 #if CONFIG_ENTROPY_STATS 362 ++counts->newmv_mode[mode_ctx][1]; 363 #endif 364 update_cdf(fc->newmv_cdf[mode_ctx], 1, 2); 365 366 mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; 367 if (mode == GLOBALMV) { 368 #if CONFIG_ENTROPY_STATS 369 ++counts->zeromv_mode[mode_ctx][0]; 370 #endif 371 update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2); 372 return; 373 } 374 375 #if CONFIG_ENTROPY_STATS 376 ++counts->zeromv_mode[mode_ctx][1]; 377 #endif 378 update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2); 379 380 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; 381 #if CONFIG_ENTROPY_STATS 382 ++counts->refmv_mode[mode_ctx][mode != NEARESTMV]; 383 #endif 384 update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2); 385 } 386 387 static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi, 388 FRAME_COUNTS *counts) { 389 FRAME_CONTEXT *fc = xd->tile_ctx; 390 const BLOCK_SIZE bsize = mbmi->bsize; 391 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 392 const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize); 393 394 (void)counts; 395 396 if (mbmi->mode == DC_PRED) { 397 const int n = pmi->palette_size[0]; 398 const int palette_mode_ctx = av1_get_palette_mode_ctx(xd); 399 400 #if CONFIG_ENTROPY_STATS 401 ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0]; 402 #endif 403 update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx], 404 n > 0, 2); 405 if (n > 0) { 406 #if CONFIG_ENTROPY_STATS 407 ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE]; 408 #endif 409 update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx], 410 n - PALETTE_MIN_SIZE, PALETTE_SIZES); 411 } 412 } 413 414 if (mbmi->uv_mode == UV_DC_PRED) { 415 const int n = pmi->palette_size[1]; 416 const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0); 417 418 #if CONFIG_ENTROPY_STATS 419 ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0]; 420 #endif 421 update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2); 422 423 if (n > 0) { 424 #if CONFIG_ENTROPY_STATS 425 ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE]; 426 #endif 427 update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx], 428 n - PALETTE_MIN_SIZE, PALETTE_SIZES); 429 } 430 } 431 } 432 433 void av1_sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts, 434 MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi, 435 const MB_MODE_INFO *above_mi, 436 const MB_MODE_INFO *left_mi, const int intraonly) { 437 FRAME_CONTEXT *fc = xd->tile_ctx; 438 const PREDICTION_MODE y_mode = mbmi->mode; 439 (void)counts; 440 const BLOCK_SIZE bsize = mbmi->bsize; 441 442 if (intraonly) { 443 #if CONFIG_ENTROPY_STATS 444 const PREDICTION_MODE above = av1_above_block_mode(above_mi); 445 const PREDICTION_MODE left = av1_left_block_mode(left_mi); 446 const int above_ctx = intra_mode_context[above]; 447 const int left_ctx = intra_mode_context[left]; 448 ++counts->kf_y_mode[above_ctx][left_ctx][y_mode]; 449 #endif // CONFIG_ENTROPY_STATS 450 update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES); 451 } else { 452 #if CONFIG_ENTROPY_STATS 453 ++counts->y_mode[size_group_lookup[bsize]][y_mode]; 454 #endif // CONFIG_ENTROPY_STATS 455 update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES); 456 } 457 458 if (av1_filter_intra_allowed(cm, mbmi)) { 459 const int use_filter_intra_mode = 460 mbmi->filter_intra_mode_info.use_filter_intra; 461 #if CONFIG_ENTROPY_STATS 462 ++counts->filter_intra[mbmi->bsize][use_filter_intra_mode]; 463 if (use_filter_intra_mode) { 464 ++counts 465 ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode]; 466 } 467 #endif // CONFIG_ENTROPY_STATS 468 update_cdf(fc->filter_intra_cdfs[mbmi->bsize], use_filter_intra_mode, 2); 469 if (use_filter_intra_mode) { 470 update_cdf(fc->filter_intra_mode_cdf, 471 mbmi->filter_intra_mode_info.filter_intra_mode, 472 FILTER_INTRA_MODES); 473 } 474 } 475 if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) { 476 #if CONFIG_ENTROPY_STATS 477 ++counts->angle_delta[mbmi->mode - V_PRED] 478 [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA]; 479 #endif 480 update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED], 481 mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA, 482 2 * MAX_ANGLE_DELTA + 1); 483 } 484 485 if (!xd->is_chroma_ref) return; 486 487 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; 488 const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd); 489 #if CONFIG_ENTROPY_STATS 490 ++counts->uv_mode[cfl_allowed][y_mode][uv_mode]; 491 #endif // CONFIG_ENTROPY_STATS 492 update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode, 493 UV_INTRA_MODES - !cfl_allowed); 494 if (uv_mode == UV_CFL_PRED) { 495 const int8_t joint_sign = mbmi->cfl_alpha_signs; 496 const uint8_t idx = mbmi->cfl_alpha_idx; 497 498 #if CONFIG_ENTROPY_STATS 499 ++counts->cfl_sign[joint_sign]; 500 #endif 501 update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS); 502 if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) { 503 aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)]; 504 505 #if CONFIG_ENTROPY_STATS 506 ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)]; 507 #endif 508 update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE); 509 } 510 if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) { 511 aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)]; 512 513 #if CONFIG_ENTROPY_STATS 514 ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)]; 515 #endif 516 update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE); 517 } 518 } 519 const PREDICTION_MODE intra_mode = get_uv_mode(uv_mode); 520 if (av1_is_directional_mode(intra_mode) && av1_use_angle_delta(bsize)) { 521 #if CONFIG_ENTROPY_STATS 522 ++counts->angle_delta[intra_mode - V_PRED] 523 [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA]; 524 #endif 525 update_cdf(fc->angle_delta_cdf[intra_mode - V_PRED], 526 mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA, 527 2 * MAX_ANGLE_DELTA + 1); 528 } 529 if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) { 530 update_palette_cdf(xd, mbmi, counts); 531 } 532 } 533 534 void av1_restore_context(MACROBLOCK *x, const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, 535 int mi_row, int mi_col, BLOCK_SIZE bsize, 536 const int num_planes) { 537 MACROBLOCKD *xd = &x->e_mbd; 538 int p; 539 const int num_4x4_blocks_wide = mi_size_wide[bsize]; 540 const int num_4x4_blocks_high = mi_size_high[bsize]; 541 int mi_width = mi_size_wide[bsize]; 542 int mi_height = mi_size_high[bsize]; 543 for (p = 0; p < num_planes; p++) { 544 int tx_col = mi_col; 545 int tx_row = mi_row & MAX_MIB_MASK; 546 memcpy( 547 xd->above_entropy_context[p] + (tx_col >> xd->plane[p].subsampling_x), 548 ctx->a + num_4x4_blocks_wide * p, 549 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> 550 xd->plane[p].subsampling_x); 551 memcpy(xd->left_entropy_context[p] + (tx_row >> xd->plane[p].subsampling_y), 552 ctx->l + num_4x4_blocks_high * p, 553 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> 554 xd->plane[p].subsampling_y); 555 } 556 memcpy(xd->above_partition_context + mi_col, ctx->sa, 557 sizeof(*xd->above_partition_context) * mi_width); 558 memcpy(xd->left_partition_context + (mi_row & MAX_MIB_MASK), ctx->sl, 559 sizeof(xd->left_partition_context[0]) * mi_height); 560 xd->above_txfm_context = ctx->p_ta; 561 xd->left_txfm_context = ctx->p_tl; 562 memcpy(xd->above_txfm_context, ctx->ta, 563 sizeof(*xd->above_txfm_context) * mi_width); 564 memcpy(xd->left_txfm_context, ctx->tl, 565 sizeof(*xd->left_txfm_context) * mi_height); 566 } 567 568 void av1_save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx, 569 int mi_row, int mi_col, BLOCK_SIZE bsize, 570 const int num_planes) { 571 const MACROBLOCKD *xd = &x->e_mbd; 572 int p; 573 int mi_width = mi_size_wide[bsize]; 574 int mi_height = mi_size_high[bsize]; 575 576 // buffer the above/left context information of the block in search. 577 for (p = 0; p < num_planes; ++p) { 578 int tx_col = mi_col; 579 int tx_row = mi_row & MAX_MIB_MASK; 580 memcpy( 581 ctx->a + mi_width * p, 582 xd->above_entropy_context[p] + (tx_col >> xd->plane[p].subsampling_x), 583 (sizeof(ENTROPY_CONTEXT) * mi_width) >> xd->plane[p].subsampling_x); 584 memcpy(ctx->l + mi_height * p, 585 xd->left_entropy_context[p] + (tx_row >> xd->plane[p].subsampling_y), 586 (sizeof(ENTROPY_CONTEXT) * mi_height) >> xd->plane[p].subsampling_y); 587 } 588 memcpy(ctx->sa, xd->above_partition_context + mi_col, 589 sizeof(*xd->above_partition_context) * mi_width); 590 memcpy(ctx->sl, xd->left_partition_context + (mi_row & MAX_MIB_MASK), 591 sizeof(xd->left_partition_context[0]) * mi_height); 592 memcpy(ctx->ta, xd->above_txfm_context, 593 sizeof(*xd->above_txfm_context) * mi_width); 594 memcpy(ctx->tl, xd->left_txfm_context, 595 sizeof(*xd->left_txfm_context) * mi_height); 596 ctx->p_ta = xd->above_txfm_context; 597 ctx->p_tl = xd->left_txfm_context; 598 } 599 600 static void set_partial_sb_partition(const AV1_COMMON *const cm, 601 MB_MODE_INFO *mi, int bh_in, int bw_in, 602 int mi_rows_remaining, 603 int mi_cols_remaining, BLOCK_SIZE bsize, 604 MB_MODE_INFO **mib) { 605 int bh = bh_in; 606 int r, c; 607 for (r = 0; r < cm->seq_params->mib_size; r += bh) { 608 int bw = bw_in; 609 for (c = 0; c < cm->seq_params->mib_size; c += bw) { 610 const int grid_index = get_mi_grid_idx(&cm->mi_params, r, c); 611 const int mi_index = get_alloc_mi_idx(&cm->mi_params, r, c); 612 mib[grid_index] = mi + mi_index; 613 mib[grid_index]->bsize = find_partition_size( 614 bsize, mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw); 615 } 616 } 617 } 618 619 // This function attempts to set all mode info entries in a given superblock 620 // to the same block partition size. 621 // However, at the bottom and right borders of the image the requested size 622 // may not be allowed in which case this code attempts to choose the largest 623 // allowable partition. 624 void av1_set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile, 625 MB_MODE_INFO **mib, int mi_row, int mi_col, 626 BLOCK_SIZE bsize) { 627 AV1_COMMON *const cm = &cpi->common; 628 const CommonModeInfoParams *const mi_params = &cm->mi_params; 629 const int mi_rows_remaining = tile->mi_row_end - mi_row; 630 const int mi_cols_remaining = tile->mi_col_end - mi_col; 631 MB_MODE_INFO *const mi_upper_left = 632 mi_params->mi_alloc + get_alloc_mi_idx(mi_params, mi_row, mi_col); 633 int bh = mi_size_high[bsize]; 634 int bw = mi_size_wide[bsize]; 635 636 assert(bsize >= mi_params->mi_alloc_bsize && 637 "Attempted to use bsize < mi_params->mi_alloc_bsize"); 638 assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0)); 639 640 // Apply the requested partition size to the SB if it is all "in image" 641 if ((mi_cols_remaining >= cm->seq_params->mib_size) && 642 (mi_rows_remaining >= cm->seq_params->mib_size)) { 643 for (int block_row = 0; block_row < cm->seq_params->mib_size; 644 block_row += bh) { 645 for (int block_col = 0; block_col < cm->seq_params->mib_size; 646 block_col += bw) { 647 const int grid_index = get_mi_grid_idx(mi_params, block_row, block_col); 648 const int mi_index = get_alloc_mi_idx(mi_params, block_row, block_col); 649 mib[grid_index] = mi_upper_left + mi_index; 650 mib[grid_index]->bsize = bsize; 651 } 652 } 653 } else { 654 // Else this is a partial SB. 655 set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining, 656 mi_cols_remaining, bsize, mib); 657 } 658 } 659 660 int av1_is_leaf_split_partition(AV1_COMMON *cm, int mi_row, int mi_col, 661 BLOCK_SIZE bsize) { 662 const int bs = mi_size_wide[bsize]; 663 const int hbs = bs / 2; 664 assert(bsize >= BLOCK_8X8); 665 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 666 667 for (int i = 0; i < 4; i++) { 668 int x_idx = (i & 1) * hbs; 669 int y_idx = (i >> 1) * hbs; 670 if ((mi_row + y_idx >= cm->mi_params.mi_rows) || 671 (mi_col + x_idx >= cm->mi_params.mi_cols)) 672 return 0; 673 if (get_partition(cm, mi_row + y_idx, mi_col + x_idx, subsize) != 674 PARTITION_NONE && 675 subsize != BLOCK_8X8) 676 return 0; 677 } 678 return 1; 679 } 680 681 #if !CONFIG_REALTIME_ONLY 682 int av1_get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, 683 int mi_col, int orig_rdmult) { 684 AV1_COMMON *const cm = &cpi->common; 685 const GF_GROUP *const gf_group = &cpi->ppi->gf_group; 686 assert(IMPLIES(cpi->ppi->gf_group.size > 0, 687 cpi->gf_frame_index < cpi->ppi->gf_group.size)); 688 const int tpl_idx = cpi->gf_frame_index; 689 TplParams *const tpl_data = &cpi->ppi->tpl_data; 690 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; 691 int64_t intra_cost = 0; 692 int64_t mc_dep_cost = 0; 693 const int mi_wide = mi_size_wide[bsize]; 694 const int mi_high = mi_size_high[bsize]; 695 696 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx]; 697 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 698 int tpl_stride = tpl_frame->stride; 699 700 if (!av1_tpl_stats_ready(&cpi->ppi->tpl_data, cpi->gf_frame_index)) { 701 return orig_rdmult; 702 } 703 if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) { 704 return orig_rdmult; 705 } 706 707 #ifndef NDEBUG 708 int mi_count = 0; 709 #endif 710 const int mi_col_sr = 711 coded_to_superres_mi(mi_col, cm->superres_scale_denominator); 712 const int mi_col_end_sr = 713 coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator); 714 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); 715 const int step = 1 << block_mis_log2; 716 const int row_step = step; 717 const int col_step_sr = 718 coded_to_superres_mi(step, cm->superres_scale_denominator); 719 for (int row = mi_row; row < mi_row + mi_high; row += row_step) { 720 for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) { 721 if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue; 722 TplDepStats *this_stats = 723 &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)]; 724 int64_t mc_dep_delta = 725 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 726 this_stats->mc_dep_dist); 727 intra_cost += this_stats->recrf_dist << RDDIV_BITS; 728 mc_dep_cost += (this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta; 729 #ifndef NDEBUG 730 mi_count++; 731 #endif 732 } 733 } 734 assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB); 735 736 double beta = 1.0; 737 if (mc_dep_cost > 0 && intra_cost > 0) { 738 const double r0 = cpi->rd.r0; 739 const double rk = (double)intra_cost / mc_dep_cost; 740 beta = (r0 / rk); 741 } 742 743 int rdmult = av1_get_adaptive_rdmult(cpi, beta); 744 745 rdmult = AOMMIN(rdmult, orig_rdmult * 3 / 2); 746 rdmult = AOMMAX(rdmult, orig_rdmult * 1 / 2); 747 748 rdmult = AOMMAX(1, rdmult); 749 750 return rdmult; 751 } 752 753 // Checks to see if a super block is on a horizontal image edge. 754 // In most cases this is the "real" edge unless there are formatting 755 // bars embedded in the stream. 756 int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) { 757 int top_edge = 0; 758 int bottom_edge = cpi->common.mi_params.mi_rows; 759 int is_active_h_edge = 0; 760 761 // For two pass account for any formatting bars detected. 762 if (is_stat_consumption_stage_twopass(cpi)) { 763 const AV1_COMMON *const cm = &cpi->common; 764 const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats( 765 &cpi->ppi->twopass, cm->current_frame.display_order_hint); 766 if (this_frame_stats == NULL) return AOM_CODEC_ERROR; 767 768 // The inactive region is specified in MBs not mi units. 769 // The image edge is in the following MB row. 770 top_edge += (int)(this_frame_stats->inactive_zone_rows * 4); 771 772 bottom_edge -= (int)(this_frame_stats->inactive_zone_rows * 4); 773 bottom_edge = AOMMAX(top_edge, bottom_edge); 774 } 775 776 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) || 777 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) { 778 is_active_h_edge = 1; 779 } 780 return is_active_h_edge; 781 } 782 783 // Checks to see if a super block is on a vertical image edge. 784 // In most cases this is the "real" edge unless there are formatting 785 // bars embedded in the stream. 786 int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) { 787 int left_edge = 0; 788 int right_edge = cpi->common.mi_params.mi_cols; 789 int is_active_v_edge = 0; 790 791 // For two pass account for any formatting bars detected. 792 if (is_stat_consumption_stage_twopass(cpi)) { 793 const AV1_COMMON *const cm = &cpi->common; 794 const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats( 795 &cpi->ppi->twopass, cm->current_frame.display_order_hint); 796 if (this_frame_stats == NULL) return AOM_CODEC_ERROR; 797 798 // The inactive region is specified in MBs not mi units. 799 // The image edge is in the following MB row. 800 left_edge += (int)(this_frame_stats->inactive_zone_cols * 4); 801 802 right_edge -= (int)(this_frame_stats->inactive_zone_cols * 4); 803 right_edge = AOMMAX(left_edge, right_edge); 804 } 805 806 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) || 807 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) { 808 is_active_v_edge = 1; 809 } 810 return is_active_v_edge; 811 } 812 813 void av1_get_tpl_stats_sb(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, 814 int mi_col, SuperBlockEnc *sb_enc) { 815 sb_enc->tpl_data_count = 0; 816 817 if (!cpi->oxcf.algo_cfg.enable_tpl_model) return; 818 if (cpi->common.current_frame.frame_type == KEY_FRAME) return; 819 const FRAME_UPDATE_TYPE update_type = 820 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index); 821 if (update_type == INTNL_OVERLAY_UPDATE || update_type == OVERLAY_UPDATE) 822 return; 823 assert(IMPLIES(cpi->ppi->gf_group.size > 0, 824 cpi->gf_frame_index < cpi->ppi->gf_group.size)); 825 826 AV1_COMMON *const cm = &cpi->common; 827 const int gf_group_index = cpi->gf_frame_index; 828 TplParams *const tpl_data = &cpi->ppi->tpl_data; 829 if (!av1_tpl_stats_ready(tpl_data, gf_group_index)) return; 830 const int mi_wide = mi_size_wide[bsize]; 831 const int mi_high = mi_size_high[bsize]; 832 833 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_group_index]; 834 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 835 int tpl_stride = tpl_frame->stride; 836 837 int mi_count = 0; 838 int count = 0; 839 const int mi_col_sr = 840 coded_to_superres_mi(mi_col, cm->superres_scale_denominator); 841 const int mi_col_end_sr = 842 coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator); 843 // mi_cols_sr is mi_cols at superres case. 844 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); 845 846 // TPL store unit size is not the same as the motion estimation unit size. 847 // Here always use motion estimation size to avoid getting repetitive inter/ 848 // intra cost. 849 const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d); 850 assert(mi_size_wide[tpl_bsize] == mi_size_high[tpl_bsize]); 851 const int row_step = mi_size_high[tpl_bsize]; 852 const int col_step_sr = coded_to_superres_mi(mi_size_wide[tpl_bsize], 853 cm->superres_scale_denominator); 854 855 // Stride is only based on SB size, and we fill in values for every 16x16 856 // block in a SB. 857 sb_enc->tpl_stride = (mi_col_end_sr - mi_col_sr) / col_step_sr; 858 859 for (int row = mi_row; row < mi_row + mi_high; row += row_step) { 860 for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) { 861 assert(count < MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB); 862 // Handle partial SB, so that no invalid values are used later. 863 if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) { 864 sb_enc->tpl_inter_cost[count] = INT64_MAX; 865 sb_enc->tpl_intra_cost[count] = INT64_MAX; 866 for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { 867 sb_enc->tpl_mv[count][i].as_int = INVALID_MV; 868 } 869 count++; 870 continue; 871 } 872 873 TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos( 874 row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)]; 875 sb_enc->tpl_inter_cost[count] = this_stats->inter_cost 876 << TPL_DEP_COST_SCALE_LOG2; 877 sb_enc->tpl_intra_cost[count] = this_stats->intra_cost 878 << TPL_DEP_COST_SCALE_LOG2; 879 memcpy(sb_enc->tpl_mv[count], this_stats->mv, sizeof(this_stats->mv)); 880 mi_count++; 881 count++; 882 } 883 } 884 885 assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB); 886 sb_enc->tpl_data_count = mi_count; 887 } 888 889 // analysis_type 0: Use mc_dep_cost and intra_cost 890 // analysis_type 1: Use count of best inter predictor chosen 891 // analysis_type 2: Use cost reduction from intra to inter for best inter 892 // predictor chosen 893 int av1_get_q_for_deltaq_objective(AV1_COMP *const cpi, ThreadData *td, 894 int64_t *delta_dist, BLOCK_SIZE bsize, 895 int mi_row, int mi_col) { 896 AV1_COMMON *const cm = &cpi->common; 897 assert(IMPLIES(cpi->ppi->gf_group.size > 0, 898 cpi->gf_frame_index < cpi->ppi->gf_group.size)); 899 const int tpl_idx = cpi->gf_frame_index; 900 TplParams *const tpl_data = &cpi->ppi->tpl_data; 901 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; 902 double intra_cost = 0; 903 double mc_dep_reg = 0; 904 double mc_dep_cost = 0; 905 double cbcmp_base = 1; 906 double srcrf_dist = 0; 907 double srcrf_sse = 0; 908 double srcrf_rate = 0; 909 const int mi_wide = mi_size_wide[bsize]; 910 const int mi_high = mi_size_high[bsize]; 911 const int base_qindex = cm->quant_params.base_qindex; 912 913 if (tpl_idx >= MAX_TPL_FRAME_IDX) return base_qindex; 914 915 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx]; 916 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 917 int tpl_stride = tpl_frame->stride; 918 if (!tpl_frame->is_valid) return base_qindex; 919 920 #ifndef NDEBUG 921 int mi_count = 0; 922 #endif 923 const int mi_col_sr = 924 coded_to_superres_mi(mi_col, cm->superres_scale_denominator); 925 const int mi_col_end_sr = 926 coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator); 927 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); 928 const int step = 1 << block_mis_log2; 929 const int row_step = step; 930 const int col_step_sr = 931 coded_to_superres_mi(step, cm->superres_scale_denominator); 932 for (int row = mi_row; row < mi_row + mi_high; row += row_step) { 933 for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) { 934 if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue; 935 TplDepStats *this_stats = 936 &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)]; 937 double cbcmp = (double)this_stats->srcrf_dist; 938 int64_t mc_dep_delta = 939 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 940 this_stats->mc_dep_dist); 941 double dist_scaled = (double)(this_stats->recrf_dist << RDDIV_BITS); 942 intra_cost += log(dist_scaled) * cbcmp; 943 mc_dep_cost += log(dist_scaled + mc_dep_delta) * cbcmp; 944 mc_dep_reg += log(3 * dist_scaled + mc_dep_delta) * cbcmp; 945 srcrf_dist += (double)(this_stats->srcrf_dist << RDDIV_BITS); 946 srcrf_sse += (double)(this_stats->srcrf_sse << RDDIV_BITS); 947 srcrf_rate += (double)(this_stats->srcrf_rate << TPL_DEP_COST_SCALE_LOG2); 948 #ifndef NDEBUG 949 mi_count++; 950 #endif 951 cbcmp_base += cbcmp; 952 } 953 } 954 assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB); 955 956 int offset = 0; 957 double beta = 1.0; 958 double rk; 959 if (mc_dep_cost > 0 && intra_cost > 0) { 960 const double r0 = cpi->rd.r0; 961 rk = exp((intra_cost - mc_dep_cost) / cbcmp_base); 962 td->mb.rb = exp((intra_cost - mc_dep_reg) / cbcmp_base); 963 beta = (r0 / rk); 964 assert(beta > 0.0); 965 } else { 966 return base_qindex; 967 } 968 offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta); 969 970 const DeltaQInfo *const delta_q_info = &cm->delta_q_info; 971 offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1); 972 offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1); 973 int qindex = cm->quant_params.base_qindex + offset; 974 qindex = AOMMIN(qindex, MAXQ); 975 qindex = AOMMAX(qindex, MINQ); 976 977 int frm_qstep = av1_dc_quant_QTX(base_qindex, 0, cm->seq_params->bit_depth); 978 int sbs_qstep = 979 av1_dc_quant_QTX(base_qindex, offset, cm->seq_params->bit_depth); 980 981 if (delta_dist) { 982 double sbs_dist = srcrf_dist * pow((double)sbs_qstep / frm_qstep, 2.0); 983 double sbs_rate = srcrf_rate * ((double)frm_qstep / sbs_qstep); 984 sbs_dist = AOMMIN(sbs_dist, srcrf_sse); 985 *delta_dist = (int64_t)((sbs_dist - srcrf_dist) / rk); 986 *delta_dist += RDCOST(tpl_frame->base_rdmult, 4 * 256, 0); 987 *delta_dist += RDCOST(tpl_frame->base_rdmult, sbs_rate - srcrf_rate, 0); 988 } 989 return qindex; 990 } 991 992 #if !DISABLE_HDR_LUMA_DELTAQ 993 // offset table defined in Table3 of T-REC-H.Sup15 document. 994 static const int hdr_thres[HDR_QP_LEVELS + 1] = { 0, 301, 367, 434, 501, 567, 995 634, 701, 767, 834, 1024 }; 996 997 static const int hdr10_qp_offset[HDR_QP_LEVELS] = { 3, 2, 1, 0, -1, 998 -2, -3, -4, -5, -6 }; 999 #endif 1000 1001 int av1_get_q_for_hdr(AV1_COMP *const cpi, MACROBLOCK *const x, 1002 BLOCK_SIZE bsize, int mi_row, int mi_col) { 1003 AV1_COMMON *const cm = &cpi->common; 1004 assert(cm->seq_params->bit_depth == AOM_BITS_10); 1005 1006 #if DISABLE_HDR_LUMA_DELTAQ 1007 (void)x; 1008 (void)bsize; 1009 (void)mi_row; 1010 (void)mi_col; 1011 return cm->quant_params.base_qindex; 1012 #else 1013 // calculate pixel average 1014 const int block_luma_avg = av1_log_block_avg(cpi, x, bsize, mi_row, mi_col); 1015 // adjust offset based on average of the pixel block 1016 int offset = 0; 1017 for (int i = 0; i < HDR_QP_LEVELS; i++) { 1018 if (block_luma_avg >= hdr_thres[i] && block_luma_avg < hdr_thres[i + 1]) { 1019 offset = (int)(hdr10_qp_offset[i] * QP_SCALE_FACTOR); 1020 break; 1021 } 1022 } 1023 1024 const DeltaQInfo *const delta_q_info = &cm->delta_q_info; 1025 offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1); 1026 offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1); 1027 int qindex = cm->quant_params.base_qindex + offset; 1028 qindex = AOMMIN(qindex, MAXQ); 1029 qindex = AOMMAX(qindex, MINQ); 1030 1031 return qindex; 1032 #endif 1033 } 1034 #endif // !CONFIG_REALTIME_ONLY 1035 1036 void av1_reset_simple_motion_tree_partition(SIMPLE_MOTION_DATA_TREE *sms_tree, 1037 BLOCK_SIZE bsize) { 1038 if (sms_tree == NULL) return; 1039 sms_tree->partitioning = PARTITION_NONE; 1040 1041 if (bsize >= BLOCK_8X8) { 1042 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 1043 for (int idx = 0; idx < 4; ++idx) 1044 av1_reset_simple_motion_tree_partition(sms_tree->split[idx], subsize); 1045 } 1046 } 1047 1048 // Record the ref frames that have been selected by square partition blocks. 1049 void av1_update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type, 1050 BLOCK_SIZE bsize, int mib_size, 1051 int mi_row, int mi_col) { 1052 assert(mi_size_wide[bsize] == mi_size_high[bsize]); 1053 const int sb_size_mask = mib_size - 1; 1054 const int mi_row_in_sb = mi_row & sb_size_mask; 1055 const int mi_col_in_sb = mi_col & sb_size_mask; 1056 const int mi_size = mi_size_wide[bsize]; 1057 for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) { 1058 for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) { 1059 x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type; 1060 } 1061 } 1062 } 1063 1064 static void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left, aom_cdf_prob *cdf_ptr_tr, 1065 int num_cdfs, int cdf_stride, int nsymbs, 1066 int wt_left, int wt_tr) { 1067 for (int i = 0; i < num_cdfs; i++) { 1068 for (int j = 0; j <= nsymbs; j++) { 1069 cdf_ptr_left[i * cdf_stride + j] = 1070 (aom_cdf_prob)(((int)cdf_ptr_left[i * cdf_stride + j] * wt_left + 1071 (int)cdf_ptr_tr[i * cdf_stride + j] * wt_tr + 1072 ((wt_left + wt_tr) / 2)) / 1073 (wt_left + wt_tr)); 1074 assert(cdf_ptr_left[i * cdf_stride + j] >= 0 && 1075 cdf_ptr_left[i * cdf_stride + j] < CDF_PROB_TOP); 1076 } 1077 } 1078 } 1079 1080 #define AVERAGE_CDF(cname_left, cname_tr, nsymbs) \ 1081 AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, CDF_SIZE(nsymbs)) 1082 1083 #define AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, cdf_stride) \ 1084 do { \ 1085 aom_cdf_prob *cdf_ptr_left = (aom_cdf_prob *)cname_left; \ 1086 aom_cdf_prob *cdf_ptr_tr = (aom_cdf_prob *)cname_tr; \ 1087 int array_size = (int)sizeof(cname_left) / sizeof(aom_cdf_prob); \ 1088 int num_cdfs = array_size / cdf_stride; \ 1089 avg_cdf_symbol(cdf_ptr_left, cdf_ptr_tr, num_cdfs, cdf_stride, nsymbs, \ 1090 wt_left, wt_tr); \ 1091 } while (0) 1092 1093 static void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr, int wt_left, 1094 int wt_tr) { 1095 AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4); 1096 for (int i = 0; i < 2; i++) { 1097 AVERAGE_CDF(nmv_left->comps[i].classes_cdf, nmv_tr->comps[i].classes_cdf, 1098 MV_CLASSES); 1099 AVERAGE_CDF(nmv_left->comps[i].class0_fp_cdf, 1100 nmv_tr->comps[i].class0_fp_cdf, MV_FP_SIZE); 1101 AVERAGE_CDF(nmv_left->comps[i].fp_cdf, nmv_tr->comps[i].fp_cdf, MV_FP_SIZE); 1102 AVERAGE_CDF(nmv_left->comps[i].sign_cdf, nmv_tr->comps[i].sign_cdf, 2); 1103 AVERAGE_CDF(nmv_left->comps[i].class0_hp_cdf, 1104 nmv_tr->comps[i].class0_hp_cdf, 2); 1105 AVERAGE_CDF(nmv_left->comps[i].hp_cdf, nmv_tr->comps[i].hp_cdf, 2); 1106 AVERAGE_CDF(nmv_left->comps[i].class0_cdf, nmv_tr->comps[i].class0_cdf, 1107 CLASS0_SIZE); 1108 AVERAGE_CDF(nmv_left->comps[i].bits_cdf, nmv_tr->comps[i].bits_cdf, 2); 1109 } 1110 } 1111 1112 // In case of row-based multi-threading of encoder, since we always 1113 // keep a top - right sync, we can average the top - right SB's CDFs and 1114 // the left SB's CDFs and use the same for current SB's encoding to 1115 // improve the performance. This function facilitates the averaging 1116 // of CDF and used only when row-mt is enabled in encoder. 1117 void av1_avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr, 1118 int wt_left, int wt_tr) { 1119 AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2); 1120 AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2); 1121 AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2); 1122 AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5); 1123 AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6); 1124 AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7); 1125 AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8); 1126 AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9); 1127 AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10); 1128 AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11); 1129 AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3); 1130 AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4); 1131 AVERAGE_CDF(ctx_left->coeff_br_cdf, ctx_tr->coeff_br_cdf, BR_CDF_SIZE); 1132 AVERAGE_CDF(ctx_left->newmv_cdf, ctx_tr->newmv_cdf, 2); 1133 AVERAGE_CDF(ctx_left->zeromv_cdf, ctx_tr->zeromv_cdf, 2); 1134 AVERAGE_CDF(ctx_left->refmv_cdf, ctx_tr->refmv_cdf, 2); 1135 AVERAGE_CDF(ctx_left->drl_cdf, ctx_tr->drl_cdf, 2); 1136 AVERAGE_CDF(ctx_left->inter_compound_mode_cdf, 1137 ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES); 1138 AVERAGE_CDF(ctx_left->compound_type_cdf, ctx_tr->compound_type_cdf, 1139 MASKED_COMPOUND_TYPES); 1140 AVERAGE_CDF(ctx_left->wedge_idx_cdf, ctx_tr->wedge_idx_cdf, 16); 1141 AVERAGE_CDF(ctx_left->interintra_cdf, ctx_tr->interintra_cdf, 2); 1142 AVERAGE_CDF(ctx_left->wedge_interintra_cdf, ctx_tr->wedge_interintra_cdf, 2); 1143 AVERAGE_CDF(ctx_left->interintra_mode_cdf, ctx_tr->interintra_mode_cdf, 1144 INTERINTRA_MODES); 1145 AVERAGE_CDF(ctx_left->motion_mode_cdf, ctx_tr->motion_mode_cdf, MOTION_MODES); 1146 AVERAGE_CDF(ctx_left->obmc_cdf, ctx_tr->obmc_cdf, 2); 1147 AVERAGE_CDF(ctx_left->palette_y_size_cdf, ctx_tr->palette_y_size_cdf, 1148 PALETTE_SIZES); 1149 AVERAGE_CDF(ctx_left->palette_uv_size_cdf, ctx_tr->palette_uv_size_cdf, 1150 PALETTE_SIZES); 1151 for (int j = 0; j < PALETTE_SIZES; j++) { 1152 int nsymbs = j + PALETTE_MIN_SIZE; 1153 AVG_CDF_STRIDE(ctx_left->palette_y_color_index_cdf[j], 1154 ctx_tr->palette_y_color_index_cdf[j], nsymbs, 1155 CDF_SIZE(PALETTE_COLORS)); 1156 AVG_CDF_STRIDE(ctx_left->palette_uv_color_index_cdf[j], 1157 ctx_tr->palette_uv_color_index_cdf[j], nsymbs, 1158 CDF_SIZE(PALETTE_COLORS)); 1159 } 1160 AVERAGE_CDF(ctx_left->palette_y_mode_cdf, ctx_tr->palette_y_mode_cdf, 2); 1161 AVERAGE_CDF(ctx_left->palette_uv_mode_cdf, ctx_tr->palette_uv_mode_cdf, 2); 1162 AVERAGE_CDF(ctx_left->comp_inter_cdf, ctx_tr->comp_inter_cdf, 2); 1163 AVERAGE_CDF(ctx_left->single_ref_cdf, ctx_tr->single_ref_cdf, 2); 1164 AVERAGE_CDF(ctx_left->comp_ref_type_cdf, ctx_tr->comp_ref_type_cdf, 2); 1165 AVERAGE_CDF(ctx_left->uni_comp_ref_cdf, ctx_tr->uni_comp_ref_cdf, 2); 1166 AVERAGE_CDF(ctx_left->comp_ref_cdf, ctx_tr->comp_ref_cdf, 2); 1167 AVERAGE_CDF(ctx_left->comp_bwdref_cdf, ctx_tr->comp_bwdref_cdf, 2); 1168 AVERAGE_CDF(ctx_left->txfm_partition_cdf, ctx_tr->txfm_partition_cdf, 2); 1169 AVERAGE_CDF(ctx_left->compound_index_cdf, ctx_tr->compound_index_cdf, 2); 1170 AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2); 1171 AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2); 1172 AVERAGE_CDF(ctx_left->skip_txfm_cdfs, ctx_tr->skip_txfm_cdfs, 2); 1173 AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2); 1174 avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr); 1175 avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr); 1176 AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2); 1177 AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2); 1178 AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf, 1179 ctx_tr->seg.spatial_pred_seg_cdf, MAX_SEGMENTS); 1180 AVERAGE_CDF(ctx_left->filter_intra_cdfs, ctx_tr->filter_intra_cdfs, 2); 1181 AVERAGE_CDF(ctx_left->filter_intra_mode_cdf, ctx_tr->filter_intra_mode_cdf, 1182 FILTER_INTRA_MODES); 1183 AVERAGE_CDF(ctx_left->switchable_restore_cdf, ctx_tr->switchable_restore_cdf, 1184 RESTORE_SWITCHABLE_TYPES); 1185 AVERAGE_CDF(ctx_left->wiener_restore_cdf, ctx_tr->wiener_restore_cdf, 2); 1186 AVERAGE_CDF(ctx_left->sgrproj_restore_cdf, ctx_tr->sgrproj_restore_cdf, 2); 1187 AVERAGE_CDF(ctx_left->y_mode_cdf, ctx_tr->y_mode_cdf, INTRA_MODES); 1188 AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0], 1189 UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES)); 1190 AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES); 1191 for (int i = 0; i < PARTITION_CONTEXTS; i++) { 1192 if (i < 4) { 1193 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 4, 1194 CDF_SIZE(10)); 1195 } else if (i < 16) { 1196 AVERAGE_CDF(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 10); 1197 } else { 1198 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 8, 1199 CDF_SIZE(10)); 1200 } 1201 } 1202 AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf, 1203 SWITCHABLE_FILTERS); 1204 AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES); 1205 AVERAGE_CDF(ctx_left->angle_delta_cdf, ctx_tr->angle_delta_cdf, 1206 2 * MAX_ANGLE_DELTA + 1); 1207 AVG_CDF_STRIDE(ctx_left->tx_size_cdf[0], ctx_tr->tx_size_cdf[0], MAX_TX_DEPTH, 1208 CDF_SIZE(MAX_TX_DEPTH + 1)); 1209 AVERAGE_CDF(ctx_left->tx_size_cdf[1], ctx_tr->tx_size_cdf[1], 1210 MAX_TX_DEPTH + 1); 1211 AVERAGE_CDF(ctx_left->tx_size_cdf[2], ctx_tr->tx_size_cdf[2], 1212 MAX_TX_DEPTH + 1); 1213 AVERAGE_CDF(ctx_left->tx_size_cdf[3], ctx_tr->tx_size_cdf[3], 1214 MAX_TX_DEPTH + 1); 1215 AVERAGE_CDF(ctx_left->delta_q_cdf, ctx_tr->delta_q_cdf, DELTA_Q_PROBS + 1); 1216 AVERAGE_CDF(ctx_left->delta_lf_cdf, ctx_tr->delta_lf_cdf, DELTA_LF_PROBS + 1); 1217 for (int i = 0; i < FRAME_LF_COUNT; i++) { 1218 AVERAGE_CDF(ctx_left->delta_lf_multi_cdf[i], ctx_tr->delta_lf_multi_cdf[i], 1219 DELTA_LF_PROBS + 1); 1220 } 1221 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], 7, 1222 CDF_SIZE(TX_TYPES)); 1223 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], 5, 1224 CDF_SIZE(TX_TYPES)); 1225 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16, 1226 CDF_SIZE(TX_TYPES)); 1227 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12, 1228 CDF_SIZE(TX_TYPES)); 1229 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[3], ctx_tr->inter_ext_tx_cdf[3], 2, 1230 CDF_SIZE(TX_TYPES)); 1231 AVERAGE_CDF(ctx_left->cfl_sign_cdf, ctx_tr->cfl_sign_cdf, CFL_JOINT_SIGNS); 1232 AVERAGE_CDF(ctx_left->cfl_alpha_cdf, ctx_tr->cfl_alpha_cdf, 1233 CFL_ALPHABET_SIZE); 1234 } 1235 1236 // Check neighbor blocks' motion information. 1237 static int check_neighbor_blocks(MB_MODE_INFO **mi, int mi_stride, 1238 const TileInfo *const tile_info, int mi_row, 1239 int mi_col) { 1240 int is_above_low_motion = 1; 1241 int is_left_low_motion = 1; 1242 const int thr = 24; 1243 1244 // Check above block. 1245 if (mi_row > tile_info->mi_row_start) { 1246 const MB_MODE_INFO *above_mbmi = mi[-mi_stride]; 1247 const int_mv above_mv = above_mbmi->mv[0]; 1248 if (above_mbmi->mode >= INTRA_MODE_END && 1249 (abs(above_mv.as_mv.row) > thr || abs(above_mv.as_mv.col) > thr)) 1250 is_above_low_motion = 0; 1251 } 1252 1253 // Check left block. 1254 if (mi_col > tile_info->mi_col_start) { 1255 const MB_MODE_INFO *left_mbmi = mi[-1]; 1256 const int_mv left_mv = left_mbmi->mv[0]; 1257 if (left_mbmi->mode >= INTRA_MODE_END && 1258 (abs(left_mv.as_mv.row) > thr || abs(left_mv.as_mv.col) > thr)) 1259 is_left_low_motion = 0; 1260 } 1261 1262 return (is_above_low_motion && is_left_low_motion); 1263 } 1264 1265 // Check this block's motion in a fast way. 1266 static int fast_detect_non_zero_motion(AV1_COMP *cpi, const uint8_t *src_y, 1267 int src_ystride, 1268 const uint8_t *last_src_y, 1269 int last_src_ystride, int mi_row, 1270 int mi_col) { 1271 AV1_COMMON *const cm = &cpi->common; 1272 const BLOCK_SIZE bsize = cm->seq_params->sb_size; 1273 unsigned int blk_sad = INT_MAX; 1274 if (cpi->src_sad_blk_64x64 != NULL) { 1275 const int sb_size_by_mb = (bsize == BLOCK_128X128) 1276 ? (cm->seq_params->mib_size >> 1) 1277 : cm->seq_params->mib_size; 1278 const int sb_cols = 1279 (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb; 1280 const int sbi_col = mi_col / sb_size_by_mb; 1281 const int sbi_row = mi_row / sb_size_by_mb; 1282 blk_sad = (unsigned int)cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols]; 1283 } else { 1284 blk_sad = cpi->ppi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, 1285 last_src_ystride); 1286 } 1287 1288 // Search 4 1-away points. 1289 const uint8_t *const search_pos[4] = { 1290 last_src_y - last_src_ystride, 1291 last_src_y - 1, 1292 last_src_y + 1, 1293 last_src_y + last_src_ystride, 1294 }; 1295 unsigned int sad_arr[4]; 1296 cpi->ppi->fn_ptr[bsize].sdx4df(src_y, src_ystride, search_pos, 1297 last_src_ystride, sad_arr); 1298 1299 blk_sad = (blk_sad * 5) >> 3; 1300 return (blk_sad < sad_arr[0] && blk_sad < sad_arr[1] && 1301 blk_sad < sad_arr[2] && blk_sad < sad_arr[3]); 1302 } 1303 1304 // Grade the temporal variation of the source by comparing the current sb and 1305 // its collocated block in the last frame. 1306 void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, 1307 int mi_row, int mi_col) { 1308 if (cpi->last_source->y_width != cpi->source->y_width || 1309 cpi->last_source->y_height != cpi->source->y_height) 1310 return; 1311 #if CONFIG_AV1_HIGHBITDEPTH 1312 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) return; 1313 #endif 1314 1315 unsigned int tmp_sse; 1316 unsigned int tmp_variance; 1317 const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size; 1318 uint8_t *src_y = cpi->source->y_buffer; 1319 const int src_ystride = cpi->source->y_stride; 1320 const int src_offset = src_ystride * (mi_row << 2) + (mi_col << 2); 1321 uint8_t *last_src_y = cpi->last_source->y_buffer; 1322 const int last_src_ystride = cpi->last_source->y_stride; 1323 const int last_src_offset = last_src_ystride * (mi_row << 2) + (mi_col << 2); 1324 uint64_t avg_source_sse_threshold_verylow = 10000; // ~1.5*1.5*(64*64) 1325 uint64_t avg_source_sse_threshold_low[2] = { 100000, // ~5*5*(64*64) 1326 36000 }; // ~3*3*(64*64) 1327 1328 uint64_t avg_source_sse_threshold_high = 1000000; // ~15*15*(64*64) 1329 if (cpi->sf.rt_sf.increase_source_sad_thresh) { 1330 avg_source_sse_threshold_high = avg_source_sse_threshold_high << 1; 1331 avg_source_sse_threshold_low[0] = avg_source_sse_threshold_low[0] << 1; 1332 avg_source_sse_threshold_verylow = avg_source_sse_threshold_verylow << 1; 1333 } 1334 uint64_t sum_sq_thresh = 10000; // sum = sqrt(thresh / 64*64)) ~1.5 1335 src_y += src_offset; 1336 last_src_y += last_src_offset; 1337 tmp_variance = cpi->ppi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y, 1338 last_src_ystride, &tmp_sse); 1339 // rd thresholds 1340 if (tmp_sse < avg_source_sse_threshold_low[1]) 1341 x->content_state_sb.source_sad_rd = kLowSad; 1342 1343 // nonrd thresholds 1344 if (tmp_sse == 0) { 1345 x->content_state_sb.source_sad_nonrd = kZeroSad; 1346 return; 1347 } 1348 if (tmp_sse < avg_source_sse_threshold_verylow) 1349 x->content_state_sb.source_sad_nonrd = kVeryLowSad; 1350 else if (tmp_sse < avg_source_sse_threshold_low[0]) 1351 x->content_state_sb.source_sad_nonrd = kLowSad; 1352 else if (tmp_sse > avg_source_sse_threshold_high) 1353 x->content_state_sb.source_sad_nonrd = kHighSad; 1354 1355 // Detect large lighting change. 1356 // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) 1357 if (tmp_variance < (tmp_sse >> 1) && (tmp_sse - tmp_variance) > sum_sq_thresh) 1358 x->content_state_sb.lighting_change = 1; 1359 if ((tmp_sse - tmp_variance) < (sum_sq_thresh >> 1)) 1360 x->content_state_sb.low_sumdiff = 1; 1361 1362 if (tmp_sse > ((avg_source_sse_threshold_high * 7) >> 3) && 1363 !x->content_state_sb.lighting_change && !x->content_state_sb.low_sumdiff) 1364 x->sb_force_fixed_part = 0; 1365 1366 if (!cpi->sf.rt_sf.use_rtc_tf || cpi->rc.high_source_sad || 1367 cpi->rc.frame_source_sad > 20000 || cpi->svc.number_spatial_layers > 1) 1368 return; 1369 1370 // In-place temporal filter. If psnr calculation is enabled, we store the 1371 // source for that. 1372 AV1_COMMON *const cm = &cpi->common; 1373 // Calculate n*mean^2 1374 const unsigned int nmean2 = tmp_sse - tmp_variance; 1375 const int ac_q_step = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, 1376 cm->seq_params->bit_depth); 1377 const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc; 1378 const int avg_q_step = av1_ac_quant_QTX(p_rc->avg_frame_qindex[INTER_FRAME], 1379 0, cm->seq_params->bit_depth); 1380 1381 const unsigned int threshold = (cpi->sf.rt_sf.use_rtc_tf == 1) 1382 ? clamp(avg_q_step, 250, 1000) * ac_q_step 1383 : 250 * ac_q_step; 1384 1385 // TODO(yunqing): use a weighted sum instead of averaging in filtering. 1386 if (tmp_variance <= threshold && nmean2 <= 15) { 1387 // Check neighbor blocks. If neighbor blocks aren't low-motion blocks, 1388 // skip temporal filtering for this block. 1389 MB_MODE_INFO **mi = cm->mi_params.mi_grid_base + 1390 get_mi_grid_idx(&cm->mi_params, mi_row, mi_col); 1391 const TileInfo *const tile_info = &tile_data->tile_info; 1392 const int is_neighbor_blocks_low_motion = check_neighbor_blocks( 1393 mi, cm->mi_params.mi_stride, tile_info, mi_row, mi_col); 1394 if (!is_neighbor_blocks_low_motion) return; 1395 1396 // Only consider 64x64 SB for now. Need to extend to 128x128 for large SB 1397 // size. 1398 // Test several nearby points. If non-zero mv exists, don't do temporal 1399 // filtering. 1400 const int is_this_blk_low_motion = fast_detect_non_zero_motion( 1401 cpi, src_y, src_ystride, last_src_y, last_src_ystride, mi_row, mi_col); 1402 1403 if (!is_this_blk_low_motion) return; 1404 1405 const int shift_x[2] = { 0, cpi->source->subsampling_x }; 1406 const int shift_y[2] = { 0, cpi->source->subsampling_y }; 1407 const uint8_t h = block_size_high[bsize]; 1408 const uint8_t w = block_size_wide[bsize]; 1409 1410 for (int plane = 0; plane < av1_num_planes(cm); ++plane) { 1411 uint8_t *src = cpi->source->buffers[plane]; 1412 const int src_stride = cpi->source->strides[plane != 0]; 1413 uint8_t *last_src = cpi->last_source->buffers[plane]; 1414 const int last_src_stride = cpi->last_source->strides[plane != 0]; 1415 src += src_stride * (mi_row << (2 - shift_y[plane != 0])) + 1416 (mi_col << (2 - shift_x[plane != 0])); 1417 last_src += last_src_stride * (mi_row << (2 - shift_y[plane != 0])) + 1418 (mi_col << (2 - shift_x[plane != 0])); 1419 1420 for (int i = 0; i < (h >> shift_y[plane != 0]); ++i) { 1421 for (int j = 0; j < (w >> shift_x[plane != 0]); ++j) { 1422 src[j] = (last_src[j] + src[j]) >> 1; 1423 } 1424 src += src_stride; 1425 last_src += last_src_stride; 1426 } 1427 } 1428 } 1429 } 1430 1431 // Memset the mbmis at the current superblock to 0 1432 void av1_reset_mbmi(CommonModeInfoParams *const mi_params, BLOCK_SIZE sb_size, 1433 int mi_row, int mi_col) { 1434 // size of sb in unit of mi (BLOCK_4X4) 1435 const int sb_size_mi = mi_size_wide[sb_size]; 1436 const int mi_alloc_size_1d = mi_size_wide[mi_params->mi_alloc_bsize]; 1437 // size of sb in unit of allocated mi size 1438 const int sb_size_alloc_mi = mi_size_wide[sb_size] / mi_alloc_size_1d; 1439 assert(mi_params->mi_alloc_stride % sb_size_alloc_mi == 0 && 1440 "mi is not allocated as a multiple of sb!"); 1441 assert(mi_params->mi_stride % sb_size_mi == 0 && 1442 "mi_grid_base is not allocated as a multiple of sb!"); 1443 1444 const int mi_rows = mi_size_high[sb_size]; 1445 for (int cur_mi_row = 0; cur_mi_row < mi_rows; cur_mi_row++) { 1446 assert(get_mi_grid_idx(mi_params, 0, mi_col + mi_alloc_size_1d) < 1447 mi_params->mi_stride); 1448 const int mi_grid_idx = 1449 get_mi_grid_idx(mi_params, mi_row + cur_mi_row, mi_col); 1450 const int alloc_mi_idx = 1451 get_alloc_mi_idx(mi_params, mi_row + cur_mi_row, mi_col); 1452 memset(&mi_params->mi_grid_base[mi_grid_idx], 0, 1453 sb_size_mi * sizeof(*mi_params->mi_grid_base)); 1454 memset(&mi_params->tx_type_map[mi_grid_idx], 0, 1455 sb_size_mi * sizeof(*mi_params->tx_type_map)); 1456 if (cur_mi_row % mi_alloc_size_1d == 0) { 1457 memset(&mi_params->mi_alloc[alloc_mi_idx], 0, 1458 sb_size_alloc_mi * sizeof(*mi_params->mi_alloc)); 1459 } 1460 } 1461 } 1462 1463 void av1_backup_sb_state(SB_FIRST_PASS_STATS *sb_fp_stats, const AV1_COMP *cpi, 1464 ThreadData *td, const TileDataEnc *tile_data, 1465 int mi_row, int mi_col) { 1466 MACROBLOCK *x = &td->mb; 1467 MACROBLOCKD *xd = &x->e_mbd; 1468 const TileInfo *tile_info = &tile_data->tile_info; 1469 1470 const AV1_COMMON *cm = &cpi->common; 1471 const int num_planes = av1_num_planes(cm); 1472 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 1473 1474 xd->above_txfm_context = 1475 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 1476 xd->left_txfm_context = 1477 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 1478 av1_save_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes); 1479 1480 sb_fp_stats->rd_count = td->rd_counts; 1481 sb_fp_stats->split_count = x->txfm_search_info.txb_split_count; 1482 1483 sb_fp_stats->fc = *td->counts; 1484 1485 // Don't copy in row_mt case, otherwise run into data race. No behavior change 1486 // in row_mt case. 1487 if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { 1488 memcpy(sb_fp_stats->inter_mode_rd_models, tile_data->inter_mode_rd_models, 1489 sizeof(sb_fp_stats->inter_mode_rd_models)); 1490 } 1491 1492 memcpy(sb_fp_stats->thresh_freq_fact, x->thresh_freq_fact, 1493 sizeof(sb_fp_stats->thresh_freq_fact)); 1494 1495 const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col); 1496 sb_fp_stats->current_qindex = 1497 cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex; 1498 1499 #if CONFIG_INTERNAL_STATS 1500 memcpy(sb_fp_stats->mode_chosen_counts, cpi->mode_chosen_counts, 1501 sizeof(sb_fp_stats->mode_chosen_counts)); 1502 #endif // CONFIG_INTERNAL_STATS 1503 } 1504 1505 void av1_restore_sb_state(const SB_FIRST_PASS_STATS *sb_fp_stats, AV1_COMP *cpi, 1506 ThreadData *td, TileDataEnc *tile_data, int mi_row, 1507 int mi_col) { 1508 MACROBLOCK *x = &td->mb; 1509 1510 const AV1_COMMON *cm = &cpi->common; 1511 const int num_planes = av1_num_planes(cm); 1512 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 1513 1514 av1_restore_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, 1515 num_planes); 1516 1517 td->rd_counts = sb_fp_stats->rd_count; 1518 x->txfm_search_info.txb_split_count = sb_fp_stats->split_count; 1519 1520 *td->counts = sb_fp_stats->fc; 1521 1522 if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { 1523 memcpy(tile_data->inter_mode_rd_models, sb_fp_stats->inter_mode_rd_models, 1524 sizeof(sb_fp_stats->inter_mode_rd_models)); 1525 } 1526 1527 memcpy(x->thresh_freq_fact, sb_fp_stats->thresh_freq_fact, 1528 sizeof(sb_fp_stats->thresh_freq_fact)); 1529 1530 const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col); 1531 cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = 1532 sb_fp_stats->current_qindex; 1533 1534 #if CONFIG_INTERNAL_STATS 1535 memcpy(cpi->mode_chosen_counts, sb_fp_stats->mode_chosen_counts, 1536 sizeof(sb_fp_stats->mode_chosen_counts)); 1537 #endif // CONFIG_INTERNAL_STATS 1538 } 1539 1540 /*! Checks whether to skip updating the entropy cost based on tile info. 1541 * 1542 * This function contains the common code used to skip the cost update of coeff, 1543 * mode, mv and dv symbols. 1544 */ 1545 static int skip_cost_update(const SequenceHeader *seq_params, 1546 const TileInfo *const tile_info, const int mi_row, 1547 const int mi_col, 1548 INTERNAL_COST_UPDATE_TYPE upd_level) { 1549 if (upd_level == INTERNAL_COST_UPD_SB) return 0; 1550 if (upd_level == INTERNAL_COST_UPD_OFF) return 1; 1551 1552 // upd_level is at most as frequent as each sb_row in a tile. 1553 if (mi_col != tile_info->mi_col_start) return 1; 1554 1555 if (upd_level == INTERNAL_COST_UPD_SBROW_SET) { 1556 const int mib_size_log2 = seq_params->mib_size_log2; 1557 const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2; 1558 const int sb_size = seq_params->mib_size * MI_SIZE; 1559 const int tile_height = 1560 (tile_info->mi_row_end - tile_info->mi_row_start) * MI_SIZE; 1561 // When upd_level = INTERNAL_COST_UPD_SBROW_SET, the cost update happens 1562 // once for 2, 4 sb rows for sb size 128, sb size 64 respectively. However, 1563 // as the update will not be equally spaced in smaller resolutions making 1564 // it equally spaced by calculating (mv_num_rows_cost_update) the number of 1565 // rows after which the cost update should happen. 1566 const int sb_size_update_freq_map[2] = { 2, 4 }; 1567 const int update_freq_sb_rows = 1568 sb_size_update_freq_map[sb_size != MAX_SB_SIZE]; 1569 const int update_freq_num_rows = sb_size * update_freq_sb_rows; 1570 // Round-up the division result to next integer. 1571 const int num_updates_per_tile = 1572 (tile_height + update_freq_num_rows - 1) / update_freq_num_rows; 1573 const int num_rows_update_per_tile = num_updates_per_tile * sb_size; 1574 // Round-up the division result to next integer. 1575 const int num_sb_rows_per_update = 1576 (tile_height + num_rows_update_per_tile - 1) / num_rows_update_per_tile; 1577 if ((sb_row % num_sb_rows_per_update) != 0) return 1; 1578 } 1579 return 0; 1580 } 1581 1582 // Checks for skip status of mv cost update. 1583 static int skip_mv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info, 1584 const int mi_row, const int mi_col) { 1585 const AV1_COMMON *cm = &cpi->common; 1586 // For intra frames, mv cdfs are not updated during the encode. Hence, the mv 1587 // cost calculation is skipped in this case. 1588 if (frame_is_intra_only(cm)) return 1; 1589 1590 return skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col, 1591 cpi->sf.inter_sf.mv_cost_upd_level); 1592 } 1593 1594 // Checks for skip status of dv cost update. 1595 static int skip_dv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info, 1596 const int mi_row, const int mi_col) { 1597 const AV1_COMMON *cm = &cpi->common; 1598 // Intrabc is only applicable to intra frames. So skip if intrabc is not 1599 // allowed. 1600 if (!av1_allow_intrabc(cm) || is_stat_generation_stage(cpi)) { 1601 return 1; 1602 } 1603 1604 return skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col, 1605 cpi->sf.intra_sf.dv_cost_upd_level); 1606 } 1607 1608 // Update the rate costs of some symbols according to the frequency directed 1609 // by speed features 1610 void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td, 1611 const TileInfo *const tile_info, const int mi_row, 1612 const int mi_col) { 1613 AV1_COMMON *const cm = &cpi->common; 1614 const int num_planes = av1_num_planes(cm); 1615 MACROBLOCK *const x = &td->mb; 1616 MACROBLOCKD *const xd = &x->e_mbd; 1617 1618 if (cm->features.disable_cdf_update) { 1619 return; 1620 } 1621 1622 switch (cpi->sf.inter_sf.coeff_cost_upd_level) { 1623 case INTERNAL_COST_UPD_OFF: 1624 case INTERNAL_COST_UPD_TILE: // Tile level 1625 break; 1626 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile 1627 case INTERNAL_COST_UPD_SBROW: // SB row level in tile 1628 case INTERNAL_COST_UPD_SB: // SB level 1629 if (skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col, 1630 cpi->sf.inter_sf.coeff_cost_upd_level)) 1631 break; 1632 av1_fill_coeff_costs(&x->coeff_costs, xd->tile_ctx, num_planes); 1633 break; 1634 default: assert(0); 1635 } 1636 1637 switch (cpi->sf.inter_sf.mode_cost_upd_level) { 1638 case INTERNAL_COST_UPD_OFF: 1639 case INTERNAL_COST_UPD_TILE: // Tile level 1640 break; 1641 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile 1642 case INTERNAL_COST_UPD_SBROW: // SB row level in tile 1643 case INTERNAL_COST_UPD_SB: // SB level 1644 if (skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col, 1645 cpi->sf.inter_sf.mode_cost_upd_level)) 1646 break; 1647 av1_fill_mode_rates(cm, &x->mode_costs, xd->tile_ctx); 1648 break; 1649 default: assert(0); 1650 } 1651 1652 switch (cpi->sf.inter_sf.mv_cost_upd_level) { 1653 case INTERNAL_COST_UPD_OFF: 1654 case INTERNAL_COST_UPD_TILE: // Tile level 1655 break; 1656 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile 1657 case INTERNAL_COST_UPD_SBROW: // SB row level in tile 1658 case INTERNAL_COST_UPD_SB: // SB level 1659 // Checks for skip status of mv cost update. 1660 if (skip_mv_cost_update(cpi, tile_info, mi_row, mi_col)) break; 1661 av1_fill_mv_costs(&xd->tile_ctx->nmvc, 1662 cm->features.cur_frame_force_integer_mv, 1663 cm->features.allow_high_precision_mv, x->mv_costs); 1664 break; 1665 default: assert(0); 1666 } 1667 1668 switch (cpi->sf.intra_sf.dv_cost_upd_level) { 1669 case INTERNAL_COST_UPD_OFF: 1670 case INTERNAL_COST_UPD_TILE: // Tile level 1671 break; 1672 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile 1673 case INTERNAL_COST_UPD_SBROW: // SB row level in tile 1674 case INTERNAL_COST_UPD_SB: // SB level 1675 // Checks for skip status of dv cost update. 1676 if (skip_dv_cost_update(cpi, tile_info, mi_row, mi_col)) break; 1677 av1_fill_dv_costs(&xd->tile_ctx->ndvc, x->dv_costs); 1678 break; 1679 default: assert(0); 1680 } 1681 } 1682 1683 void av1_dealloc_src_diff_buf(struct macroblock *mb, int num_planes) { 1684 for (int plane = 0; plane < num_planes; ++plane) { 1685 aom_free(mb->plane[plane].src_diff); 1686 mb->plane[plane].src_diff = NULL; 1687 } 1688 } 1689 1690 void av1_alloc_src_diff_buf(const struct AV1Common *cm, struct macroblock *mb) { 1691 const int num_planes = av1_num_planes(cm); 1692 #ifndef NDEBUG 1693 for (int plane = 0; plane < num_planes; ++plane) { 1694 assert(!mb->plane[plane].src_diff); 1695 } 1696 #endif 1697 for (int plane = 0; plane < num_planes; ++plane) { 1698 const int subsampling_xy = 1699 plane ? cm->seq_params->subsampling_x + cm->seq_params->subsampling_y 1700 : 0; 1701 const int sb_size = MAX_SB_SQUARE >> subsampling_xy; 1702 CHECK_MEM_ERROR(cm, mb->plane[plane].src_diff, 1703 (int16_t *)aom_memalign( 1704 32, sizeof(*mb->plane[plane].src_diff) * sb_size)); 1705 } 1706 }