partition_search.c (266596B)
1 /* 2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <float.h> 13 14 #include "config/aom_config.h" 15 16 #include "aom_dsp/txfm_common.h" 17 18 #include "av1/common/av1_common_int.h" 19 #include "av1/common/blockd.h" 20 #include "av1/common/enums.h" 21 #include "av1/common/reconintra.h" 22 23 #include "av1/encoder/aq_complexity.h" 24 #include "av1/encoder/aq_variance.h" 25 #include "av1/encoder/context_tree.h" 26 #include "av1/encoder/encoder.h" 27 #include "av1/encoder/encodeframe.h" 28 #include "av1/encoder/encodeframe_utils.h" 29 #include "av1/encoder/encodemv.h" 30 #include "av1/encoder/intra_mode_search_utils.h" 31 #include "av1/encoder/motion_search_facade.h" 32 #include "av1/encoder/nonrd_opt.h" 33 #include "av1/encoder/partition_search.h" 34 #include "av1/encoder/partition_strategy.h" 35 #include "av1/encoder/reconinter_enc.h" 36 #include "av1/encoder/tokenize.h" 37 #include "av1/encoder/var_based_part.h" 38 #include "av1/encoder/av1_ml_partition_models.h" 39 40 #if CONFIG_TUNE_VMAF 41 #include "av1/encoder/tune_vmaf.h" 42 #endif 43 44 #define COLLECT_MOTION_SEARCH_FEATURE_SB 0 45 46 #if CONFIG_PARTITION_SEARCH_ORDER 47 void av1_reset_part_sf(PARTITION_SPEED_FEATURES *part_sf) { 48 part_sf->partition_search_type = SEARCH_PARTITION; 49 part_sf->less_rectangular_check_level = 0; 50 part_sf->use_square_partition_only_threshold = BLOCK_128X128; 51 part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE; 52 part_sf->default_max_partition_size = BLOCK_LARGEST; 
53 part_sf->default_min_partition_size = BLOCK_4X4; 54 part_sf->adjust_var_based_rd_partitioning = 0; 55 part_sf->max_intra_bsize = BLOCK_LARGEST; 56 // This setting only takes effect when partition_search_type is set 57 // to FIXED_PARTITION. 58 part_sf->fixed_partition_size = BLOCK_16X16; 59 // Recode loop tolerance %. 60 part_sf->partition_search_breakout_dist_thr = 0; 61 part_sf->partition_search_breakout_rate_thr = 0; 62 part_sf->prune_ext_partition_types_search_level = 0; 63 part_sf->prune_part4_search = 0; 64 part_sf->ml_prune_partition = 0; 65 part_sf->ml_early_term_after_part_split_level = 0; 66 for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) { 67 part_sf->ml_partition_search_breakout_thresh[i] = 68 -1; // -1 means not enabled. 69 } 70 part_sf->simple_motion_search_prune_agg = SIMPLE_AGG_LVL0; 71 part_sf->simple_motion_search_split = 0; 72 part_sf->simple_motion_search_prune_rect = 0; 73 part_sf->simple_motion_search_early_term_none = 0; 74 part_sf->simple_motion_search_reduce_search_steps = 0; 75 part_sf->intra_cnn_based_part_prune_level = 0; 76 part_sf->ext_partition_eval_thresh = BLOCK_8X8; 77 part_sf->rect_partition_eval_thresh = BLOCK_128X128; 78 part_sf->ext_part_eval_based_on_cur_best = 0; 79 part_sf->prune_ext_part_using_split_info = 0; 80 part_sf->prune_rectangular_split_based_on_qidx = 0; 81 part_sf->early_term_after_none_split = 0; 82 part_sf->ml_predict_breakout_level = 0; 83 part_sf->prune_sub_8x8_partition_level = 0; 84 part_sf->simple_motion_search_rect_split = 0; 85 part_sf->reuse_prev_rd_results_for_part_ab = 0; 86 part_sf->reuse_best_prediction_for_part_ab = 0; 87 part_sf->use_best_rd_for_pruning = 0; 88 part_sf->skip_non_sq_part_based_on_none = 0; 89 } 90 91 // Reset speed features that works for the baseline encoding, but 92 // blocks the external partition search. 
93 void av1_reset_sf_for_ext_part(AV1_COMP *const cpi) { 94 cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions = 0; 95 } 96 #endif // CONFIG_PARTITION_SEARCH_ORDER 97 98 #if !CONFIG_REALTIME_ONLY 99 // If input |features| is NULL, write tpl stats to file for each super block. 100 // Otherwise, store tpl stats to |features|. 101 // The tpl stats is computed in the unit of tpl_bsize_1d (16x16). 102 // When writing to text file: 103 // The first row contains super block position, super block size, 104 // tpl unit length, number of units in the super block. 105 // The second row contains the intra prediction cost for each unit. 106 // The third row contains the inter prediction cost for each unit. 107 // The forth row contains the motion compensated dependency cost for each unit. 108 static void collect_tpl_stats_sb(const AV1_COMP *const cpi, 109 const BLOCK_SIZE bsize, const int mi_row, 110 const int mi_col, 111 aom_partition_features_t *features) { 112 const AV1_COMMON *const cm = &cpi->common; 113 GF_GROUP *gf_group = &cpi->ppi->gf_group; 114 if (gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE || 115 gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) { 116 return; 117 } 118 119 TplParams *const tpl_data = &cpi->ppi->tpl_data; 120 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cpi->gf_frame_index]; 121 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 122 // If tpl stats is not established, early return 123 if (!tpl_data->ready || gf_group->max_layer_depth_allowed == 0) { 124 if (features != NULL) features->sb_features.tpl_features.available = 0; 125 return; 126 } 127 128 const int tpl_stride = tpl_frame->stride; 129 const int step = 1 << tpl_data->tpl_stats_block_mis_log2; 130 const int mi_width = 131 AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col); 132 const int mi_height = 133 AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row); 134 const int col_steps = (mi_width / step) + ((mi_width % step) > 0); 135 const int 
row_steps = (mi_height / step) + ((mi_height % step) > 0); 136 const int num_blocks = col_steps * row_steps; 137 138 if (features == NULL) { 139 char filename[256]; 140 snprintf(filename, sizeof(filename), "%s/tpl_feature_sb%d", 141 cpi->oxcf.partition_info_path, cpi->sb_counter); 142 FILE *pfile = fopen(filename, "w"); 143 fprintf(pfile, "%d,%d,%d,%d,%d\n", mi_row, mi_col, bsize, 144 tpl_data->tpl_bsize_1d, num_blocks); 145 int count = 0; 146 for (int row = 0; row < mi_height; row += step) { 147 for (int col = 0; col < mi_width; col += step) { 148 TplDepStats *this_stats = 149 &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride, 150 tpl_data->tpl_stats_block_mis_log2)]; 151 fprintf(pfile, "%.0f", (double)this_stats->intra_cost); 152 if (count < num_blocks - 1) fprintf(pfile, ","); 153 ++count; 154 } 155 } 156 fprintf(pfile, "\n"); 157 count = 0; 158 for (int row = 0; row < mi_height; row += step) { 159 for (int col = 0; col < mi_width; col += step) { 160 TplDepStats *this_stats = 161 &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride, 162 tpl_data->tpl_stats_block_mis_log2)]; 163 fprintf(pfile, "%.0f", (double)this_stats->inter_cost); 164 if (count < num_blocks - 1) fprintf(pfile, ","); 165 ++count; 166 } 167 } 168 fprintf(pfile, "\n"); 169 count = 0; 170 for (int row = 0; row < mi_height; row += step) { 171 for (int col = 0; col < mi_width; col += step) { 172 TplDepStats *this_stats = 173 &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride, 174 tpl_data->tpl_stats_block_mis_log2)]; 175 const int64_t mc_dep_delta = 176 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 177 this_stats->mc_dep_dist); 178 fprintf(pfile, "%.0f", (double)mc_dep_delta); 179 if (count < num_blocks - 1) fprintf(pfile, ","); 180 ++count; 181 } 182 } 183 fclose(pfile); 184 } else { 185 features->sb_features.tpl_features.available = 1; 186 features->sb_features.tpl_features.tpl_unit_length = tpl_data->tpl_bsize_1d; 187 
features->sb_features.tpl_features.num_units = num_blocks; 188 int count = 0; 189 for (int row = 0; row < mi_height; row += step) { 190 for (int col = 0; col < mi_width; col += step) { 191 TplDepStats *this_stats = 192 &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride, 193 tpl_data->tpl_stats_block_mis_log2)]; 194 const int64_t mc_dep_delta = 195 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 196 this_stats->mc_dep_dist); 197 features->sb_features.tpl_features.intra_cost[count] = 198 this_stats->intra_cost; 199 features->sb_features.tpl_features.inter_cost[count] = 200 this_stats->inter_cost; 201 features->sb_features.tpl_features.mc_dep_cost[count] = mc_dep_delta; 202 ++count; 203 } 204 } 205 } 206 } 207 #endif // !CONFIG_REALTIME_ONLY 208 209 static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd, 210 FRAME_COUNTS *counts, TX_SIZE tx_size, int depth, 211 int blk_row, int blk_col, 212 uint8_t allow_update_cdf) { 213 MB_MODE_INFO *mbmi = xd->mi[0]; 214 const BLOCK_SIZE bsize = mbmi->bsize; 215 const int max_blocks_high = max_block_high(xd, bsize, 0); 216 const int max_blocks_wide = max_block_wide(xd, bsize, 0); 217 int ctx = txfm_partition_context(xd->above_txfm_context + blk_col, 218 xd->left_txfm_context + blk_row, mbmi->bsize, 219 tx_size); 220 const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col); 221 const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index]; 222 223 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; 224 assert(tx_size > TX_4X4); 225 226 if (depth == MAX_VARTX_DEPTH) { 227 // Don't add to counts in this case 228 mbmi->tx_size = tx_size; 229 txfm_partition_update(xd->above_txfm_context + blk_col, 230 xd->left_txfm_context + blk_row, tx_size, tx_size); 231 return; 232 } 233 234 if (tx_size == plane_tx_size) { 235 #if CONFIG_ENTROPY_STATS 236 ++counts->txfm_partition[ctx][0]; 237 #endif 238 if (allow_update_cdf) 239 update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 
2); 240 mbmi->tx_size = tx_size; 241 txfm_partition_update(xd->above_txfm_context + blk_col, 242 xd->left_txfm_context + blk_row, tx_size, tx_size); 243 } else { 244 const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; 245 const int bsw = tx_size_wide_unit[sub_txs]; 246 const int bsh = tx_size_high_unit[sub_txs]; 247 248 #if CONFIG_ENTROPY_STATS 249 ++counts->txfm_partition[ctx][1]; 250 #endif 251 if (allow_update_cdf) 252 update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2); 253 ++x->txfm_search_info.txb_split_count; 254 255 if (sub_txs == TX_4X4) { 256 mbmi->inter_tx_size[txb_size_index] = TX_4X4; 257 mbmi->tx_size = TX_4X4; 258 txfm_partition_update(xd->above_txfm_context + blk_col, 259 xd->left_txfm_context + blk_row, TX_4X4, tx_size); 260 return; 261 } 262 263 for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { 264 for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { 265 int offsetr = row; 266 int offsetc = col; 267 268 update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr, 269 blk_col + offsetc, allow_update_cdf); 270 } 271 } 272 } 273 } 274 275 static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x, 276 BLOCK_SIZE plane_bsize, 277 FRAME_COUNTS *td_counts, 278 uint8_t allow_update_cdf) { 279 MACROBLOCKD *xd = &x->e_mbd; 280 const int mi_width = mi_size_wide[plane_bsize]; 281 const int mi_height = mi_size_high[plane_bsize]; 282 const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0); 283 const int bh = tx_size_high_unit[max_tx_size]; 284 const int bw = tx_size_wide_unit[max_tx_size]; 285 286 xd->above_txfm_context = 287 cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col; 288 xd->left_txfm_context = 289 xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK); 290 291 for (int idy = 0; idy < mi_height; idy += bh) { 292 for (int idx = 0; idx < mi_width; idx += bw) { 293 update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx, 294 allow_update_cdf); 295 } 296 } 297 } 298 
299 static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row, 300 int blk_col) { 301 MB_MODE_INFO *mbmi = xd->mi[0]; 302 const BLOCK_SIZE bsize = mbmi->bsize; 303 const int max_blocks_high = max_block_high(xd, bsize, 0); 304 const int max_blocks_wide = max_block_wide(xd, bsize, 0); 305 const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col); 306 const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index]; 307 308 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; 309 310 if (tx_size == plane_tx_size) { 311 mbmi->tx_size = tx_size; 312 txfm_partition_update(xd->above_txfm_context + blk_col, 313 xd->left_txfm_context + blk_row, tx_size, tx_size); 314 315 } else { 316 if (tx_size == TX_8X8) { 317 mbmi->inter_tx_size[txb_size_index] = TX_4X4; 318 mbmi->tx_size = TX_4X4; 319 txfm_partition_update(xd->above_txfm_context + blk_col, 320 xd->left_txfm_context + blk_row, TX_4X4, tx_size); 321 return; 322 } 323 const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; 324 const int bsw = tx_size_wide_unit[sub_txs]; 325 const int bsh = tx_size_high_unit[sub_txs]; 326 const int row_end = 327 AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row); 328 const int col_end = 329 AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col); 330 for (int row = 0; row < row_end; row += bsh) { 331 const int offsetr = blk_row + row; 332 for (int col = 0; col < col_end; col += bsw) { 333 const int offsetc = blk_col + col; 334 set_txfm_context(xd, sub_txs, offsetr, offsetc); 335 } 336 } 337 } 338 } 339 340 static void tx_partition_set_contexts(const AV1_COMMON *const cm, 341 MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) { 342 const int mi_width = mi_size_wide[plane_bsize]; 343 const int mi_height = mi_size_high[plane_bsize]; 344 const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0); 345 const int bh = tx_size_high_unit[max_tx_size]; 346 const int bw = tx_size_wide_unit[max_tx_size]; 347 348 xd->above_txfm_context = 349 
cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col; 350 xd->left_txfm_context = 351 xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK); 352 353 for (int idy = 0; idy < mi_height; idy += bh) { 354 for (int idx = 0; idx < mi_width; idx += bw) { 355 set_txfm_context(xd, max_tx_size, idy, idx); 356 } 357 } 358 } 359 360 static void update_zeromv_cnt(const AV1_COMP *const cpi, 361 const MB_MODE_INFO *const mi, int mi_row, 362 int mi_col, BLOCK_SIZE bsize) { 363 if (mi->ref_frame[0] != LAST_FRAME || !is_inter_block(mi) || 364 mi->segment_id > CR_SEGMENT_ID_BOOST2) { 365 return; 366 } 367 const AV1_COMMON *const cm = &cpi->common; 368 const MV mv = mi->mv[0].as_mv; 369 const int bw = mi_size_wide[bsize] >> 1; 370 const int bh = mi_size_high[bsize] >> 1; 371 const int xmis = AOMMIN((cm->mi_params.mi_cols - mi_col) >> 1, bw); 372 const int ymis = AOMMIN((cm->mi_params.mi_rows - mi_row) >> 1, bh); 373 const int block_index = 374 (mi_row >> 1) * (cm->mi_params.mi_cols >> 1) + (mi_col >> 1); 375 for (int y = 0; y < ymis; y++) { 376 for (int x = 0; x < xmis; x++) { 377 // consec_zero_mv is in the scale of 8x8 blocks 378 const int map_offset = block_index + y * (cm->mi_params.mi_cols >> 1) + x; 379 if (abs(mv.row) < 10 && abs(mv.col) < 10) { 380 if (cpi->consec_zero_mv[map_offset] < 255) 381 cpi->consec_zero_mv[map_offset]++; 382 } else { 383 cpi->consec_zero_mv[map_offset] = 0; 384 } 385 } 386 } 387 } 388 389 static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data, 390 ThreadData *td, TokenExtra **t, RUN_TYPE dry_run, 391 BLOCK_SIZE bsize, int *rate) { 392 const AV1_COMMON *const cm = &cpi->common; 393 const int num_planes = av1_num_planes(cm); 394 MACROBLOCK *const x = &td->mb; 395 MACROBLOCKD *const xd = &x->e_mbd; 396 MB_MODE_INFO **mi_4x4 = xd->mi; 397 MB_MODE_INFO *mbmi = mi_4x4[0]; 398 const int seg_skip = 399 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); 400 const int mis = cm->mi_params.mi_stride; 401 const int 
mi_width = mi_size_wide[bsize]; 402 const int mi_height = mi_size_high[bsize]; 403 const int is_inter = is_inter_block(mbmi); 404 405 // Initialize tx_mode and tx_size_search_method 406 TxfmSearchParams *txfm_params = &x->txfm_search_params; 407 set_tx_size_search_method( 408 cm, &cpi->winner_mode_params, txfm_params, 409 cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch, 1); 410 411 const int mi_row = xd->mi_row; 412 const int mi_col = xd->mi_col; 413 if (!is_inter) { 414 xd->cfl.store_y = store_cfl_required(cm, xd); 415 mbmi->skip_txfm = 1; 416 for (int plane = 0; plane < num_planes; ++plane) { 417 av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run, 418 cpi->optimize_seg_arr[mbmi->segment_id]); 419 } 420 421 // If there is at least one lossless segment, force the skip for intra 422 // block to be 0, in order to avoid the segment_id to be changed by in 423 // write_segment_id(). 424 if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map && 425 cpi->enc_seg.has_lossless_segment) 426 mbmi->skip_txfm = 0; 427 428 xd->cfl.store_y = 0; 429 if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) { 430 for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) { 431 if (mbmi->palette_mode_info.palette_size[plane] > 0) { 432 if (!dry_run) { 433 av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size, 434 PALETTE_MAP, tile_data->allow_update_cdf, 435 td->counts); 436 } else if (dry_run == DRY_RUN_COSTCOEFFS) { 437 *rate += 438 av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP); 439 } 440 } 441 } 442 } 443 444 av1_update_intra_mb_txb_context(cpi, td, dry_run, bsize, 445 tile_data->allow_update_cdf); 446 } else { 447 int ref; 448 const int is_compound = has_second_ref(mbmi); 449 450 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); 451 for (ref = 0; ref < 1 + is_compound; ++ref) { 452 const YV12_BUFFER_CONFIG *cfg = 453 get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]); 454 
assert(IMPLIES(!is_intrabc_block(mbmi), cfg)); 455 av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, 456 xd->block_ref_scale_factors[ref], num_planes); 457 } 458 // Predicted sample of inter mode (for Luma plane) cannot be reused if 459 // nonrd_check_partition_split speed feature is enabled, Since in such cases 460 // the buffer may not contain the predicted sample of best mode. 461 const int start_plane = 462 (x->reuse_inter_pred && (!cpi->sf.rt_sf.nonrd_check_partition_split) && 463 cm->seq_params->bit_depth == AOM_BITS_8) 464 ? 1 465 : 0; 466 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 467 start_plane, av1_num_planes(cm) - 1); 468 if (mbmi->motion_mode == OBMC_CAUSAL) { 469 assert(cpi->oxcf.motion_mode_cfg.enable_obmc); 470 av1_build_obmc_inter_predictors_sb(cm, xd); 471 } 472 473 #if CONFIG_MISMATCH_DEBUG 474 if (dry_run == OUTPUT_ENABLED) { 475 for (int plane = 0; plane < num_planes; ++plane) { 476 const struct macroblockd_plane *pd = &xd->plane[plane]; 477 int pixel_c, pixel_r; 478 mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, 479 pd->subsampling_x, pd->subsampling_y); 480 if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, 481 pd->subsampling_y)) 482 continue; 483 mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, 484 cm->current_frame.order_hint, plane, pixel_c, 485 pixel_r, pd->width, pd->height, 486 xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); 487 } 488 } 489 #else 490 (void)num_planes; 491 #endif 492 493 av1_encode_sb(cpi, x, bsize, dry_run); 494 av1_tokenize_sb_vartx(cpi, td, dry_run, bsize, rate, 495 tile_data->allow_update_cdf); 496 } 497 498 if (!dry_run) { 499 if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1; 500 if (txfm_params->tx_mode_search_type == TX_MODE_SELECT && 501 !xd->lossless[mbmi->segment_id] && mbmi->bsize > BLOCK_4X4 && 502 !(is_inter && (mbmi->skip_txfm || seg_skip))) { 503 if (is_inter) { 504 tx_partition_count_update(cm, x, bsize, td->counts, 505 
tile_data->allow_update_cdf); 506 } else { 507 if (mbmi->tx_size != max_txsize_rect_lookup[bsize]) 508 ++x->txfm_search_info.txb_split_count; 509 if (block_signals_txsize(bsize)) { 510 const int tx_size_ctx = get_tx_size_context(xd); 511 const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize); 512 const int depth = tx_size_to_depth(mbmi->tx_size, bsize); 513 const int max_depths = bsize_to_max_depth(bsize); 514 515 if (tile_data->allow_update_cdf) 516 update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], 517 depth, max_depths + 1); 518 #if CONFIG_ENTROPY_STATS 519 ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth]; 520 #endif 521 } 522 } 523 assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi))); 524 } else { 525 int i, j; 526 TX_SIZE intra_tx_size; 527 // The new intra coding scheme requires no change of transform size 528 if (is_inter) { 529 if (xd->lossless[mbmi->segment_id]) { 530 intra_tx_size = TX_4X4; 531 } else { 532 intra_tx_size = 533 tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type); 534 } 535 } else { 536 intra_tx_size = mbmi->tx_size; 537 } 538 539 const int cols = AOMMIN(cm->mi_params.mi_cols - mi_col, mi_width); 540 const int rows = AOMMIN(cm->mi_params.mi_rows - mi_row, mi_height); 541 for (j = 0; j < rows; j++) { 542 for (i = 0; i < cols; i++) mi_4x4[mis * j + i]->tx_size = intra_tx_size; 543 } 544 545 if (intra_tx_size != max_txsize_rect_lookup[bsize]) 546 ++x->txfm_search_info.txb_split_count; 547 } 548 } 549 550 if (txfm_params->tx_mode_search_type == TX_MODE_SELECT && 551 block_signals_txsize(mbmi->bsize) && is_inter && 552 !(mbmi->skip_txfm || seg_skip) && !xd->lossless[mbmi->segment_id]) { 553 if (dry_run) tx_partition_set_contexts(cm, xd, bsize); 554 } else { 555 TX_SIZE tx_size = mbmi->tx_size; 556 // The new intra coding scheme requires no change of transform size 557 if (is_inter) { 558 if (xd->lossless[mbmi->segment_id]) { 559 tx_size = TX_4X4; 560 } else { 561 tx_size = 
tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type); 562 } 563 } else { 564 tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4; 565 } 566 mbmi->tx_size = tx_size; 567 set_txfm_ctxs(tx_size, xd->width, xd->height, 568 (mbmi->skip_txfm || seg_skip) && is_inter_block(mbmi), xd); 569 } 570 571 #if !CONFIG_REALTIME_ONLY 572 if (is_inter_block(mbmi) && !xd->is_chroma_ref && is_cfl_allowed(xd)) { 573 cfl_store_block(xd, mbmi->bsize, mbmi->tx_size); 574 } 575 #endif 576 if (!dry_run) { 577 if (cpi->oxcf.pass == AOM_RC_ONE_PASS && cpi->svc.temporal_layer_id == 0 && 578 cpi->sf.rt_sf.use_temporal_noise_estimate && 579 (!cpi->ppi->use_svc || 580 (cpi->ppi->use_svc && 581 !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && 582 cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) 583 update_zeromv_cnt(cpi, mbmi, mi_row, mi_col, bsize); 584 } 585 } 586 587 static void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, 588 int mi_row, int mi_col, BLOCK_SIZE bsize, 589 AQ_MODE aq_mode, MB_MODE_INFO *mbmi) { 590 x->rdmult = cpi->rd.RDMULT; 591 592 if (aq_mode != NO_AQ) { 593 assert(mbmi != NULL); 594 if (aq_mode == VARIANCE_AQ) { 595 if (cpi->vaq_refresh) { 596 const int energy = bsize <= BLOCK_16X16 597 ? x->mb_energy 598 : av1_log_block_var(cpi, x, bsize); 599 mbmi->segment_id = energy; 600 } 601 x->rdmult = set_rdmult(cpi, x, mbmi->segment_id); 602 } else if (aq_mode == COMPLEXITY_AQ) { 603 x->rdmult = set_rdmult(cpi, x, mbmi->segment_id); 604 } else if (aq_mode == CYCLIC_REFRESH_AQ) { 605 // If segment is boosted, use rdmult for that segment. 
606 if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) 607 x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); 608 } 609 } 610 611 #if !CONFIG_REALTIME_ONLY 612 if (cpi->common.delta_q_info.delta_q_present_flag && 613 !cpi->sf.rt_sf.use_nonrd_pick_mode) { 614 x->rdmult = av1_get_cb_rdmult(cpi, x, bsize, mi_row, mi_col); 615 } 616 #endif // !CONFIG_REALTIME_ONLY 617 618 if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM || 619 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_IQ || 620 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIMULACRA2) { 621 av1_set_ssim_rdmult(cpi, &x->errorperbit, bsize, mi_row, mi_col, 622 &x->rdmult); 623 } 624 #if CONFIG_SALIENCY_MAP 625 else if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_SALIENCY_MAP) { 626 av1_set_saliency_map_vmaf_rdmult(cpi, &x->errorperbit, 627 cpi->common.seq_params->sb_size, mi_row, 628 mi_col, &x->rdmult); 629 } 630 #endif 631 #if CONFIG_TUNE_VMAF 632 else if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING || 633 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_MAX_GAIN || 634 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) { 635 av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); 636 } 637 #endif 638 #if CONFIG_TUNE_BUTTERAUGLI 639 else if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) { 640 av1_set_butteraugli_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); 641 } 642 #endif 643 if (cpi->oxcf.mode == ALLINTRA) { 644 x->rdmult = (int)(((int64_t)x->rdmult * x->intra_sb_rdmult_modifier) >> 7); 645 } 646 647 // Check to make sure that the adjustments above have not caused the 648 // rd multiplier to be truncated to 0. 649 x->rdmult = (x->rdmult > 0) ? 
x->rdmult : 1; 650 } 651 652 void av1_set_offsets_without_segment_id(const AV1_COMP *const cpi, 653 const TileInfo *const tile, 654 MACROBLOCK *const x, int mi_row, 655 int mi_col, BLOCK_SIZE bsize) { 656 const AV1_COMMON *const cm = &cpi->common; 657 const int num_planes = av1_num_planes(cm); 658 MACROBLOCKD *const xd = &x->e_mbd; 659 assert(bsize < BLOCK_SIZES_ALL); 660 const int mi_width = mi_size_wide[bsize]; 661 const int mi_height = mi_size_high[bsize]; 662 663 set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd, 664 mi_row, mi_col); 665 666 set_entropy_context(xd, mi_row, mi_col, num_planes); 667 xd->above_txfm_context = cm->above_contexts.txfm[tile->tile_row] + mi_col; 668 xd->left_txfm_context = 669 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 670 671 // Set up destination pointers. 672 av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0, 673 num_planes); 674 675 // Set up limit values for MV components. 676 // Mv beyond the range do not produce new/different prediction block. 677 av1_set_mv_limits(&cm->mi_params, &x->mv_limits, mi_row, mi_col, mi_height, 678 mi_width, cpi->oxcf.border_in_pixels); 679 680 set_plane_n4(xd, mi_width, mi_height, num_planes); 681 682 // Set up distance of MB to edge of frame in 1/8th pel units. 683 assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); 684 set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, 685 cm->mi_params.mi_rows, cm->mi_params.mi_cols); 686 687 // Set up source buffers. 
688 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); 689 690 // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs() 691 xd->tile = *tile; 692 } 693 694 void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile, 695 MACROBLOCK *const x, int mi_row, int mi_col, 696 BLOCK_SIZE bsize) { 697 const AV1_COMMON *const cm = &cpi->common; 698 const struct segmentation *const seg = &cm->seg; 699 MACROBLOCKD *const xd = &x->e_mbd; 700 MB_MODE_INFO *mbmi; 701 702 av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); 703 704 // Setup segment ID. 705 mbmi = xd->mi[0]; 706 mbmi->segment_id = 0; 707 if (seg->enabled) { 708 if (seg->enabled && !cpi->vaq_refresh) { 709 const uint8_t *const map = 710 seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map; 711 mbmi->segment_id = 712 map ? get_segment_id(&cm->mi_params, map, bsize, mi_row, mi_col) : 0; 713 } 714 av1_init_plane_quantizers(cpi, x, mbmi->segment_id, 0); 715 } 716 #ifndef NDEBUG 717 x->last_set_offsets_loc.mi_row = mi_row; 718 x->last_set_offsets_loc.mi_col = mi_col; 719 x->last_set_offsets_loc.bsize = bsize; 720 #endif // NDEBUG 721 } 722 723 /*!\brief Hybrid intra mode search. 724 * 725 * \ingroup intra_mode_search 726 * \callgraph 727 * \callergraph 728 * This is top level function for mode search for intra frames in non-RD 729 * optimized case. Depending on speed feature and block size it calls 730 * either non-RD or RD optimized intra mode search. 731 * 732 * \param[in] cpi Top-level encoder structure 733 * \param[in] x Pointer to structure holding all the data for 734 the current macroblock 735 * \param[in] rd_cost Struct to keep track of the RD information 736 * \param[in] bsize Current block size 737 * \param[in] ctx Structure to hold snapshot of coding context 738 during the mode picking process 739 * 740 * \remark Nothing is returned. 
Instead, the MB_MODE_INFO struct inside x 741 * is modified to store information about the best mode computed 742 * in this function. The rd_cost struct is also updated with the RD stats 743 * corresponding to the best mode found. 744 */ 745 746 static inline void hybrid_intra_mode_search(AV1_COMP *cpi, MACROBLOCK *const x, 747 RD_STATS *rd_cost, BLOCK_SIZE bsize, 748 PICK_MODE_CONTEXT *ctx) { 749 int use_rdopt = 0; 750 const int hybrid_intra_pickmode = cpi->sf.rt_sf.hybrid_intra_pickmode; 751 // Use rd pick for intra mode search based on block size and variance. 752 if (hybrid_intra_pickmode && bsize < BLOCK_16X16) { 753 unsigned int var_thresh[3] = { 0, 101, 201 }; 754 assert(hybrid_intra_pickmode <= 3); 755 if (x->source_variance >= var_thresh[hybrid_intra_pickmode - 1]) 756 use_rdopt = 1; 757 } 758 759 if (use_rdopt) 760 av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); 761 else 762 av1_nonrd_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); 763 } 764 765 // For real time/allintra row-mt enabled multi-threaded encoding with cost 766 // update frequency set to COST_UPD_TILE/COST_UPD_OFF, tile ctxt is not updated 767 // at superblock level. Thus, it is not required for the encoding of top-right 768 // superblock be complete for updating tile ctxt. However, when encoding a block 769 // whose right edge is also the superblock edge, intra and inter mode evaluation 770 // (ref mv list population) require the encoding of the top-right superblock to 771 // be complete. So, here, we delay the waiting of threads until the need for the 772 // data from the top-right superblock region. 
773 static inline void wait_for_top_right_sb(AV1EncRowMultiThreadInfo *enc_row_mt, 774 AV1EncRowMultiThreadSync *row_mt_sync, 775 TileInfo *tile_info, 776 BLOCK_SIZE sb_size, 777 int sb_mi_size_log2, BLOCK_SIZE bsize, 778 int mi_row, int mi_col) { 779 const int sb_size_in_mi = mi_size_wide[sb_size]; 780 const int bw_in_mi = mi_size_wide[bsize]; 781 const int blk_row_in_sb = mi_row & (sb_size_in_mi - 1); 782 const int blk_col_in_sb = mi_col & (sb_size_in_mi - 1); 783 const int top_right_block_in_sb = 784 (blk_row_in_sb == 0) && (blk_col_in_sb + bw_in_mi >= sb_size_in_mi); 785 786 // Don't wait if the block is the not the top-right block in the superblock. 787 if (!top_right_block_in_sb) return; 788 789 // Wait for the top-right superblock to finish encoding. 790 const int sb_row_in_tile = 791 (mi_row - tile_info->mi_row_start) >> sb_mi_size_log2; 792 const int sb_col_in_tile = 793 (mi_col - tile_info->mi_col_start) >> sb_mi_size_log2; 794 795 enc_row_mt->sync_read_ptr(row_mt_sync, sb_row_in_tile, sb_col_in_tile); 796 } 797 798 /*!\brief Interface for AV1 mode search for an individual coding block 799 * 800 * \ingroup partition_search 801 * \callgraph 802 * \callergraph 803 * Searches prediction modes, transform, and coefficient coding modes for an 804 * individual coding block. This function is the top-level interface that 805 * directs the encoder to the proper mode search function, among these 806 * implemented for inter/intra + rd/non-rd + non-skip segment/skip segment. 
 *
 * \param[in]     cpi        Top-level encoder structure
 * \param[in]     tile_data  Pointer to struct holding adaptive
 *                           data/contexts/models for the tile during
 *                           encoding
 * \param[in]     x          Pointer to structure holding all the data for
 *                           the current macroblock
 * \param[in]     mi_row     Row coordinate of the block in a step size of
 *                           MI_SIZE
 * \param[in]     mi_col     Column coordinate of the block in a step size of
 *                           MI_SIZE
 * \param[out]    rd_cost    Pointer to structure holding rate and distortion
 *                           stats for the current block
 * \param[in]     partition  Partition mode of the parent block
 * \param[in]     bsize      Current block size
 * \param[in,out] ctx        Pointer to structure holding coding contexts and
 *                           chosen modes for the current block
 * \param[in]     best_rd    Upper bound of rd cost of a valid partition
 *
 * \remark Nothing is returned. Instead, the chosen modes and contexts necessary
 * for reconstruction are stored in ctx, the rate-distortion stats are stored in
 * rd_cost. If no valid mode leads to rd_cost <= best_rd, this is signalled by
 * setting rd_cost->rdcost to INT64_MAX.
 */
static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
                          MACROBLOCK *const x, int mi_row, int mi_col,
                          RD_STATS *rd_cost, PARTITION_TYPE partition,
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                          RD_STATS best_rd) {
  // With best-rd pruning enabled, a negative rd budget ends the search at
  // once: both ctx and rd_cost are marked invalid.
  if (cpi->sf.part_sf.use_best_rd_for_pruning && best_rd.rdcost < 0) {
    ctx->rd_stats.rdcost = INT64_MAX;
    ctx->rd_stats.skip_txfm = 0;
    av1_invalid_rd_stats(rd_cost);
    return;
  }

  av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);

  // Reuse the rd results already stored in ctx instead of searching again.
  if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab &&
      ctx->rd_mode_is_ready) {
    assert(ctx->mic.bsize == bsize);
    assert(ctx->mic.partition == partition);
    rd_cost->rate = ctx->rd_stats.rate;
    rd_cost->dist = ctx->rd_stats.dist;
    rd_cost->rdcost = ctx->rd_stats.rdcost;
    return;
  }

  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  int i;

  // This is only needed for real time/allintra row-mt enabled multi-threaded
  // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF.
  wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
                        &tile_data->tile_info, cm->seq_params->sb_size,
                        cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, rd_pick_sb_modes_time);
#endif

  mbmi = xd->mi[0];
  mbmi->bsize = bsize;
  mbmi->partition = partition;

#if CONFIG_RD_DEBUG
  mbmi->mi_row = mi_row;
  mbmi->mi_col = mi_col;
#endif

  // Sets up the tx_type_map buffer in MACROBLOCKD.
  xd->tx_type_map = txfm_info->tx_type_map_;
  xd->tx_type_map_stride = mi_size_wide[bsize];

  // Point the per-plane coefficient buffers at the storage held by ctx.
  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    p[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }

  // Only the first two planes receive a color index map from ctx.
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];

  ctx->skippable = 0;
  // Set to zero to make sure we do not use the previous encoded frame stats
  mbmi->skip_txfm = 0;
  // Reset skip mode flag.
  mbmi->skip_mode = 0;

  x->source_variance = av1_get_perpixel_variance_facade(
      cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  // Save rdmult before it might be changed, so it can be restored later.
  const int orig_rdmult = x->rdmult;
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
  // Set error per bit for current rdmult
  av1_set_error_per_bit(&x->errorperbit, x->rdmult);
  // best_rd is a by-value copy, so this update stays local to this function.
  av1_rd_cost_update(x->rdmult, &best_rd);

  // If best_rd.rdcost is set to INT64_MAX, the encoder will not use any
  // previous rdcost information for the following mode search.
  // Disabling the feature could get some coding gain, with encoder slowdown.
  if (!cpi->sf.part_sf.use_best_rd_for_pruning) {
    av1_invalid_rd_stats(&best_rd);
  }

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
  } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                         rd_cost, bsize, ctx, best_rd.rdcost);
    } else {
      av1_rd_pick_inter_mode(cpi, tile_data, x, rd_cost, bsize, ctx,
                             best_rd.rdcost);
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if (rd_cost->rate != INT_MAX && aq_mode == COMPLEXITY_AQ &&
      bsize >= BLOCK_16X16) {
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handle.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  // Keep a copy of the result in ctx.
  ctx->rd_stats.rate = rd_cost->rate;
  ctx->rd_stats.dist = rd_cost->dist;
  ctx->rd_stats.rdcost = rd_cost->rdcost;

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, rd_pick_sb_modes_time);
#endif
}

// Updates the adaptive symbol CDFs of the tile context (xd->tile_ctx) with the
// syntax elements of the current block (xd->mi[0]). When CONFIG_ENTROPY_STATS
// is enabled, the matching counters in td->counts are incremented as well.
static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const CurrentFrame *const current_frame = &cm->current_frame;
  const BLOCK_SIZE bsize = mbmi->bsize;
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const int seg_ref_active =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);

  // Skip mode flag.
  if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
      is_comp_ref_allowed(bsize)) {
    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
#endif
    update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
  }

  // Transform skip flag.
  if (!mbmi->skip_mode && !seg_ref_active) {
    const int skip_ctx = av1_get_skip_txfm_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip_txfm[skip_ctx][mbmi->skip_txfm]++;
#endif
    update_cdf(fc->skip_txfm_cdfs[skip_ctx], mbmi->skip_txfm, 2);
  }

#if CONFIG_ENTROPY_STATS
  // delta quant applies to both intra and inter
  const int super_block_upper_left =
      ((xd->mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
      ((xd->mi_col & (cm->seq_params->mib_size - 1)) == 0);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  if (delta_q_info->delta_q_present_flag &&
      (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
      super_block_upper_left) {
    const int dq = (mbmi->current_qindex - xd->current_base_qindex) /
                   delta_q_info->delta_q_res;
    const int absdq = abs(dq);
    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
      td->counts->delta_q[i][1]++;
    }
    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
    if (delta_q_info->delta_lf_present_flag) {
      if (delta_q_info->delta_lf_multi) {
        const int frame_lf_count =
            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
                               delta_q_info->delta_lf_res;
          const int abs_delta_lf = abs(delta_lf);
          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
            td->counts->delta_lf_multi[lf_id][i][1]++;
          }
          if (abs_delta_lf < DELTA_LF_SMALL)
            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
        }
      } else {
        const int delta_lf =
            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
            delta_q_info->delta_lf_res;
        const int abs_delta_lf = abs(delta_lf);
        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
          td->counts->delta_lf[i][1]++;
        }
        if (abs_delta_lf < DELTA_LF_SMALL)
          td->counts->delta_lf[abs_delta_lf][0]++;
      }
    }
  }
#endif

  if (!is_inter_block(mbmi)) {
    av1_sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
                        frame_is_intra_only(cm));
  }

  // Intra block copy flag and, when used, its displacement vector.
  if (av1_allow_intrabc(cm)) {
    const int is_intrabc = is_intrabc_block(mbmi);
    update_cdf(fc->intrabc_cdf, is_intrabc, 2);
#if CONFIG_ENTROPY_STATS
    ++td->counts->intrabc[is_intrabc];
#endif  // CONFIG_ENTROPY_STATS
    if (is_intrabc) {
      const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      const int_mv dv_ref = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
      av1_update_mv_stats(&mbmi->mv[0].as_mv, &dv_ref.as_mv, &fc->ndvc,
                          MV_SUBPEL_NONE);
    }
  }

  // The remaining statistics are not gathered for intra-only frames or for
  // skip-mode blocks.
  if (frame_is_intra_only(cm) || mbmi->skip_mode) return;

  FRAME_COUNTS *const counts = td->counts;
  const int inter_block = is_inter_block(mbmi);

  if (!seg_ref_active) {
#if CONFIG_ENTROPY_STATS
    counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
#endif
    update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
               inter_block, 2);
    // If the segment reference feature is enabled we have only a single
    // reference frame allowed for the segment so exclude it from
    // the reference frame counts used to work out probabilities.
    if (inter_block) {
      const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
      const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
      // Single vs. compound reference selection.
      if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
        if (is_comp_ref_allowed(bsize)) {
#if CONFIG_ENTROPY_STATS
          counts->comp_inter[av1_get_reference_mode_context(xd)]
                            [has_second_ref(mbmi)]++;
#endif  // CONFIG_ENTROPY_STATS
          update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi), 2);
        }
      }

      if (has_second_ref(mbmi)) {
        // Compound reference: type first, then the individual frames.
        const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
                                                      ? UNIDIR_COMP_REFERENCE
                                                      : BIDIR_COMP_REFERENCE;
        update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
                   COMP_REFERENCE_TYPES);
#if CONFIG_ENTROPY_STATS
        counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
                             [comp_ref_type]++;
#endif  // CONFIG_ENTROPY_STATS

        if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
          const int bit = (ref0 == BWDREF_FRAME);
          update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
          counts
              ->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
          if (!bit) {
            const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
            update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
                                [bit1]++;
#endif  // CONFIG_ENTROPY_STATS
            if (bit1) {
              update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
                         ref1 == GOLDEN_FRAME, 2);
#if CONFIG_ENTROPY_STATS
              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)][2]
                                  [ref1 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          }
        } else {
          const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
          update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
          counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
          if (!bit) {
            update_cdf(av1_get_pred_cdf_comp_ref_p1(xd), ref0 == LAST2_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
                            [ref0 == LAST2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          } else {
            update_cdf(av1_get_pred_cdf_comp_ref_p2(xd), ref0 == GOLDEN_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
                            [ref0 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          }
          update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd), ref1 == ALTREF_FRAME,
                     2);
#if CONFIG_ENTROPY_STATS
          counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
                             [ref1 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          if (ref1 != ALTREF_FRAME) {
            update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
                       ref1 == ALTREF2_FRAME, 2);
#if CONFIG_ENTROPY_STATS
            counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
                               [ref1 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          }
        }
      } else {
        // Single reference: a sequence of binary decisions on ref0.
        const int bit = (ref0 >= BWDREF_FRAME);
        update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
        counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
        if (bit) {
          assert(ref0 <= ALTREF_FRAME);
          update_cdf(av1_get_pred_cdf_single_ref_p2(xd), ref0 == ALTREF_FRAME,
                     2);
#if CONFIG_ENTROPY_STATS
          counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
                            [ref0 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          if (ref0 != ALTREF_FRAME) {
            update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
                       ref0 == ALTREF2_FRAME, 2);
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
                              [ref0 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          }
        } else {
          const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
          update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
          counts->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
#endif  // CONFIG_ENTROPY_STATS
          if (!bit1) {
            update_cdf(av1_get_pred_cdf_single_ref_p4(xd), ref0 != LAST_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
                              [ref0 != LAST_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          } else {
            update_cdf(av1_get_pred_cdf_single_ref_p5(xd), ref0 != LAST3_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
                              [ref0 != LAST3_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          }
        }
      }

      // Inter-intra flag, mode and wedge parameters.
      if (cm->seq_params->enable_interintra_compound &&
          is_interintra_allowed(mbmi)) {
        const int bsize_group = size_group_lookup[bsize];
        if (mbmi->ref_frame[1] == INTRA_FRAME) {
#if CONFIG_ENTROPY_STATS
          counts->interintra[bsize_group][1]++;
#endif
          update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
#if CONFIG_ENTROPY_STATS
          counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
#endif
          update_cdf(fc->interintra_mode_cdf[bsize_group],
                     mbmi->interintra_mode, INTERINTRA_MODES);
          if (av1_is_wedge_used(bsize)) {
#if CONFIG_ENTROPY_STATS
            counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
#endif
            update_cdf(fc->wedge_interintra_cdf[bsize],
                       mbmi->use_wedge_interintra, 2);
            if (mbmi->use_wedge_interintra) {
#if CONFIG_ENTROPY_STATS
              counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
#endif
              update_cdf(fc->wedge_idx_cdf[bsize], mbmi->interintra_wedge_index,
                         16);
            }
          }
        } else {
#if CONFIG_ENTROPY_STATS
          counts->interintra[bsize_group][0]++;
#endif
          update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
        }
      }

      // Motion mode; which CDF is updated depends on the modes allowed.
      const MOTION_MODE motion_allowed =
          cm->features.switchable_motion_mode
              ? motion_mode_allowed(xd->global_motion, xd, mbmi,
                                    cm->features.allow_warped_motion)
              : SIMPLE_TRANSLATION;
      if (mbmi->ref_frame[1] != INTRA_FRAME) {
        if (motion_allowed == WARPED_CAUSAL) {
#if CONFIG_ENTROPY_STATS
          counts->motion_mode[bsize][mbmi->motion_mode]++;
#endif
          update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
                     MOTION_MODES);
        } else if (motion_allowed == OBMC_CAUSAL) {
#if CONFIG_ENTROPY_STATS
          counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
#endif
          update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL, 2);
        }
      }

      // Compound group index, compound index and masked compound type.
      if (has_second_ref(mbmi)) {
        assert(current_frame->reference_mode != SINGLE_REFERENCE &&
               is_inter_compound_mode(mbmi->mode) &&
               mbmi->motion_mode == SIMPLE_TRANSLATION);

        const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                         cm->seq_params->enable_masked_compound;
        if (masked_compound_used) {
          const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
#if CONFIG_ENTROPY_STATS
          ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
#endif
          update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
                     mbmi->comp_group_idx, 2);
        }

        if (mbmi->comp_group_idx == 0) {
          const int comp_index_ctx = get_comp_index_context(cm, xd);
#if CONFIG_ENTROPY_STATS
          ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
#endif
          update_cdf(fc->compound_index_cdf[comp_index_ctx], mbmi->compound_idx,
                     2);
        } else {
          assert(masked_compound_used);
          if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
#if CONFIG_ENTROPY_STATS
            ++counts->compound_type[bsize][mbmi->interinter_comp.type -
                                           COMPOUND_WEDGE];
#endif
            update_cdf(fc->compound_type_cdf[bsize],
                       mbmi->interinter_comp.type - COMPOUND_WEDGE,
                       MASKED_COMPOUND_TYPES);
          }
        }
      }
      // Wedge index of an inter-inter wedge compound.
      if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
        if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
#if CONFIG_ENTROPY_STATS
          counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
#endif
          update_cdf(fc->wedge_idx_cdf[bsize],
                     mbmi->interinter_comp.wedge_index, 16);
        }
      }
    }
  }

  // Interpolation filter type.
  if (inter_block && cm->features.interp_filter == SWITCHABLE &&
      av1_is_interp_needed(xd)) {
    update_filter_type_cdf(xd, mbmi, cm->seq_params->enable_dual_filter);
  }
  // Inter prediction mode, reference MV index (DRL) and motion vectors.
  if (inter_block &&
      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
    const PREDICTION_MODE mode = mbmi->mode;
    const int16_t mode_ctx =
        av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
    if (has_second_ref(mbmi)) {
#if CONFIG_ENTROPY_STATS
      ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
#endif
      update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
                 INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
    } else {
      av1_update_inter_mode_stats(fc, counts, mode, mode_ctx);
    }

    const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
    if (new_mv) {
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      // One binary decision per candidate index, stopping at the chosen one.
      for (int idx = 0; idx < 2; ++idx) {
        if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
          const uint8_t drl_ctx =
              av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
          update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx, 2);
#if CONFIG_ENTROPY_STATS
          ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
#endif
          if (mbmi->ref_mv_idx == idx) break;
        }
      }
    }

    if (have_nearmv_in_inter_mode(mbmi->mode)) {
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      // Same scheme as above, with the candidate index offset by one.
      for (int idx = 1; idx < 3; ++idx) {
        if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
          const uint8_t drl_ctx =
              av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
          update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx - 1, 2);
#if CONFIG_ENTROPY_STATS
          ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
#endif
          if (mbmi->ref_mv_idx == idx - 1) break;
        }
      }
    }
    if (have_newmv_in_inter_mode(mbmi->mode)) {
      const int allow_hp = cm->features.cur_frame_force_integer_mv
                               ? MV_SUBPEL_NONE
                               : cm->features.allow_high_precision_mv;
      if (new_mv) {
        // NEWMV / NEW_NEWMV: every reference in use carries a new MV.
        for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
          const int_mv ref_mv = av1_get_ref_mv(x, ref);
          av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
                              allow_hp);
        }
      } else if (mbmi->mode == NEAREST_NEWMV || mbmi->mode == NEAR_NEWMV) {
        // Only the second reference carries a new MV.
        const int ref = 1;
        const int_mv ref_mv = av1_get_ref_mv(x, ref);
        av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
                            allow_hp);
      } else if (mbmi->mode == NEW_NEARESTMV || mbmi->mode == NEW_NEARMV) {
        // Only the first reference carries a new MV.
        const int ref = 0;
        const int_mv ref_mv = av1_get_ref_mv(x, ref);
        av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
                            allow_hp);
      }
    }
  }
}

/*!\brief Reconstructs an individual coding block
 *
 * \ingroup partition_search
 * Reconstructs an individual coding block by applying the chosen modes stored
 * in ctx, also updates mode counts and entropy models.
1389 * 1390 * \param[in] cpi Top-level encoder structure 1391 * \param[in] tile_data Pointer to struct holding adaptive 1392 * data/contexts/models for the tile during encoding 1393 * \param[in] td Pointer to thread data 1394 * \param[in] tp Pointer to the starting token 1395 * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE 1396 * \param[in] mi_col Column coordinate of the block in a step size of 1397 * MI_SIZE 1398 * \param[in] dry_run A code indicating whether it is part of the final 1399 * pass for reconstructing the superblock 1400 * \param[in] bsize Current block size 1401 * \param[in] partition Partition mode of the parent block 1402 * \param[in] ctx Pointer to structure holding coding contexts and the 1403 * chosen modes for the current block 1404 * \param[in] rate Pointer to the total rate for the current block 1405 * 1406 * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters) 1407 * will be updated in the pixel buffers in td->mb.e_mbd. Also, the chosen modes 1408 * will be stored in the MB_MODE_INFO buffer td->mb.e_mbd.mi[0]. 
1409 */ 1410 static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data, 1411 ThreadData *td, TokenExtra **tp, int mi_row, int mi_col, 1412 RUN_TYPE dry_run, BLOCK_SIZE bsize, 1413 PARTITION_TYPE partition, PICK_MODE_CONTEXT *const ctx, 1414 int *rate) { 1415 const AV1_COMMON *const cm = &cpi->common; 1416 TileInfo *const tile = &tile_data->tile_info; 1417 MACROBLOCK *const x = &td->mb; 1418 MACROBLOCKD *xd = &x->e_mbd; 1419 const int subsampling_x = cm->seq_params->subsampling_x; 1420 const int subsampling_y = cm->seq_params->subsampling_y; 1421 1422 av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); 1423 const int origin_mult = x->rdmult; 1424 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); 1425 MB_MODE_INFO *mbmi = xd->mi[0]; 1426 mbmi->partition = partition; 1427 av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run); 1428 1429 if (!dry_run) { 1430 set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y], 1431 x->cb_offset[PLANE_TYPE_UV]); 1432 assert(x->cb_offset[PLANE_TYPE_Y] < 1433 (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size])); 1434 assert(x->cb_offset[PLANE_TYPE_UV] < 1435 ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >> 1436 (subsampling_x + subsampling_y))); 1437 } 1438 1439 encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate); 1440 1441 if (!dry_run) { 1442 update_cb_offsets(x, bsize, subsampling_x, subsampling_y); 1443 if (bsize == cpi->common.seq_params->sb_size && mbmi->skip_txfm == 1 && 1444 cm->delta_q_info.delta_lf_present_flag) { 1445 const int frame_lf_count = 1446 av1_num_planes(cm) > 1 ? 
FRAME_LF_COUNT : FRAME_LF_COUNT - 2; 1447 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) 1448 mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id]; 1449 mbmi->delta_lf_from_base = xd->delta_lf_from_base; 1450 } 1451 if (has_second_ref(mbmi)) { 1452 if (mbmi->compound_idx == 0 || 1453 mbmi->interinter_comp.type == COMPOUND_AVERAGE) 1454 mbmi->comp_group_idx = 0; 1455 else 1456 mbmi->comp_group_idx = 1; 1457 } 1458 1459 // delta quant applies to both intra and inter 1460 const int super_block_upper_left = 1461 ((mi_row & (cm->seq_params->mib_size - 1)) == 0) && 1462 ((mi_col & (cm->seq_params->mib_size - 1)) == 0); 1463 const DeltaQInfo *const delta_q_info = &cm->delta_q_info; 1464 if (delta_q_info->delta_q_present_flag && 1465 (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) && 1466 super_block_upper_left) { 1467 xd->current_base_qindex = mbmi->current_qindex; 1468 if (delta_q_info->delta_lf_present_flag) { 1469 if (delta_q_info->delta_lf_multi) { 1470 const int frame_lf_count = 1471 av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2; 1472 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) { 1473 xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id]; 1474 } 1475 } else { 1476 xd->delta_lf_from_base = mbmi->delta_lf_from_base; 1477 } 1478 } 1479 } 1480 1481 RD_COUNTS *rdc = &td->rd_counts; 1482 if (mbmi->skip_mode) { 1483 assert(!frame_is_intra_only(cm)); 1484 rdc->skip_mode_used_flag = 1; 1485 if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) { 1486 assert(has_second_ref(mbmi)); 1487 rdc->compound_ref_used_flag = 1; 1488 } 1489 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); 1490 } else { 1491 const int seg_ref_active = 1492 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); 1493 if (!seg_ref_active) { 1494 // If the segment reference feature is enabled we have only a single 1495 // reference frame allowed for the segment so exclude it from 1496 // the reference frame counts used to work out probabilities. 
1497 if (is_inter_block(mbmi)) { 1498 av1_collect_neighbors_ref_counts(xd); 1499 if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) { 1500 if (has_second_ref(mbmi)) { 1501 // This flag is also updated for 4x4 blocks 1502 rdc->compound_ref_used_flag = 1; 1503 } 1504 } 1505 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); 1506 } 1507 } 1508 } 1509 1510 if (tile_data->allow_update_cdf) update_stats(&cpi->common, td); 1511 1512 // Gather obmc and warped motion count to update the probability. 1513 if ((cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 && 1514 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) || 1515 (cm->features.allow_warped_motion && 1516 cpi->sf.inter_sf.prune_warped_prob_thresh > 0)) { 1517 const int inter_block = is_inter_block(mbmi); 1518 const int seg_ref_active = 1519 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); 1520 if (!seg_ref_active && inter_block) { 1521 const MOTION_MODE motion_allowed = 1522 cm->features.switchable_motion_mode 1523 ? motion_mode_allowed(xd->global_motion, xd, mbmi, 1524 cm->features.allow_warped_motion) 1525 : SIMPLE_TRANSLATION; 1526 1527 if (mbmi->ref_frame[1] != INTRA_FRAME) { 1528 if (motion_allowed >= OBMC_CAUSAL) { 1529 td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++; 1530 } 1531 if (motion_allowed == WARPED_CAUSAL) { 1532 td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++; 1533 } 1534 } 1535 } 1536 } 1537 } 1538 // TODO(Ravi/Remya): Move this copy function to a better logical place 1539 // This function will copy the best mode information from block 1540 // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This 1541 // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during 1542 // bitstream preparation. 
1543 av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext, 1544 av1_ref_frame_type(xd->mi[0]->ref_frame)); 1545 x->rdmult = origin_mult; 1546 } 1547 1548 /*!\brief Reconstructs a partition (may contain multiple coding blocks) 1549 * 1550 * \ingroup partition_search 1551 * Reconstructs a sub-partition of the superblock by applying the chosen modes 1552 * and partition trees stored in pc_tree. 1553 * 1554 * \param[in] cpi Top-level encoder structure 1555 * \param[in] td Pointer to thread data 1556 * \param[in] tile_data Pointer to struct holding adaptive 1557 * data/contexts/models for the tile during encoding 1558 * \param[in] tp Pointer to the starting token 1559 * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE 1560 * \param[in] mi_col Column coordinate of the block in a step size of 1561 * MI_SIZE 1562 * \param[in] dry_run A code indicating whether it is part of the final 1563 * pass for reconstructing the superblock 1564 * \param[in] bsize Current block size 1565 * \param[in] pc_tree Pointer to the PC_TREE node storing the picked 1566 * partitions and mode info for the current block 1567 * \param[in] rate Pointer to the total rate for the current block 1568 * 1569 * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters) 1570 * will be updated in the pixel buffers in td->mb.e_mbd. 
1571 */ 1572 static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, 1573 TileDataEnc *tile_data, TokenExtra **tp, int mi_row, 1574 int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize, 1575 PC_TREE *pc_tree, int *rate) { 1576 assert(bsize < BLOCK_SIZES_ALL); 1577 const AV1_COMMON *const cm = &cpi->common; 1578 const CommonModeInfoParams *const mi_params = &cm->mi_params; 1579 MACROBLOCK *const x = &td->mb; 1580 MACROBLOCKD *const xd = &x->e_mbd; 1581 assert(bsize < BLOCK_SIZES_ALL); 1582 const int hbs = mi_size_wide[bsize] / 2; 1583 const int is_partition_root = bsize >= BLOCK_8X8; 1584 const int ctx = is_partition_root 1585 ? partition_plane_context(xd, mi_row, mi_col, bsize) 1586 : -1; 1587 const PARTITION_TYPE partition = pc_tree->partitioning; 1588 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); 1589 #if !CONFIG_REALTIME_ONLY 1590 int quarter_step = mi_size_wide[bsize] / 4; 1591 int i; 1592 BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); 1593 #endif 1594 1595 if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; 1596 if (subsize == BLOCK_INVALID) return; 1597 1598 if (!dry_run && ctx >= 0) { 1599 const int has_rows = (mi_row + hbs) < mi_params->mi_rows; 1600 const int has_cols = (mi_col + hbs) < mi_params->mi_cols; 1601 1602 if (has_rows && has_cols) { 1603 #if CONFIG_ENTROPY_STATS 1604 td->counts->partition[ctx][partition]++; 1605 #endif 1606 1607 if (tile_data->allow_update_cdf) { 1608 FRAME_CONTEXT *fc = xd->tile_ctx; 1609 update_cdf(fc->partition_cdf[ctx], partition, 1610 partition_cdf_length(bsize)); 1611 } 1612 } 1613 } 1614 1615 switch (partition) { 1616 case PARTITION_NONE: 1617 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, 1618 partition, pc_tree->none, rate); 1619 break; 1620 case PARTITION_VERT: 1621 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, 1622 partition, pc_tree->vertical[0], rate); 1623 if (mi_col + hbs < mi_params->mi_cols) { 1624 
encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize, 1625 partition, pc_tree->vertical[1], rate); 1626 } 1627 break; 1628 case PARTITION_HORZ: 1629 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, 1630 partition, pc_tree->horizontal[0], rate); 1631 if (mi_row + hbs < mi_params->mi_rows) { 1632 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize, 1633 partition, pc_tree->horizontal[1], rate); 1634 } 1635 break; 1636 case PARTITION_SPLIT: 1637 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, 1638 pc_tree->split[0], rate); 1639 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize, 1640 pc_tree->split[1], rate); 1641 encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize, 1642 pc_tree->split[2], rate); 1643 encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run, 1644 subsize, pc_tree->split[3], rate); 1645 break; 1646 1647 #if !CONFIG_REALTIME_ONLY 1648 case PARTITION_HORZ_A: 1649 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2, 1650 partition, pc_tree->horizontala[0], rate); 1651 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2, 1652 partition, pc_tree->horizontala[1], rate); 1653 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize, 1654 partition, pc_tree->horizontala[2], rate); 1655 break; 1656 case PARTITION_HORZ_B: 1657 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, 1658 partition, pc_tree->horizontalb[0], rate); 1659 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2, 1660 partition, pc_tree->horizontalb[1], rate); 1661 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run, 1662 bsize2, partition, pc_tree->horizontalb[2], rate); 1663 break; 1664 case PARTITION_VERT_A: 1665 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2, 1666 partition, pc_tree->verticala[0], rate); 1667 encode_b(cpi, tile_data, td, tp, 
mi_row + hbs, mi_col, dry_run, bsize2, 1668 partition, pc_tree->verticala[1], rate); 1669 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize, 1670 partition, pc_tree->verticala[2], rate); 1671 1672 break; 1673 case PARTITION_VERT_B: 1674 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, 1675 partition, pc_tree->verticalb[0], rate); 1676 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2, 1677 partition, pc_tree->verticalb[1], rate); 1678 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run, 1679 bsize2, partition, pc_tree->verticalb[2], rate); 1680 break; 1681 case PARTITION_HORZ_4: 1682 for (i = 0; i < SUB_PARTITIONS_PART4; ++i) { 1683 int this_mi_row = mi_row + i * quarter_step; 1684 if (i > 0 && this_mi_row >= mi_params->mi_rows) break; 1685 1686 encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize, 1687 partition, pc_tree->horizontal4[i], rate); 1688 } 1689 break; 1690 case PARTITION_VERT_4: 1691 for (i = 0; i < SUB_PARTITIONS_PART4; ++i) { 1692 int this_mi_col = mi_col + i * quarter_step; 1693 if (i > 0 && this_mi_col >= mi_params->mi_cols) break; 1694 encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize, 1695 partition, pc_tree->vertical4[i], rate); 1696 } 1697 break; 1698 #endif 1699 default: assert(0 && "Invalid partition type."); break; 1700 } 1701 1702 update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); 1703 } 1704 1705 static inline int is_adjust_var_based_part_enabled( 1706 AV1_COMMON *const cm, const PARTITION_SPEED_FEATURES *const part_sf, 1707 BLOCK_SIZE bsize) { 1708 if (part_sf->partition_search_type != VAR_BASED_PARTITION) return 0; 1709 if (part_sf->adjust_var_based_rd_partitioning == 0 || 1710 part_sf->adjust_var_based_rd_partitioning > 2) 1711 return 0; 1712 1713 if (bsize <= BLOCK_32X32) return 1; 1714 if (part_sf->adjust_var_based_rd_partitioning == 2) { 1715 const int is_larger_qindex = 
cm->quant_params.base_qindex > 190; 1716 const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360; 1717 return is_360p_or_larger && is_larger_qindex && bsize == BLOCK_64X64; 1718 } 1719 return 0; 1720 } 1721 1722 /*!\brief AV1 block partition search (partition estimation and partial search). 1723 * 1724 * \ingroup partition_search 1725 * Encode the block by applying pre-calculated partition patterns that are 1726 * represented by coding block sizes stored in the mbmi array. Minor partition 1727 * adjustments are tested and applied if they lead to lower rd costs. The 1728 * partition types are limited to a basic set: none, horz, vert, and split. 1729 * 1730 * \param[in] cpi Top-level encoder structure 1731 * \param[in] td Pointer to thread data 1732 * \param[in] tile_data Pointer to struct holding adaptive 1733 data/contexts/models for the tile during encoding 1734 * \param[in] mib Array representing MB_MODE_INFO pointers for mi 1735 blocks starting from the first pixel of the current 1736 block 1737 * \param[in] tp Pointer to the starting token 1738 * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE 1739 * \param[in] mi_col Column coordinate of the block in a step size of 1740 MI_SIZE 1741 * \param[in] bsize Current block size 1742 * \param[in] rate Pointer to the final rate for encoding the current 1743 block 1744 * \param[in] dist Pointer to the final distortion of the current block 1745 * \param[in] do_recon Whether the reconstruction function needs to be run, 1746 either for finalizing a superblock or providing 1747 reference for future sub-partitions 1748 * \param[in] pc_tree Pointer to the PC_TREE node holding the picked 1749 partitions and mode info for the current block 1750 * 1751 * \remark Nothing is returned. The pc_tree struct is modified to store the 1752 * picked partition and modes. The rate and dist are also updated with those 1753 * corresponding to the best partition found. 
1754 */ 1755 void av1_rd_use_partition(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, 1756 MB_MODE_INFO **mib, TokenExtra **tp, int mi_row, 1757 int mi_col, BLOCK_SIZE bsize, int *rate, 1758 int64_t *dist, int do_recon, PC_TREE *pc_tree) { 1759 AV1_COMMON *const cm = &cpi->common; 1760 const CommonModeInfoParams *const mi_params = &cm->mi_params; 1761 const int num_planes = av1_num_planes(cm); 1762 TileInfo *const tile_info = &tile_data->tile_info; 1763 MACROBLOCK *const x = &td->mb; 1764 MACROBLOCKD *const xd = &x->e_mbd; 1765 const ModeCosts *mode_costs = &x->mode_costs; 1766 const int bs = mi_size_wide[bsize]; 1767 const int hbs = bs / 2; 1768 const int pl = (bsize >= BLOCK_8X8) 1769 ? partition_plane_context(xd, mi_row, mi_col, bsize) 1770 : 0; 1771 const PARTITION_TYPE partition = 1772 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize) 1773 : PARTITION_NONE; 1774 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); 1775 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; 1776 RD_STATS last_part_rdc, none_rdc, chosen_rdc, invalid_rdc; 1777 BLOCK_SIZE bs_type = mib[0]->bsize; 1778 int use_partition_none = 0; 1779 x->try_merge_partition = 0; 1780 1781 if (pc_tree->none == NULL) { 1782 pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf); 1783 if (!pc_tree->none) 1784 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 1785 "Failed to allocate PICK_MODE_CONTEXT"); 1786 } 1787 PICK_MODE_CONTEXT *ctx_none = pc_tree->none; 1788 1789 if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; 1790 1791 assert(mi_size_wide[bsize] == mi_size_high[bsize]); 1792 // In rt mode, currently the min partition size is BLOCK_8X8. 
1793 assert(bsize >= cpi->sf.part_sf.default_min_partition_size); 1794 1795 av1_invalid_rd_stats(&last_part_rdc); 1796 av1_invalid_rd_stats(&none_rdc); 1797 av1_invalid_rd_stats(&chosen_rdc); 1798 av1_invalid_rd_stats(&invalid_rdc); 1799 1800 pc_tree->partitioning = partition; 1801 1802 xd->above_txfm_context = 1803 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 1804 xd->left_txfm_context = 1805 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 1806 av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1807 1808 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) { 1809 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 1810 x->mb_energy = av1_log_block_var(cpi, x, bsize); 1811 } 1812 1813 // Save rdmult before it might be changed, so it can be restored later. 1814 const int orig_rdmult = x->rdmult; 1815 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); 1816 1817 if (partition != PARTITION_NONE && 1818 is_adjust_var_based_part_enabled(cm, &cpi->sf.part_sf, bsize) && 1819 (mi_row + hbs < mi_params->mi_rows && 1820 mi_col + hbs < mi_params->mi_cols)) { 1821 assert(bsize > cpi->sf.part_sf.default_min_partition_size); 1822 mib[0]->bsize = bsize; 1823 pc_tree->partitioning = PARTITION_NONE; 1824 x->try_merge_partition = 1; 1825 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, PARTITION_NONE, 1826 bsize, ctx_none, invalid_rdc); 1827 1828 if (none_rdc.rate < INT_MAX) { 1829 none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE]; 1830 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist); 1831 } 1832 1833 // Try to skip split partition evaluation based on none partition 1834 // characteristics. 
1835 if (none_rdc.rate < INT_MAX && none_rdc.skip_txfm == 1) { 1836 use_partition_none = 1; 1837 } 1838 1839 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1840 mib[0]->bsize = bs_type; 1841 pc_tree->partitioning = partition; 1842 } 1843 1844 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 1845 pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); 1846 if (!pc_tree->split[i]) 1847 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 1848 "Failed to allocate PC_TREE"); 1849 pc_tree->split[i]->index = i; 1850 } 1851 switch (partition) { 1852 case PARTITION_NONE: 1853 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 1854 PARTITION_NONE, bsize, ctx_none, invalid_rdc); 1855 break; 1856 case PARTITION_HORZ: 1857 if (use_partition_none) { 1858 av1_invalid_rd_stats(&last_part_rdc); 1859 break; 1860 } 1861 1862 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 1863 pc_tree->horizontal[i] = 1864 av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 1865 if (!pc_tree->horizontal[i]) 1866 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 1867 "Failed to allocate PICK_MODE_CONTEXT"); 1868 } 1869 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 1870 PARTITION_HORZ, subsize, pc_tree->horizontal[0], 1871 invalid_rdc); 1872 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 1873 mi_row + hbs < mi_params->mi_rows) { 1874 RD_STATS tmp_rdc; 1875 const PICK_MODE_CONTEXT *const ctx_h = pc_tree->horizontal[0]; 1876 av1_init_rd_stats(&tmp_rdc); 1877 av1_update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1); 1878 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, 1879 NULL); 1880 pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, 1881 PARTITION_HORZ, subsize, pc_tree->horizontal[1], 1882 invalid_rdc); 1883 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 1884 av1_invalid_rd_stats(&last_part_rdc); 1885 break; 1886 } 1887 last_part_rdc.rate += tmp_rdc.rate; 1888 last_part_rdc.dist += 
tmp_rdc.dist; 1889 last_part_rdc.rdcost += tmp_rdc.rdcost; 1890 } 1891 break; 1892 case PARTITION_VERT: 1893 if (use_partition_none) { 1894 av1_invalid_rd_stats(&last_part_rdc); 1895 break; 1896 } 1897 1898 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 1899 pc_tree->vertical[i] = 1900 av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 1901 if (!pc_tree->vertical[i]) 1902 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 1903 "Failed to allocate PICK_MODE_CONTEXT"); 1904 } 1905 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, 1906 PARTITION_VERT, subsize, pc_tree->vertical[0], invalid_rdc); 1907 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && 1908 mi_col + hbs < mi_params->mi_cols) { 1909 RD_STATS tmp_rdc; 1910 const PICK_MODE_CONTEXT *const ctx_v = pc_tree->vertical[0]; 1911 av1_init_rd_stats(&tmp_rdc); 1912 av1_update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1); 1913 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, 1914 NULL); 1915 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, 1916 PARTITION_VERT, subsize, 1917 pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc); 1918 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 1919 av1_invalid_rd_stats(&last_part_rdc); 1920 break; 1921 } 1922 last_part_rdc.rate += tmp_rdc.rate; 1923 last_part_rdc.dist += tmp_rdc.dist; 1924 last_part_rdc.rdcost += tmp_rdc.rdcost; 1925 } 1926 break; 1927 case PARTITION_SPLIT: 1928 if (use_partition_none) { 1929 av1_invalid_rd_stats(&last_part_rdc); 1930 break; 1931 } 1932 1933 last_part_rdc.rate = 0; 1934 last_part_rdc.dist = 0; 1935 last_part_rdc.rdcost = 0; 1936 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 1937 int x_idx = (i & 1) * hbs; 1938 int y_idx = (i >> 1) * hbs; 1939 int jj = i >> 1, ii = i & 0x01; 1940 RD_STATS tmp_rdc; 1941 if ((mi_row + y_idx >= mi_params->mi_rows) || 1942 (mi_col + x_idx >= mi_params->mi_cols)) 1943 continue; 1944 1945 av1_init_rd_stats(&tmp_rdc); 1946 
av1_rd_use_partition( 1947 cpi, td, tile_data, 1948 mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp, 1949 mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, 1950 &tmp_rdc.dist, i != (SUB_PARTITIONS_SPLIT - 1), pc_tree->split[i]); 1951 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 1952 av1_invalid_rd_stats(&last_part_rdc); 1953 break; 1954 } 1955 last_part_rdc.rate += tmp_rdc.rate; 1956 last_part_rdc.dist += tmp_rdc.dist; 1957 } 1958 break; 1959 case PARTITION_VERT_A: 1960 case PARTITION_VERT_B: 1961 case PARTITION_HORZ_A: 1962 case PARTITION_HORZ_B: 1963 case PARTITION_HORZ_4: 1964 case PARTITION_VERT_4: 1965 assert(0 && "Cannot handle extended partition types"); 1966 default: assert(0); break; 1967 } 1968 1969 if (last_part_rdc.rate < INT_MAX) { 1970 last_part_rdc.rate += mode_costs->partition_cost[pl][partition]; 1971 last_part_rdc.rdcost = 1972 RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist); 1973 } 1974 1975 if ((cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION && 1976 cpi->sf.part_sf.adjust_var_based_rd_partitioning > 2) && 1977 partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && 1978 (mi_row + bs < mi_params->mi_rows || 1979 mi_row + hbs == mi_params->mi_rows) && 1980 (mi_col + bs < mi_params->mi_cols || 1981 mi_col + hbs == mi_params->mi_cols)) { 1982 BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 1983 chosen_rdc.rate = 0; 1984 chosen_rdc.dist = 0; 1985 1986 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 1987 pc_tree->partitioning = PARTITION_SPLIT; 1988 1989 // Split partition. 
1990 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 1991 int x_idx = (i & 1) * hbs; 1992 int y_idx = (i >> 1) * hbs; 1993 RD_STATS tmp_rdc; 1994 1995 if ((mi_row + y_idx >= mi_params->mi_rows) || 1996 (mi_col + x_idx >= mi_params->mi_cols)) 1997 continue; 1998 1999 av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 2000 pc_tree->split[i]->partitioning = PARTITION_NONE; 2001 if (pc_tree->split[i]->none == NULL) 2002 pc_tree->split[i]->none = 2003 av1_alloc_pmc(cpi, split_subsize, &td->shared_coeff_buf); 2004 if (!pc_tree->split[i]->none) 2005 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2006 "Failed to allocate PICK_MODE_CONTEXT"); 2007 pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, 2008 PARTITION_SPLIT, split_subsize, pc_tree->split[i]->none, 2009 invalid_rdc); 2010 2011 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 2012 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { 2013 av1_invalid_rd_stats(&chosen_rdc); 2014 break; 2015 } 2016 2017 chosen_rdc.rate += tmp_rdc.rate; 2018 chosen_rdc.dist += tmp_rdc.dist; 2019 2020 if (i != SUB_PARTITIONS_SPLIT - 1) 2021 encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, 2022 OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL); 2023 2024 chosen_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE]; 2025 } 2026 if (chosen_rdc.rate < INT_MAX) { 2027 chosen_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT]; 2028 chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist); 2029 } 2030 } 2031 2032 // If last_part is better set the partitioning to that. 2033 if (last_part_rdc.rdcost < chosen_rdc.rdcost) { 2034 mib[0]->bsize = bs_type; 2035 if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; 2036 2037 chosen_rdc = last_part_rdc; 2038 } 2039 // If none was better set the partitioning to that. 
2040 if (none_rdc.rdcost < INT64_MAX && 2041 none_rdc.rdcost - (none_rdc.rdcost >> 9) < chosen_rdc.rdcost) { 2042 mib[0]->bsize = bsize; 2043 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 2044 chosen_rdc = none_rdc; 2045 } 2046 2047 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 2048 2049 // We must have chosen a partitioning and encoding or we'll fail later on. 2050 // No other opportunities for success. 2051 if (bsize == cm->seq_params->sb_size) 2052 assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); 2053 2054 #if CONFIG_COLLECT_COMPONENT_TIMING 2055 start_timing(cpi, encode_sb_time); 2056 #endif 2057 if (do_recon) { 2058 if (bsize == cm->seq_params->sb_size) { 2059 // NOTE: To get estimate for rate due to the tokens, use: 2060 // int rate_coeffs = 0; 2061 // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS, 2062 // bsize, pc_tree, &rate_coeffs); 2063 set_cb_offsets(x->cb_offset, 0, 0); 2064 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, 2065 pc_tree, NULL); 2066 } else { 2067 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, 2068 pc_tree, NULL); 2069 } 2070 } 2071 #if CONFIG_COLLECT_COMPONENT_TIMING 2072 end_timing(cpi, encode_sb_time); 2073 #endif 2074 2075 *rate = chosen_rdc.rate; 2076 *dist = chosen_rdc.dist; 2077 x->rdmult = orig_rdmult; 2078 } 2079 2080 static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data, 2081 ThreadData *td, TokenExtra **tp, int mi_row, 2082 int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize, 2083 PARTITION_TYPE partition, 2084 PICK_MODE_CONTEXT *const ctx, int *rate) { 2085 #if CONFIG_COLLECT_COMPONENT_TIMING 2086 start_timing((AV1_COMP *)cpi, encode_b_nonrd_time); 2087 #endif 2088 const AV1_COMMON *const cm = &cpi->common; 2089 TileInfo *const tile = &tile_data->tile_info; 2090 MACROBLOCK *const x = &td->mb; 2091 MACROBLOCKD *xd = &x->e_mbd; 2092 av1_set_offsets_without_segment_id(cpi, tile, x, 
mi_row, mi_col, bsize); 2093 const int origin_mult = x->rdmult; 2094 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); 2095 MB_MODE_INFO *mbmi = xd->mi[0]; 2096 mbmi->partition = partition; 2097 av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run); 2098 const int subsampling_x = cpi->common.seq_params->subsampling_x; 2099 const int subsampling_y = cpi->common.seq_params->subsampling_y; 2100 if (!dry_run) { 2101 set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y], 2102 x->cb_offset[PLANE_TYPE_UV]); 2103 assert(x->cb_offset[PLANE_TYPE_Y] < 2104 (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size])); 2105 assert(x->cb_offset[PLANE_TYPE_UV] < 2106 ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >> 2107 (subsampling_x + subsampling_y))); 2108 } 2109 2110 encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate); 2111 if (!dry_run) { 2112 update_cb_offsets(x, bsize, subsampling_x, subsampling_y); 2113 if (has_second_ref(mbmi)) { 2114 if (mbmi->compound_idx == 0 || 2115 mbmi->interinter_comp.type == COMPOUND_AVERAGE) 2116 mbmi->comp_group_idx = 0; 2117 else 2118 mbmi->comp_group_idx = 1; 2119 mbmi->compound_idx = 1; 2120 } 2121 RD_COUNTS *const rdc = &td->rd_counts; 2122 if (mbmi->skip_mode) { 2123 assert(!frame_is_intra_only(cm)); 2124 rdc->skip_mode_used_flag = 1; 2125 if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT && 2126 has_second_ref(mbmi)) { 2127 rdc->compound_ref_used_flag = 1; 2128 } 2129 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); 2130 } else { 2131 const int seg_ref_active = 2132 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); 2133 if (!seg_ref_active) { 2134 // If the segment reference feature is enabled we have only a single 2135 // reference frame allowed for the segment so exclude it from 2136 // the reference frame counts used to work out probabilities. 
2137 if (is_inter_block(mbmi)) { 2138 av1_collect_neighbors_ref_counts(xd); 2139 if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT && 2140 has_second_ref(mbmi)) { 2141 // This flag is also updated for 4x4 blocks 2142 rdc->compound_ref_used_flag = 1; 2143 } 2144 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); 2145 } 2146 } 2147 } 2148 if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY && 2149 (mbmi->mode == NEWMV || mbmi->mode < INTRA_MODE_END)) { 2150 int32_t blocks = mi_size_high[bsize] * mi_size_wide[bsize]; 2151 rdc->newmv_or_intra_blocks += blocks; 2152 } 2153 if (tile_data->allow_update_cdf) update_stats(&cpi->common, td); 2154 } 2155 if ((cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ || 2156 cpi->active_map.enabled || cpi->roi.enabled) && 2157 mbmi->skip_txfm && !cpi->rc.rtc_external_ratectrl && cm->seg.enabled) 2158 av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize, dry_run); 2159 // TODO(Ravi/Remya): Move this copy function to a better logical place 2160 // This function will copy the best mode information from block 2161 // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This 2162 // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during 2163 // bitstream preparation. 2164 av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, &x->mbmi_ext, 2165 av1_ref_frame_type(xd->mi[0]->ref_frame)); 2166 x->rdmult = origin_mult; 2167 #if CONFIG_COLLECT_COMPONENT_TIMING 2168 end_timing((AV1_COMP *)cpi, encode_b_nonrd_time); 2169 #endif 2170 } 2171 2172 static int get_force_zeromv_skip_flag_for_blk(const AV1_COMP *cpi, 2173 const MACROBLOCK *x, 2174 BLOCK_SIZE bsize) { 2175 // Force zero MV skip based on SB level decision 2176 if (x->force_zeromv_skip_for_sb < 2) return x->force_zeromv_skip_for_sb; 2177 2178 // For blocks of size equal to superblock size, the decision would have been 2179 // already done at superblock level. Hence zeromv-skip decision is skipped. 
2180 const AV1_COMMON *const cm = &cpi->common; 2181 if (bsize == cm->seq_params->sb_size) return 0; 2182 2183 const int num_planes = av1_num_planes(cm); 2184 const MACROBLOCKD *const xd = &x->e_mbd; 2185 const unsigned int thresh_exit_part_y = 2186 cpi->zeromv_skip_thresh_exit_part[bsize]; 2187 const unsigned int thresh_exit_part_uv = 2188 CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y); 2189 const unsigned int thresh_exit_part[MAX_MB_PLANE] = { thresh_exit_part_y, 2190 thresh_exit_part_uv, 2191 thresh_exit_part_uv }; 2192 const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME); 2193 const struct scale_factors *const sf = 2194 get_ref_scale_factors_const(cm, LAST_FRAME); 2195 2196 struct buf_2d yv12_mb[MAX_MB_PLANE]; 2197 av1_setup_pred_block(xd, yv12_mb, yv12, sf, sf, num_planes); 2198 2199 for (int plane = 0; plane < num_planes; ++plane) { 2200 const struct macroblock_plane *const p = &x->plane[plane]; 2201 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2202 const BLOCK_SIZE bs = 2203 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 2204 const unsigned int plane_sad = cpi->ppi->fn_ptr[bs].sdf( 2205 p->src.buf, p->src.stride, yv12_mb[plane].buf, yv12_mb[plane].stride); 2206 assert(plane < MAX_MB_PLANE); 2207 if (plane_sad >= thresh_exit_part[plane]) return 0; 2208 } 2209 return 1; 2210 } 2211 2212 /*!\brief Top level function to pick block mode for non-RD optimized case 2213 * 2214 * \ingroup partition_search 2215 * \callgraph 2216 * \callergraph 2217 * Searches prediction modes, transform, and coefficient coding modes for an 2218 * individual coding block. This function is the top-level function that is 2219 * used for non-RD optimized mode search (controlled by 2220 * \c cpi->sf.rt_sf.use_nonrd_pick_mode). 
Depending on frame type it calls 2221 * inter/skip/hybrid-intra mode search functions 2222 * 2223 * \param[in] cpi Top-level encoder structure 2224 * \param[in] tile_data Pointer to struct holding adaptive 2225 * data/contexts/models for the tile during 2226 * encoding 2227 * \param[in] x Pointer to structure holding all the data for 2228 * the current macroblock 2229 * \param[in] mi_row Row coordinate of the block in a step size of 2230 * MI_SIZE 2231 * \param[in] mi_col Column coordinate of the block in a step size of 2232 * MI_SIZE 2233 * \param[in] rd_cost Pointer to structure holding rate and distortion 2234 * stats for the current block 2235 * \param[in] bsize Current block size 2236 * \param[in] ctx Pointer to structure holding coding contexts and 2237 * chosen modes for the current block 2238 * 2239 * \remark Nothing is returned. Instead, the chosen modes and contexts necessary 2240 * for reconstruction are stored in ctx, the rate-distortion stats are stored in 2241 * rd_cost. If no valid mode leading to rd_cost <= best_rd, the status will be 2242 * signalled by an INT64_MAX rd_cost->rdcost. 2243 */ 2244 static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data, 2245 MACROBLOCK *const x, int mi_row, int mi_col, 2246 RD_STATS *rd_cost, BLOCK_SIZE bsize, 2247 PICK_MODE_CONTEXT *ctx) { 2248 // For nonrd mode, av1_set_offsets is already called at the superblock level 2249 // in encode_nonrd_sb when we determine the partitioning. 
2250 if (bsize != cpi->common.seq_params->sb_size || 2251 cpi->sf.rt_sf.nonrd_check_partition_split == 1) { 2252 av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize); 2253 } 2254 assert(x->last_set_offsets_loc.mi_row == mi_row && 2255 x->last_set_offsets_loc.mi_col == mi_col && 2256 x->last_set_offsets_loc.bsize == bsize); 2257 AV1_COMMON *const cm = &cpi->common; 2258 const int num_planes = av1_num_planes(cm); 2259 MACROBLOCKD *const xd = &x->e_mbd; 2260 MB_MODE_INFO *mbmi = xd->mi[0]; 2261 struct macroblock_plane *const p = x->plane; 2262 struct macroblockd_plane *const pd = xd->plane; 2263 const AQ_MODE aq_mode = cpi->oxcf.q_cfg.aq_mode; 2264 TxfmSearchInfo *txfm_info = &x->txfm_search_info; 2265 int i; 2266 const int seg_skip = 2267 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); 2268 2269 // This is only needed for real time/allintra row-mt enabled multi-threaded 2270 // encoding with cost update frequency set to COST_UPD_TILE/COST_UPD_OFF. 2271 wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync, 2272 &tile_data->tile_info, cm->seq_params->sb_size, 2273 cm->seq_params->mib_size_log2, bsize, mi_row, mi_col); 2274 2275 #if CONFIG_COLLECT_COMPONENT_TIMING 2276 start_timing(cpi, pick_sb_modes_nonrd_time); 2277 #endif 2278 // Sets up the tx_type_map buffer in MACROBLOCKD. 
2279 xd->tx_type_map = txfm_info->tx_type_map_; 2280 xd->tx_type_map_stride = mi_size_wide[bsize]; 2281 for (i = 0; i < num_planes; ++i) { 2282 p[i].coeff = ctx->coeff[i]; 2283 p[i].qcoeff = ctx->qcoeff[i]; 2284 p[i].dqcoeff = ctx->dqcoeff[i]; 2285 p[i].eobs = ctx->eobs[i]; 2286 p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; 2287 } 2288 for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; 2289 2290 if (!seg_skip) { 2291 x->force_zeromv_skip_for_blk = 2292 get_force_zeromv_skip_flag_for_blk(cpi, x, bsize); 2293 2294 // Source variance may be already compute at superblock level, so no need 2295 // to recompute, unless bsize < sb_size or source_variance is not yet set. 2296 if (!x->force_zeromv_skip_for_blk && 2297 (x->source_variance == UINT_MAX || bsize < cm->seq_params->sb_size)) 2298 x->source_variance = av1_get_perpixel_variance_facade( 2299 cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y); 2300 } 2301 2302 // Save rdmult before it might be changed, so it can be restored later. 
2303 const int orig_rdmult = x->rdmult; 2304 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi); 2305 if (cpi->roi.enabled && cpi->roi.delta_qp_enabled && mbmi->segment_id) 2306 x->rdmult = cpi->roi.rdmult_delta_qp; 2307 // Set error per bit for current rdmult 2308 av1_set_error_per_bit(&x->errorperbit, x->rdmult); 2309 // Find best coding mode & reconstruct the MB so it is available 2310 // as a predictor for MBs that follow in the SB 2311 if (frame_is_intra_only(cm)) { 2312 #if CONFIG_COLLECT_COMPONENT_TIMING 2313 start_timing(cpi, hybrid_intra_mode_search_time); 2314 #endif 2315 hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); 2316 #if CONFIG_COLLECT_COMPONENT_TIMING 2317 end_timing(cpi, hybrid_intra_mode_search_time); 2318 #endif 2319 } else { 2320 #if CONFIG_COLLECT_COMPONENT_TIMING 2321 start_timing(cpi, nonrd_pick_inter_mode_sb_time); 2322 #endif 2323 if (seg_skip) { 2324 x->force_zeromv_skip_for_blk = 1; 2325 // TODO(marpan): Consider adding a function for nonrd: 2326 // av1_nonrd_pick_inter_mode_sb_seg_skip(), instead of setting 2327 // x->force_zeromv_skip flag and entering av1_nonrd_pick_inter_mode_sb(). 2328 } 2329 av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx); 2330 #if CONFIG_COLLECT_COMPONENT_TIMING 2331 end_timing(cpi, nonrd_pick_inter_mode_sb_time); 2332 #endif 2333 } 2334 if (cpi->sf.rt_sf.skip_cdef_sb) { 2335 // cdef_strength is initialized to 1 which means skip_cdef, and is updated 2336 // here. Check to see is skipping cdef is allowed. Never skip on slide/scene 2337 // change, near a key frame, or when color sensitivity is set. Always allow 2338 // cdef_skip for seg_skip = 1. 2339 const int allow_cdef_skipping = 2340 seg_skip || 2341 (cpi->rc.frames_since_key > 10 && !cpi->rc.high_source_sad && 2342 !(x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] || 2343 x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)])); 2344 2345 // Find the corresponding 64x64 block. 
It'll be the 128x128 block if that's 2346 // the block size. 2347 const int mi_row_sb = mi_row - mi_row % MI_SIZE_64X64; 2348 const int mi_col_sb = mi_col - mi_col % MI_SIZE_64X64; 2349 MB_MODE_INFO **mi_sb = 2350 cm->mi_params.mi_grid_base + 2351 get_mi_grid_idx(&cm->mi_params, mi_row_sb, mi_col_sb); 2352 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 2353 unsigned int thresh_spatial_var = 2354 (cpi->oxcf.speed >= 11 && !is_720p_or_larger && 2355 cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) 2356 ? 400 2357 : UINT_MAX; 2358 // For skip_cdef_sb = 1: do not skip if allow_cdef_skipping is false or 2359 // intra or new mv is picked, with possible conidition on spatial variance. 2360 // For skip_cdef_sb >= 2: more aggressive mode to always skip unless 2361 // allow_cdef_skipping is false and source_variance is non-zero. 2362 if (cpi->sf.rt_sf.skip_cdef_sb >= 2) { 2363 mi_sb[0]->cdef_strength = 2364 mi_sb[0]->cdef_strength && 2365 (allow_cdef_skipping || x->source_variance == 0); 2366 } else { 2367 mi_sb[0]->cdef_strength = 2368 mi_sb[0]->cdef_strength && allow_cdef_skipping && 2369 !(x->source_variance < thresh_spatial_var && 2370 (mbmi->mode < INTRA_MODES || mbmi->mode == NEWMV)); 2371 } 2372 // Store in the pickmode context. 
2373 ctx->mic.cdef_strength = mi_sb[0]->cdef_strength; 2374 } 2375 x->rdmult = orig_rdmult; 2376 ctx->rd_stats.rate = rd_cost->rate; 2377 ctx->rd_stats.dist = rd_cost->dist; 2378 ctx->rd_stats.rdcost = rd_cost->rdcost; 2379 #if CONFIG_COLLECT_COMPONENT_TIMING 2380 end_timing(cpi, pick_sb_modes_nonrd_time); 2381 #endif 2382 } 2383 2384 static int try_split_partition(AV1_COMP *const cpi, ThreadData *const td, 2385 TileDataEnc *const tile_data, 2386 TileInfo *const tile_info, TokenExtra **tp, 2387 MACROBLOCK *const x, MACROBLOCKD *const xd, 2388 const CommonModeInfoParams *const mi_params, 2389 const int mi_row, const int mi_col, 2390 const BLOCK_SIZE bsize, const int pl, 2391 PC_TREE *pc_tree) { 2392 AV1_COMMON *const cm = &cpi->common; 2393 const ModeCosts *mode_costs = &x->mode_costs; 2394 const int hbs = mi_size_wide[bsize] / 2; 2395 if (mi_row + mi_size_high[bsize] >= mi_params->mi_rows || 2396 mi_col + mi_size_wide[bsize] >= mi_params->mi_cols) 2397 return 0; 2398 if (bsize <= BLOCK_8X8 || frame_is_intra_only(cm)) return 0; 2399 if (x->content_state_sb.source_sad_nonrd <= kLowSad) return 0; 2400 2401 // Do not try split partition when the source sad is small, or 2402 // the prediction residual is small. 
2403 const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME); 2404 const struct scale_factors *const sf = 2405 get_ref_scale_factors_const(cm, LAST_FRAME); 2406 const int num_planes = av1_num_planes(cm); 2407 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); 2408 av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf, num_planes); 2409 int block_sad = 0; 2410 for (int plane = 0; plane < num_planes; ++plane) { 2411 const struct macroblock_plane *const p = &x->plane[plane]; 2412 const struct macroblockd_plane *const pd = &xd->plane[plane]; 2413 const BLOCK_SIZE bs = 2414 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); 2415 const unsigned int plane_sad = cpi->ppi->fn_ptr[bs].sdf( 2416 p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride); 2417 block_sad += plane_sad; 2418 } 2419 const int blk_pix = block_size_wide[bsize] * block_size_high[bsize]; 2420 const int block_avg_sad = block_sad / blk_pix; 2421 // TODO(chengchen): find a proper threshold. It might change according to 2422 // q as well. 
2423 const int threshold = 25; 2424 if (block_avg_sad < threshold) return 0; 2425 2426 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; 2427 RD_STATS split_rdc, none_rdc; 2428 av1_invalid_rd_stats(&split_rdc); 2429 av1_invalid_rd_stats(&none_rdc); 2430 av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, 3); 2431 xd->above_txfm_context = 2432 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 2433 xd->left_txfm_context = 2434 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 2435 2436 // Calculate rdcost for none partition 2437 pc_tree->partitioning = PARTITION_NONE; 2438 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 2439 if (!pc_tree->none) { 2440 pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf); 2441 if (!pc_tree->none) 2442 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2443 "Failed to allocate PICK_MODE_CONTEXT"); 2444 } else { 2445 av1_reset_pmc(pc_tree->none); 2446 } 2447 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, 2448 pc_tree->none); 2449 none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE]; 2450 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist); 2451 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3); 2452 2453 // Calculate rdcost for split partition 2454 pc_tree->partitioning = PARTITION_SPLIT; 2455 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 2456 av1_init_rd_stats(&split_rdc); 2457 split_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT]; 2458 if (subsize >= BLOCK_8X8) { 2459 split_rdc.rate += (mode_costs->partition_cost[pl][PARTITION_NONE] * 4); 2460 } 2461 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 2462 if (!pc_tree->split[i]) { 2463 pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); 2464 if (!pc_tree->split[i]) 2465 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2466 "Failed to allocate PC_TREE"); 2467 } 2468 pc_tree->split[i]->index = i; 2469 } 2470 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 2471 
RD_STATS block_rdc; 2472 av1_invalid_rd_stats(&block_rdc); 2473 int x_idx = (i & 1) * hbs; 2474 int y_idx = (i >> 1) * hbs; 2475 if ((mi_row + y_idx >= mi_params->mi_rows) || 2476 (mi_col + x_idx >= mi_params->mi_cols)) 2477 continue; 2478 xd->above_txfm_context = 2479 cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx; 2480 xd->left_txfm_context = 2481 xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK); 2482 if (!pc_tree->split[i]->none) { 2483 pc_tree->split[i]->none = 2484 av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 2485 if (!pc_tree->split[i]->none) 2486 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2487 "Failed to allocate PICK_MODE_CONTEXT"); 2488 } else { 2489 av1_reset_pmc(pc_tree->split[i]->none); 2490 } 2491 pc_tree->split[i]->partitioning = PARTITION_NONE; 2492 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, 2493 &block_rdc, subsize, pc_tree->split[i]->none); 2494 split_rdc.rate += block_rdc.rate; 2495 split_rdc.dist += block_rdc.dist; 2496 av1_rd_cost_update(x->rdmult, &split_rdc); 2497 if (none_rdc.rdcost < split_rdc.rdcost) break; 2498 if (i != SUB_PARTITIONS_SPLIT - 1) 2499 encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1, 2500 subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL); 2501 } 2502 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3); 2503 split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist); 2504 const int split = split_rdc.rdcost < none_rdc.rdcost; 2505 2506 return split; 2507 } 2508 2509 // Returns if SPLIT partitions should be evaluated 2510 static bool calc_do_split_flag(const AV1_COMP *cpi, const MACROBLOCK *x, 2511 const PC_TREE *pc_tree, const RD_STATS *none_rdc, 2512 const CommonModeInfoParams *mi_params, 2513 int mi_row, int mi_col, int hbs, 2514 BLOCK_SIZE bsize, PARTITION_TYPE partition) { 2515 const AV1_COMMON *const cm = &cpi->common; 2516 const int is_larger_qindex = cm->quant_params.base_qindex > 100; 2517 const 
MACROBLOCKD *const xd = &x->e_mbd; 2518 bool do_split = 2519 (cpi->sf.rt_sf.nonrd_check_partition_merge_mode == 3) 2520 ? (bsize <= BLOCK_32X32 || (is_larger_qindex && bsize <= BLOCK_64X64)) 2521 : true; 2522 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN || 2523 cpi->sf.rt_sf.nonrd_check_partition_merge_mode < 2 || 2524 cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) || 2525 !none_rdc->skip_txfm) 2526 return do_split; 2527 2528 const int use_model_yrd_large = get_model_rd_flag(cpi, xd, bsize); 2529 2530 // When model based skip is not used (i.e.,use_model_yrd_large = 0), skip_txfm 2531 // would have been populated based on Hadamard transform and skip_txfm flag is 2532 // more reliable. Hence SPLIT evaluation is disabled at all quantizers for 8x8 2533 // and 16x16 blocks. 2534 // When model based skip is used (i.e.,use_model_yrd_large = 1), skip_txfm may 2535 // not be reliable. Hence SPLIT evaluation is disabled only at lower 2536 // quantizers for blocks >= 32x32. 2537 if ((!use_model_yrd_large) || (!is_larger_qindex)) return false; 2538 2539 // Use residual statistics to decide if SPLIT partition should be evaluated 2540 // for 32x32 blocks. The pruning logic is avoided for larger block size to 2541 // avoid the visual artifacts 2542 if (pc_tree->none->mic.mode == NEWMV && bsize == BLOCK_32X32 && do_split) { 2543 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); 2544 assert(subsize < BLOCK_SIZES_ALL); 2545 double min_per_pixel_error = DBL_MAX; 2546 double max_per_pixel_error = 0.; 2547 int i; 2548 for (i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 2549 const int x_idx = (i & 1) * hbs; 2550 const int y_idx = (i >> 1) * hbs; 2551 if ((mi_row + y_idx >= mi_params->mi_rows) || 2552 (mi_col + x_idx >= mi_params->mi_cols)) { 2553 break; 2554 } 2555 2556 // Populate the appropriate buffer pointers. 2557 // Pass scale factors as NULL as the base pointer of the block would have 2558 // been calculated appropriately. 
2559 struct buf_2d src_split_buf_2d, pred_split_buf_2d; 2560 const struct buf_2d *src_none_buf_2d = &x->plane[AOM_PLANE_Y].src; 2561 setup_pred_plane(&src_split_buf_2d, subsize, src_none_buf_2d->buf, 2562 src_none_buf_2d->width, src_none_buf_2d->height, 2563 src_none_buf_2d->stride, y_idx, x_idx, NULL, 0, 0); 2564 const struct buf_2d *pred_none_buf_2d = &xd->plane[AOM_PLANE_Y].dst; 2565 setup_pred_plane(&pred_split_buf_2d, subsize, pred_none_buf_2d->buf, 2566 pred_none_buf_2d->width, pred_none_buf_2d->height, 2567 pred_none_buf_2d->stride, y_idx, x_idx, NULL, 0, 0); 2568 2569 unsigned int curr_uint_mse; 2570 const unsigned int curr_uint_var = cpi->ppi->fn_ptr[subsize].vf( 2571 src_split_buf_2d.buf, src_split_buf_2d.stride, pred_split_buf_2d.buf, 2572 pred_split_buf_2d.stride, &curr_uint_mse); 2573 const double curr_per_pixel_error = 2574 sqrt((double)curr_uint_var / block_size_wide[subsize] / 2575 block_size_high[subsize]); 2576 if (curr_per_pixel_error < min_per_pixel_error) 2577 min_per_pixel_error = curr_per_pixel_error; 2578 if (curr_per_pixel_error > max_per_pixel_error) 2579 max_per_pixel_error = curr_per_pixel_error; 2580 } 2581 2582 // Prune based on residual statistics only if all the sub-partitions are 2583 // valid. 
2584 if (i == SUB_PARTITIONS_SPLIT) { 2585 if (max_per_pixel_error - min_per_pixel_error <= 1.5) do_split = false; 2586 } 2587 } 2588 2589 return do_split; 2590 } 2591 2592 static void try_merge(AV1_COMP *const cpi, ThreadData *td, 2593 TileDataEnc *tile_data, MB_MODE_INFO **mib, 2594 TokenExtra **tp, const int mi_row, const int mi_col, 2595 const BLOCK_SIZE bsize, PC_TREE *const pc_tree, 2596 const PARTITION_TYPE partition, const BLOCK_SIZE subsize, 2597 const int pl) { 2598 AV1_COMMON *const cm = &cpi->common; 2599 const CommonModeInfoParams *const mi_params = &cm->mi_params; 2600 TileInfo *const tile_info = &tile_data->tile_info; 2601 MACROBLOCK *const x = &td->mb; 2602 MACROBLOCKD *const xd = &x->e_mbd; 2603 const ModeCosts *mode_costs = &x->mode_costs; 2604 const int num_planes = av1_num_planes(cm); 2605 // Only square blocks from 8x8 to 128x128 are supported 2606 assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128); 2607 const int bs = mi_size_wide[bsize]; 2608 const int hbs = bs / 2; 2609 bool do_split = false; 2610 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; 2611 RD_STATS split_rdc, none_rdc; 2612 av1_invalid_rd_stats(&split_rdc); 2613 av1_invalid_rd_stats(&none_rdc); 2614 av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 2615 xd->above_txfm_context = 2616 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 2617 xd->left_txfm_context = 2618 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 2619 pc_tree->partitioning = PARTITION_NONE; 2620 if (!pc_tree->none) { 2621 pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf); 2622 if (!pc_tree->none) 2623 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2624 "Failed to allocate PICK_MODE_CONTEXT"); 2625 } else { 2626 av1_reset_pmc(pc_tree->none); 2627 } 2628 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, 2629 pc_tree->none); 2630 none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE]; 2631 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, 
none_rdc.dist); 2632 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 2633 2634 if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode < 2 || 2635 none_rdc.skip_txfm != 1 || pc_tree->none->mic.mode == NEWMV) { 2636 do_split = calc_do_split_flag(cpi, x, pc_tree, &none_rdc, mi_params, mi_row, 2637 mi_col, hbs, bsize, partition); 2638 if (do_split) { 2639 av1_init_rd_stats(&split_rdc); 2640 split_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT]; 2641 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 2642 RD_STATS block_rdc; 2643 av1_invalid_rd_stats(&block_rdc); 2644 int x_idx = (i & 1) * hbs; 2645 int y_idx = (i >> 1) * hbs; 2646 if ((mi_row + y_idx >= mi_params->mi_rows) || 2647 (mi_col + x_idx >= mi_params->mi_cols)) 2648 continue; 2649 xd->above_txfm_context = 2650 cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx; 2651 xd->left_txfm_context = 2652 xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK); 2653 if (!pc_tree->split[i]->none) { 2654 pc_tree->split[i]->none = 2655 av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 2656 if (!pc_tree->split[i]->none) 2657 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2658 "Failed to allocate PICK_MODE_CONTEXT"); 2659 } else { 2660 av1_reset_pmc(pc_tree->split[i]->none); 2661 } 2662 pc_tree->split[i]->partitioning = PARTITION_NONE; 2663 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, 2664 &block_rdc, subsize, pc_tree->split[i]->none); 2665 // TODO(yunqingwang): The rate here did not include the cost of 2666 // signaling PARTITION_NONE token in the sub-blocks. 
2667 split_rdc.rate += block_rdc.rate; 2668 split_rdc.dist += block_rdc.dist; 2669 2670 av1_rd_cost_update(x->rdmult, &split_rdc); 2671 2672 if (none_rdc.rdcost < split_rdc.rdcost) { 2673 break; 2674 } 2675 2676 if (i != SUB_PARTITIONS_SPLIT - 1) 2677 encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 2678 1, subsize, PARTITION_NONE, pc_tree->split[i]->none, 2679 NULL); 2680 } 2681 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 2682 split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist); 2683 } 2684 } 2685 2686 if (none_rdc.rdcost < split_rdc.rdcost) { 2687 /* Predicted samples can not be reused for PARTITION_NONE since same 2688 * buffer is being used to store the reconstructed samples of 2689 * PARTITION_SPLIT block. */ 2690 if (do_split) x->reuse_inter_pred = false; 2691 2692 mib[0]->bsize = bsize; 2693 pc_tree->partitioning = PARTITION_NONE; 2694 encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition, 2695 pc_tree->none, NULL); 2696 } else { 2697 mib[0]->bsize = subsize; 2698 pc_tree->partitioning = PARTITION_SPLIT; 2699 /* Predicted samples can not be reused for PARTITION_SPLIT since same 2700 * buffer is being used to write the reconstructed samples. */ 2701 // TODO(Cherma): Store and reuse predicted samples generated by 2702 // encode_b_nonrd() in DRY_RUN_NORMAL mode. 2703 x->reuse_inter_pred = false; 2704 2705 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 2706 int x_idx = (i & 1) * hbs; 2707 int y_idx = (i >> 1) * hbs; 2708 if ((mi_row + y_idx >= mi_params->mi_rows) || 2709 (mi_col + x_idx >= mi_params->mi_cols)) 2710 continue; 2711 2712 // Note: We don't reset pc_tree->split[i]->none here because it 2713 // could contain results from the additional check. Instead, it is 2714 // reset before we enter the nonrd_check_partition_merge_mode 2715 // condition. 
2716 if (!pc_tree->split[i]->none) { 2717 pc_tree->split[i]->none = 2718 av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 2719 if (!pc_tree->split[i]->none) 2720 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2721 "Failed to allocate PICK_MODE_CONTEXT"); 2722 } 2723 encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0, 2724 subsize, PARTITION_NONE, pc_tree->split[i]->none, NULL); 2725 } 2726 } 2727 } 2728 2729 // Evaluate if the sub-partitions can be merged directly into a large partition 2730 // without calculating the RD cost. 2731 static void direct_partition_merging(AV1_COMP *cpi, ThreadData *td, 2732 TileDataEnc *tile_data, MB_MODE_INFO **mib, 2733 int mi_row, int mi_col, BLOCK_SIZE bsize) { 2734 AV1_COMMON *const cm = &cpi->common; 2735 const CommonModeInfoParams *const mi_params = &cm->mi_params; 2736 TileInfo *const tile_info = &tile_data->tile_info; 2737 MACROBLOCK *const x = &td->mb; 2738 MACROBLOCKD *const xd = &x->e_mbd; 2739 const int bs = mi_size_wide[bsize]; 2740 const int hbs = bs / 2; 2741 const PARTITION_TYPE partition = 2742 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize) 2743 : PARTITION_NONE; 2744 BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); 2745 2746 MB_MODE_INFO **b0 = mib; 2747 MB_MODE_INFO **b1 = mib + hbs; 2748 MB_MODE_INFO **b2 = mib + hbs * mi_params->mi_stride; 2749 MB_MODE_INFO **b3 = mib + hbs * mi_params->mi_stride + hbs; 2750 2751 // Check if the following conditions are met. This can be updated 2752 // later with more support added. 
2753 const int further_split = b0[0]->bsize < subsize || b1[0]->bsize < subsize || 2754 b2[0]->bsize < subsize || b3[0]->bsize < subsize; 2755 if (further_split) return; 2756 2757 const int no_skip = !b0[0]->skip_txfm || !b1[0]->skip_txfm || 2758 !b2[0]->skip_txfm || !b3[0]->skip_txfm; 2759 if (no_skip) return; 2760 2761 const int compound = (b0[0]->ref_frame[1] != b1[0]->ref_frame[1] || 2762 b0[0]->ref_frame[1] != b2[0]->ref_frame[1] || 2763 b0[0]->ref_frame[1] != b3[0]->ref_frame[1] || 2764 b0[0]->ref_frame[1] > NONE_FRAME); 2765 if (compound) return; 2766 2767 // Intra modes aren't considered here. 2768 const int different_ref = (b0[0]->ref_frame[0] != b1[0]->ref_frame[0] || 2769 b0[0]->ref_frame[0] != b2[0]->ref_frame[0] || 2770 b0[0]->ref_frame[0] != b3[0]->ref_frame[0] || 2771 b0[0]->ref_frame[0] <= INTRA_FRAME); 2772 if (different_ref) return; 2773 2774 const int different_mode = 2775 (b0[0]->mode != b1[0]->mode || b0[0]->mode != b2[0]->mode || 2776 b0[0]->mode != b3[0]->mode); 2777 if (different_mode) return; 2778 2779 const int unsupported_mode = 2780 (b0[0]->mode != NEARESTMV && b0[0]->mode != GLOBALMV); 2781 if (unsupported_mode) return; 2782 2783 const int different_mv = (b0[0]->mv[0].as_int != b1[0]->mv[0].as_int || 2784 b0[0]->mv[0].as_int != b2[0]->mv[0].as_int || 2785 b0[0]->mv[0].as_int != b3[0]->mv[0].as_int); 2786 if (different_mv) return; 2787 2788 const int unsupported_motion_mode = 2789 (b0[0]->motion_mode != b1[0]->motion_mode || 2790 b0[0]->motion_mode != b2[0]->motion_mode || 2791 b0[0]->motion_mode != b3[0]->motion_mode || 2792 b0[0]->motion_mode != SIMPLE_TRANSLATION); 2793 if (unsupported_motion_mode) return; 2794 2795 const int diffent_filter = 2796 (b0[0]->interp_filters.as_int != b1[0]->interp_filters.as_int || 2797 b0[0]->interp_filters.as_int != b2[0]->interp_filters.as_int || 2798 b0[0]->interp_filters.as_int != b3[0]->interp_filters.as_int); 2799 if (diffent_filter) return; 2800 2801 const int different_seg = (b0[0]->segment_id != 
b1[0]->segment_id || 2802 b0[0]->segment_id != b2[0]->segment_id || 2803 b0[0]->segment_id != b3[0]->segment_id); 2804 if (different_seg) return; 2805 2806 // Evaluate the ref_mv. 2807 MB_MODE_INFO **this_mi = mib; 2808 BLOCK_SIZE orig_bsize = this_mi[0]->bsize; 2809 const PARTITION_TYPE orig_partition = this_mi[0]->partition; 2810 2811 this_mi[0]->bsize = bsize; 2812 this_mi[0]->partition = PARTITION_NONE; 2813 this_mi[0]->skip_txfm = 1; 2814 2815 // TODO(yunqing): functions called below can be optimized by 2816 // removing unrelated operations. 2817 av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row, 2818 mi_col, bsize); 2819 2820 const MV_REFERENCE_FRAME ref_frame = this_mi[0]->ref_frame[0]; 2821 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES]; 2822 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]; 2823 int force_skip_low_temp_var = 0; 2824 int skip_pred_mv = 0; 2825 bool use_scaled_ref; 2826 2827 for (int i = 0; i < MB_MODE_COUNT; ++i) { 2828 for (int j = 0; j < REF_FRAMES; ++j) { 2829 frame_mv[i][j].as_int = INVALID_MV; 2830 } 2831 } 2832 av1_copy(x->color_sensitivity, x->color_sensitivity_sb); 2833 skip_pred_mv = (x->nonrd_prune_ref_frame_search > 2 && 2834 x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_U)] != 2 && 2835 x->color_sensitivity[COLOR_SENS_IDX(AOM_PLANE_V)] != 2); 2836 2837 find_predictors(cpi, x, ref_frame, frame_mv, yv12_mb, bsize, 2838 force_skip_low_temp_var, skip_pred_mv, &use_scaled_ref); 2839 2840 int continue_merging = 1; 2841 if (frame_mv[NEARESTMV][ref_frame].as_mv.row != b0[0]->mv[0].as_mv.row || 2842 frame_mv[NEARESTMV][ref_frame].as_mv.col != b0[0]->mv[0].as_mv.col) 2843 continue_merging = 0; 2844 2845 if (!continue_merging) { 2846 this_mi[0]->bsize = orig_bsize; 2847 this_mi[0]->partition = orig_partition; 2848 2849 // TODO(yunqing): Store the results and restore here instead of 2850 // calling find_predictors() again. 
2851 av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row, 2852 mi_col, this_mi[0]->bsize); 2853 find_predictors(cpi, x, ref_frame, frame_mv, yv12_mb, this_mi[0]->bsize, 2854 force_skip_low_temp_var, skip_pred_mv, &use_scaled_ref); 2855 } else { 2856 struct scale_factors *sf = get_ref_scale_factors(cm, ref_frame); 2857 const int is_scaled = av1_is_scaled(sf); 2858 const int is_y_subpel_mv = (abs(this_mi[0]->mv[0].as_mv.row) % 8) || 2859 (abs(this_mi[0]->mv[0].as_mv.col) % 8); 2860 const int is_uv_subpel_mv = (abs(this_mi[0]->mv[0].as_mv.row) % 16) || 2861 (abs(this_mi[0]->mv[0].as_mv.col) % 16); 2862 2863 if (cpi->ppi->use_svc || is_scaled || is_y_subpel_mv || is_uv_subpel_mv) { 2864 const int num_planes = av1_num_planes(cm); 2865 set_ref_ptrs(cm, xd, ref_frame, this_mi[0]->ref_frame[1]); 2866 const YV12_BUFFER_CONFIG *cfg = get_ref_frame_yv12_buf(cm, ref_frame); 2867 av1_setup_pre_planes(xd, 0, cfg, mi_row, mi_col, 2868 xd->block_ref_scale_factors[0], num_planes); 2869 2870 if (!cpi->ppi->use_svc && !is_scaled && !is_y_subpel_mv) { 2871 assert(is_uv_subpel_mv == 1); 2872 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 1, 2873 num_planes - 1); 2874 } else { 2875 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 2876 num_planes - 1); 2877 } 2878 } 2879 2880 // Copy out mbmi_ext information. 2881 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext; 2882 MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame = x->mbmi_ext_frame; 2883 av1_copy_mbmi_ext_to_mbmi_ext_frame( 2884 mbmi_ext_frame, mbmi_ext, av1_ref_frame_type(this_mi[0]->ref_frame)); 2885 2886 const BLOCK_SIZE this_subsize = 2887 get_partition_subsize(bsize, this_mi[0]->partition); 2888 // Update partition contexts. 
2889 update_ext_partition_context(xd, mi_row, mi_col, this_subsize, bsize, 2890 this_mi[0]->partition); 2891 2892 const int num_planes = av1_num_planes(cm); 2893 av1_reset_entropy_context(xd, bsize, num_planes); 2894 2895 // Note: use x->txfm_search_params.tx_mode_search_type instead of 2896 // cm->features.tx_mode here. 2897 TX_SIZE tx_size = 2898 tx_size_from_tx_mode(bsize, x->txfm_search_params.tx_mode_search_type); 2899 if (xd->lossless[this_mi[0]->segment_id]) tx_size = TX_4X4; 2900 this_mi[0]->tx_size = tx_size; 2901 memset(this_mi[0]->inter_tx_size, this_mi[0]->tx_size, 2902 sizeof(this_mi[0]->inter_tx_size)); 2903 2904 // Update txfm contexts. 2905 xd->above_txfm_context = 2906 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 2907 xd->left_txfm_context = 2908 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 2909 set_txfm_ctxs(this_mi[0]->tx_size, xd->width, xd->height, 2910 this_mi[0]->skip_txfm && is_inter_block(this_mi[0]), xd); 2911 2912 // Update mi for this partition block. 2913 for (int y = 0; y < bs; y++) { 2914 for (int x_idx = 0; x_idx < bs; x_idx++) { 2915 this_mi[x_idx + y * mi_params->mi_stride] = this_mi[0]; 2916 } 2917 } 2918 } 2919 } 2920 2921 /*!\brief AV1 block partition application (minimal RD search). 2922 * 2923 * \ingroup partition_search 2924 * \callgraph 2925 * \callergraph 2926 * Encode the block by applying pre-calculated partition patterns that are 2927 * represented by coding block sizes stored in the mbmi array. The only 2928 * partition adjustment allowed is merging leaf split nodes if it leads to a 2929 * lower rd cost. The partition types are limited to a basic set: none, horz, 2930 * vert, and split. This function is only used in the real-time mode. 
2931 * 2932 * \param[in] cpi Top-level encoder structure 2933 * \param[in] td Pointer to thread data 2934 * \param[in] tile_data Pointer to struct holding adaptive 2935 data/contexts/models for the tile during encoding 2936 * \param[in] mib Array representing MB_MODE_INFO pointers for mi 2937 blocks starting from the first pixel of the current 2938 block 2939 * \param[in] tp Pointer to the starting token 2940 * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE 2941 * \param[in] mi_col Column coordinate of the block in a step size of 2942 MI_SIZE 2943 * \param[in] bsize Current block size 2944 * \param[in] pc_tree Pointer to the PC_TREE node holding the picked 2945 partitions and mode info for the current block 2946 * 2947 * \remark Nothing is returned. The pc_tree struct is modified to store the 2948 * picked partition and modes. 2949 */ 2950 void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td, 2951 TileDataEnc *tile_data, MB_MODE_INFO **mib, 2952 TokenExtra **tp, int mi_row, int mi_col, 2953 BLOCK_SIZE bsize, PC_TREE *pc_tree) { 2954 AV1_COMMON *const cm = &cpi->common; 2955 const CommonModeInfoParams *const mi_params = &cm->mi_params; 2956 TileInfo *const tile_info = &tile_data->tile_info; 2957 MACROBLOCK *const x = &td->mb; 2958 MACROBLOCKD *const xd = &x->e_mbd; 2959 const ModeCosts *mode_costs = &x->mode_costs; 2960 // Only square blocks from 8x8 to 128x128 are supported 2961 assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128); 2962 const int bs = mi_size_wide[bsize]; 2963 const int hbs = bs / 2; 2964 PARTITION_TYPE partition = (bsize >= BLOCK_8X8) 2965 ? get_partition(cm, mi_row, mi_col, bsize) 2966 : PARTITION_NONE; 2967 BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); 2968 assert(subsize <= BLOCK_LARGEST); 2969 const int pl = (bsize >= BLOCK_8X8) 2970 ? 
partition_plane_context(xd, mi_row, mi_col, bsize) 2971 : 0; 2972 2973 RD_STATS dummy_cost; 2974 av1_invalid_rd_stats(&dummy_cost); 2975 2976 if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; 2977 2978 assert(mi_size_wide[bsize] == mi_size_high[bsize]); 2979 2980 xd->above_txfm_context = 2981 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 2982 xd->left_txfm_context = 2983 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 2984 2985 // Initialize default mode evaluation params 2986 set_mode_eval_params(cpi, x, DEFAULT_EVAL); 2987 2988 x->reuse_inter_pred = cpi->sf.rt_sf.reuse_inter_pred_nonrd; 2989 2990 int change_none_to_split = 0; 2991 if (partition == PARTITION_NONE && 2992 cpi->sf.rt_sf.nonrd_check_partition_split == 1) { 2993 change_none_to_split = 2994 try_split_partition(cpi, td, tile_data, tile_info, tp, x, xd, mi_params, 2995 mi_row, mi_col, bsize, pl, pc_tree); 2996 if (change_none_to_split) { 2997 partition = PARTITION_SPLIT; 2998 subsize = get_partition_subsize(bsize, partition); 2999 assert(subsize <= BLOCK_LARGEST); 3000 } 3001 } 3002 3003 pc_tree->partitioning = partition; 3004 3005 switch (partition) { 3006 case PARTITION_NONE: 3007 if (!pc_tree->none) { 3008 pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf); 3009 if (!pc_tree->none) 3010 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 3011 "Failed to allocate PICK_MODE_CONTEXT"); 3012 } else { 3013 av1_reset_pmc(pc_tree->none); 3014 } 3015 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost, bsize, 3016 pc_tree->none); 3017 encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, 3018 partition, pc_tree->none, NULL); 3019 break; 3020 case PARTITION_VERT: 3021 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 3022 if (!pc_tree->vertical[i]) { 3023 pc_tree->vertical[i] = 3024 av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 3025 if (!pc_tree->vertical[i]) 3026 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 
3027 "Failed to allocate PICK_MODE_CONTEXT"); 3028 } else { 3029 av1_reset_pmc(pc_tree->vertical[i]); 3030 } 3031 } 3032 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost, 3033 subsize, pc_tree->vertical[0]); 3034 encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize, 3035 PARTITION_VERT, pc_tree->vertical[0], NULL); 3036 if (mi_col + hbs < mi_params->mi_cols && bsize > BLOCK_8X8) { 3037 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col + hbs, 3038 &dummy_cost, subsize, pc_tree->vertical[1]); 3039 encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize, 3040 PARTITION_VERT, pc_tree->vertical[1], NULL); 3041 } 3042 break; 3043 case PARTITION_HORZ: 3044 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 3045 if (!pc_tree->horizontal[i]) { 3046 pc_tree->horizontal[i] = 3047 av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 3048 if (!pc_tree->horizontal[i]) 3049 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 3050 "Failed to allocate PICK_MODE_CONTEXT"); 3051 } else { 3052 av1_reset_pmc(pc_tree->horizontal[i]); 3053 } 3054 } 3055 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost, 3056 subsize, pc_tree->horizontal[0]); 3057 encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize, 3058 PARTITION_HORZ, pc_tree->horizontal[0], NULL); 3059 3060 if (mi_row + hbs < mi_params->mi_rows && bsize > BLOCK_8X8) { 3061 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + hbs, mi_col, 3062 &dummy_cost, subsize, pc_tree->horizontal[1]); 3063 encode_b_nonrd(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize, 3064 PARTITION_HORZ, pc_tree->horizontal[1], NULL); 3065 } 3066 break; 3067 case PARTITION_SPLIT: 3068 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 3069 if (!pc_tree->split[i]) { 3070 pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); 3071 if (!pc_tree->split[i]) 3072 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 3073 "Failed to allocate PC_TREE"); 3074 } 3075 
pc_tree->split[i]->index = i; 3076 } 3077 if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode && 3078 av1_is_leaf_split_partition(cm, mi_row, mi_col, bsize) && 3079 !frame_is_intra_only(cm) && bsize <= BLOCK_64X64) { 3080 try_merge(cpi, td, tile_data, mib, tp, mi_row, mi_col, bsize, pc_tree, 3081 partition, subsize, pl); 3082 } else { 3083 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 3084 int x_idx = (i & 1) * hbs; 3085 int y_idx = (i >> 1) * hbs; 3086 int jj = i >> 1, ii = i & 0x01; 3087 if ((mi_row + y_idx >= mi_params->mi_rows) || 3088 (mi_col + x_idx >= mi_params->mi_cols)) 3089 continue; 3090 av1_nonrd_use_partition( 3091 cpi, td, tile_data, 3092 mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp, 3093 mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]); 3094 } 3095 3096 if (!change_none_to_split) { 3097 // Note: Palette, cfl are not supported. 3098 if (!frame_is_intra_only(cm) && !tile_data->allow_update_cdf && 3099 cpi->sf.rt_sf.partition_direct_merging && 3100 mode_costs->partition_cost[pl][PARTITION_NONE] < 3101 mode_costs->partition_cost[pl][PARTITION_SPLIT] && 3102 (mi_row + bs <= mi_params->mi_rows) && 3103 (mi_col + bs <= mi_params->mi_cols)) { 3104 direct_partition_merging(cpi, td, tile_data, mib, mi_row, mi_col, 3105 bsize); 3106 } 3107 } 3108 } 3109 break; 3110 case PARTITION_VERT_A: 3111 case PARTITION_VERT_B: 3112 case PARTITION_HORZ_A: 3113 case PARTITION_HORZ_B: 3114 case PARTITION_HORZ_4: 3115 case PARTITION_VERT_4: 3116 assert(0 && "Cannot handle extended partition types"); 3117 default: assert(0); break; 3118 } 3119 } 3120 3121 #if !CONFIG_REALTIME_ONLY 3122 // Try searching for an encoding for the given subblock. Returns zero if the 3123 // rdcost is already too high (to tell the caller not to bother searching for 3124 // encodings of further subblocks). 
3125 static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td, 3126 TileDataEnc *tile_data, TokenExtra **tp, int is_last, 3127 int mi_row, int mi_col, BLOCK_SIZE subsize, 3128 RD_STATS best_rdcost, RD_STATS *sum_rdc, 3129 PARTITION_TYPE partition, 3130 PICK_MODE_CONTEXT *this_ctx) { 3131 MACROBLOCK *const x = &td->mb; 3132 const int orig_mult = x->rdmult; 3133 setup_block_rdmult(cpi, x, mi_row, mi_col, subsize, NO_AQ, NULL); 3134 3135 av1_rd_cost_update(x->rdmult, &best_rdcost); 3136 3137 RD_STATS rdcost_remaining; 3138 av1_rd_stats_subtraction(x->rdmult, &best_rdcost, sum_rdc, &rdcost_remaining); 3139 RD_STATS this_rdc; 3140 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, partition, 3141 subsize, this_ctx, rdcost_remaining); 3142 3143 if (this_rdc.rate == INT_MAX) { 3144 sum_rdc->rdcost = INT64_MAX; 3145 } else { 3146 sum_rdc->rate += this_rdc.rate; 3147 sum_rdc->dist += this_rdc.dist; 3148 av1_rd_cost_update(x->rdmult, sum_rdc); 3149 } 3150 3151 if (sum_rdc->rdcost >= best_rdcost.rdcost) { 3152 x->rdmult = orig_mult; 3153 return 0; 3154 } 3155 3156 if (!is_last) { 3157 av1_update_state(cpi, td, this_ctx, mi_row, mi_col, subsize, 1); 3158 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL); 3159 } 3160 3161 x->rdmult = orig_mult; 3162 return 1; 3163 } 3164 3165 // Tests an AB partition, and updates the encoder status, the pick mode 3166 // contexts, the best rdcost, and the best partition. 
3167 static bool rd_test_partition3(AV1_COMP *const cpi, ThreadData *td, 3168 TileDataEnc *tile_data, TokenExtra **tp, 3169 PC_TREE *pc_tree, RD_STATS *best_rdc, 3170 int64_t *this_rdcost, 3171 PICK_MODE_CONTEXT *ctxs[SUB_PARTITIONS_AB], 3172 int mi_row, int mi_col, BLOCK_SIZE bsize, 3173 PARTITION_TYPE partition, 3174 const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB], 3175 const int ab_mi_pos[SUB_PARTITIONS_AB][2], 3176 const MB_MODE_INFO **mode_cache) { 3177 MACROBLOCK *const x = &td->mb; 3178 const MACROBLOCKD *const xd = &x->e_mbd; 3179 const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 3180 RD_STATS sum_rdc; 3181 av1_init_rd_stats(&sum_rdc); 3182 sum_rdc.rate = x->mode_costs.partition_cost[pl][partition]; 3183 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); 3184 // Loop over sub-partitions in AB partition type. 3185 for (int i = 0; i < SUB_PARTITIONS_AB; i++) { 3186 if (mode_cache && mode_cache[i]) { 3187 x->use_mb_mode_cache = 1; 3188 x->mb_mode_cache = mode_cache[i]; 3189 } 3190 const int mode_search_success = 3191 rd_try_subblock(cpi, td, tile_data, tp, i == SUB_PARTITIONS_AB - 1, 3192 ab_mi_pos[i][0], ab_mi_pos[i][1], ab_subsize[i], 3193 *best_rdc, &sum_rdc, partition, ctxs[i]); 3194 x->use_mb_mode_cache = 0; 3195 x->mb_mode_cache = NULL; 3196 if (!mode_search_success) { 3197 return false; 3198 } 3199 } 3200 3201 av1_rd_cost_update(x->rdmult, &sum_rdc); 3202 *this_rdcost = sum_rdc.rdcost; 3203 if (sum_rdc.rdcost >= best_rdc->rdcost) return false; 3204 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); 3205 *this_rdcost = sum_rdc.rdcost; 3206 if (sum_rdc.rdcost >= best_rdc->rdcost) return false; 3207 3208 *best_rdc = sum_rdc; 3209 pc_tree->partitioning = partition; 3210 return true; 3211 } 3212 3213 #if CONFIG_COLLECT_PARTITION_STATS 3214 static void init_partition_block_timing_stats( 3215 PartitionTimingStats *part_timing_stats) { 3216 av1_zero(*part_timing_stats); 3217 } 3218 3219 static inline void 
start_partition_block_timer( 3220 PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type) { 3221 assert(!part_timing_stats->timer_is_on); 3222 part_timing_stats->partition_attempts[partition_type] += 1; 3223 aom_usec_timer_start(&part_timing_stats->timer); 3224 part_timing_stats->timer_is_on = 1; 3225 } 3226 3227 static inline void end_partition_block_timer( 3228 PartitionTimingStats *part_timing_stats, PARTITION_TYPE partition_type, 3229 int64_t rdcost) { 3230 if (part_timing_stats->timer_is_on) { 3231 aom_usec_timer_mark(&part_timing_stats->timer); 3232 const int64_t time = aom_usec_timer_elapsed(&part_timing_stats->timer); 3233 part_timing_stats->partition_times[partition_type] += time; 3234 part_timing_stats->partition_rdcost[partition_type] = rdcost; 3235 part_timing_stats->timer_is_on = 0; 3236 } 3237 } 3238 static inline void print_partition_timing_stats_with_rdcost( 3239 const PartitionTimingStats *part_timing_stats, int mi_row, int mi_col, 3240 BLOCK_SIZE bsize, FRAME_UPDATE_TYPE frame_update_type, int frame_number, 3241 const RD_STATS *best_rdc, const char *filename) { 3242 FILE *f = fopen(filename, "a"); 3243 fprintf(f, "%d,%d,%d,%d,%d,%d,%" PRId64 ",%" PRId64 ",", bsize, frame_number, 3244 frame_update_type, mi_row, mi_col, best_rdc->rate, best_rdc->dist, 3245 best_rdc->rdcost); 3246 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3247 fprintf(f, "%d,", part_timing_stats->partition_decisions[idx]); 3248 } 3249 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3250 fprintf(f, "%d,", part_timing_stats->partition_attempts[idx]); 3251 } 3252 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3253 fprintf(f, "%" PRId64 ",", part_timing_stats->partition_times[idx]); 3254 } 3255 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3256 if (part_timing_stats->partition_rdcost[idx] == INT64_MAX) { 3257 fprintf(f, "%d,", -1); 3258 } else { 3259 fprintf(f, "%" PRId64 ",", part_timing_stats->partition_rdcost[idx]); 3260 } 3261 } 3262 
fprintf(f, "\n"); 3263 fclose(f); 3264 } 3265 3266 static inline void print_partition_timing_stats( 3267 const PartitionTimingStats *part_timing_stats, int intra_only, 3268 int show_frame, const BLOCK_SIZE bsize, const char *filename) { 3269 FILE *f = fopen(filename, "a"); 3270 fprintf(f, "%d,%d,%d,", bsize, show_frame, intra_only); 3271 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3272 fprintf(f, "%d,", part_timing_stats->partition_decisions[idx]); 3273 } 3274 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3275 fprintf(f, "%d,", part_timing_stats->partition_attempts[idx]); 3276 } 3277 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3278 fprintf(f, "%" PRId64 ",", part_timing_stats->partition_times[idx]); 3279 } 3280 fprintf(f, "\n"); 3281 fclose(f); 3282 } 3283 3284 static inline void accumulate_partition_timing_stats( 3285 FramePartitionTimingStats *fr_part_timing_stats, 3286 const PartitionTimingStats *part_timing_stats, BLOCK_SIZE bsize) { 3287 const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize); 3288 int *agg_attempts = fr_part_timing_stats->partition_attempts[bsize_idx]; 3289 int *agg_decisions = fr_part_timing_stats->partition_decisions[bsize_idx]; 3290 int64_t *agg_times = fr_part_timing_stats->partition_times[bsize_idx]; 3291 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) { 3292 agg_attempts[idx] += part_timing_stats->partition_attempts[idx]; 3293 agg_decisions[idx] += part_timing_stats->partition_decisions[idx]; 3294 agg_times[idx] += part_timing_stats->partition_times[idx]; 3295 } 3296 } 3297 #endif // CONFIG_COLLECT_PARTITION_STATS 3298 3299 // Initialize state variables of partition search used in 3300 // av1_rd_pick_partition(). 
3301 static void init_partition_search_state_params( 3302 MACROBLOCK *x, AV1_COMP *const cpi, PartitionSearchState *part_search_state, 3303 int mi_row, int mi_col, BLOCK_SIZE bsize) { 3304 MACROBLOCKD *const xd = &x->e_mbd; 3305 const AV1_COMMON *const cm = &cpi->common; 3306 PartitionBlkParams *blk_params = &part_search_state->part_blk_params; 3307 const CommonModeInfoParams *const mi_params = &cpi->common.mi_params; 3308 3309 // Initialization of block size related parameters. 3310 blk_params->mi_step = mi_size_wide[bsize] / 2; 3311 blk_params->mi_row = mi_row; 3312 blk_params->mi_col = mi_col; 3313 blk_params->mi_row_edge = mi_row + blk_params->mi_step; 3314 blk_params->mi_col_edge = mi_col + blk_params->mi_step; 3315 blk_params->width = block_size_wide[bsize]; 3316 blk_params->min_partition_size_1d = 3317 block_size_wide[x->sb_enc.min_partition_size]; 3318 blk_params->subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 3319 blk_params->split_bsize2 = blk_params->subsize; 3320 blk_params->bsize_at_least_8x8 = (bsize >= BLOCK_8X8); 3321 blk_params->bsize = bsize; 3322 3323 // Check if the partition corresponds to edge block. 3324 blk_params->has_rows = (blk_params->mi_row_edge < mi_params->mi_rows); 3325 blk_params->has_cols = (blk_params->mi_col_edge < mi_params->mi_cols); 3326 3327 // Update intra partitioning related info. 3328 part_search_state->intra_part_info = &x->part_search_info; 3329 // Prepare for segmentation CNN-based partitioning for intra-frame. 3330 if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) { 3331 part_search_state->intra_part_info->quad_tree_idx = 0; 3332 part_search_state->intra_part_info->cnn_output_valid = 0; 3333 } 3334 3335 // Set partition plane context index. 3336 part_search_state->pl_ctx_idx = 3337 blk_params->bsize_at_least_8x8 3338 ? 
partition_plane_context(xd, mi_row, mi_col, bsize) 3339 : 0; 3340 3341 // Partition cost buffer update 3342 ModeCosts *mode_costs = &x->mode_costs; 3343 part_search_state->partition_cost = 3344 mode_costs->partition_cost[part_search_state->pl_ctx_idx]; 3345 3346 // Initialize HORZ and VERT win flags as true for all split partitions. 3347 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 3348 part_search_state->split_part_rect_win[i].rect_part_win[HORZ] = true; 3349 part_search_state->split_part_rect_win[i].rect_part_win[VERT] = true; 3350 } 3351 3352 // Initialize the rd cost. 3353 av1_init_rd_stats(&part_search_state->this_rdc); 3354 3355 // Initialize RD costs for partition types to 0. 3356 part_search_state->none_rd = 0; 3357 av1_zero(part_search_state->split_rd); 3358 av1_zero(part_search_state->rect_part_rd); 3359 3360 // Initialize SPLIT partition to be not ready. 3361 av1_zero(part_search_state->is_split_ctx_is_ready); 3362 // Initialize HORZ and VERT partitions to be not ready. 3363 av1_zero(part_search_state->is_rect_ctx_is_ready); 3364 3365 // Chroma subsampling. 3366 part_search_state->ss_x = x->e_mbd.plane[1].subsampling_x; 3367 part_search_state->ss_y = x->e_mbd.plane[1].subsampling_y; 3368 3369 // Initialize partition search flags to defaults. 3370 part_search_state->terminate_partition_search = 0; 3371 part_search_state->do_square_split = blk_params->bsize_at_least_8x8; 3372 part_search_state->do_rectangular_split = 3373 cpi->oxcf.part_cfg.enable_rect_partitions && 3374 blk_params->bsize_at_least_8x8; 3375 av1_zero(part_search_state->prune_rect_part); 3376 3377 // Initialize allowed partition types for the partition block. 
3378 part_search_state->partition_none_allowed = 3379 av1_blk_has_rows_and_cols(blk_params); 3380 part_search_state->partition_rect_allowed[HORZ] = 3381 part_search_state->do_rectangular_split && blk_params->has_cols && 3382 get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ), 3383 part_search_state->ss_x, 3384 part_search_state->ss_y) != BLOCK_INVALID; 3385 part_search_state->partition_rect_allowed[VERT] = 3386 part_search_state->do_rectangular_split && blk_params->has_rows && 3387 get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT), 3388 part_search_state->ss_x, 3389 part_search_state->ss_y) != BLOCK_INVALID; 3390 3391 // Reset the flag indicating whether a partition leading to a rdcost lower 3392 // than the bound best_rdc has been found. 3393 part_search_state->found_best_partition = false; 3394 3395 #if CONFIG_COLLECT_PARTITION_STATS 3396 init_partition_block_timing_stats(&part_search_state->part_timing_stats); 3397 #endif // CONFIG_COLLECT_PARTITION_STATS 3398 } 3399 3400 // Override partition cost buffer for the edge blocks. 3401 static void set_partition_cost_for_edge_blk( 3402 AV1_COMMON const *cm, PartitionSearchState *part_search_state) { 3403 PartitionBlkParams blk_params = part_search_state->part_blk_params; 3404 assert(blk_params.bsize_at_least_8x8 && part_search_state->pl_ctx_idx >= 0); 3405 const aom_cdf_prob *partition_cdf = 3406 cm->fc->partition_cdf[part_search_state->pl_ctx_idx]; 3407 const int max_cost = av1_cost_symbol(0); 3408 for (PARTITION_TYPE i = 0; i < PARTITION_TYPES; ++i) 3409 part_search_state->tmp_partition_cost[i] = max_cost; 3410 if (blk_params.has_cols) { 3411 // At the bottom, the two possibilities are HORZ and SPLIT. 
3412 aom_cdf_prob bot_cdf[2]; 3413 partition_gather_vert_alike(bot_cdf, partition_cdf, blk_params.bsize); 3414 static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT }; 3415 av1_cost_tokens_from_cdf(part_search_state->tmp_partition_cost, bot_cdf, 3416 bot_inv_map); 3417 } else if (blk_params.has_rows) { 3418 // At the right, the two possibilities are VERT and SPLIT. 3419 aom_cdf_prob rhs_cdf[2]; 3420 partition_gather_horz_alike(rhs_cdf, partition_cdf, blk_params.bsize); 3421 static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT }; 3422 av1_cost_tokens_from_cdf(part_search_state->tmp_partition_cost, rhs_cdf, 3423 rhs_inv_map); 3424 } else { 3425 // At the bottom right, we always split. 3426 part_search_state->tmp_partition_cost[PARTITION_SPLIT] = 0; 3427 } 3428 // Override the partition cost buffer. 3429 part_search_state->partition_cost = part_search_state->tmp_partition_cost; 3430 } 3431 3432 // Reset the partition search state flags when 3433 // must_find_valid_partition is equal to 1. 
3434 static inline void reset_part_limitations( 3435 AV1_COMP *const cpi, PartitionSearchState *part_search_state) { 3436 PartitionBlkParams blk_params = part_search_state->part_blk_params; 3437 const int is_rect_part_allowed = 3438 blk_params.bsize_at_least_8x8 && 3439 cpi->oxcf.part_cfg.enable_rect_partitions && 3440 (blk_params.width > blk_params.min_partition_size_1d); 3441 part_search_state->do_square_split = 3442 blk_params.bsize_at_least_8x8 && 3443 (blk_params.width > blk_params.min_partition_size_1d); 3444 part_search_state->partition_none_allowed = 3445 av1_blk_has_rows_and_cols(&blk_params) && 3446 (blk_params.width >= blk_params.min_partition_size_1d); 3447 part_search_state->partition_rect_allowed[HORZ] = 3448 blk_params.has_cols && is_rect_part_allowed && 3449 get_plane_block_size( 3450 get_partition_subsize(blk_params.bsize, PARTITION_HORZ), 3451 part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID; 3452 part_search_state->partition_rect_allowed[VERT] = 3453 blk_params.has_rows && is_rect_part_allowed && 3454 get_plane_block_size( 3455 get_partition_subsize(blk_params.bsize, PARTITION_VERT), 3456 part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID; 3457 part_search_state->terminate_partition_search = 0; 3458 } 3459 3460 // Rectangular partitions evaluation at sub-block level. 3461 static void rd_pick_rect_partition(AV1_COMP *const cpi, TileDataEnc *tile_data, 3462 MACROBLOCK *x, 3463 PICK_MODE_CONTEXT *cur_partition_ctx, 3464 PartitionSearchState *part_search_state, 3465 RD_STATS *best_rdc, const int idx, 3466 int mi_row, int mi_col, BLOCK_SIZE bsize, 3467 PARTITION_TYPE partition_type) { 3468 // Obtain the remainder from the best rd cost 3469 // for further processing of partition. 3470 RD_STATS best_remain_rdcost; 3471 av1_rd_stats_subtraction(x->rdmult, best_rdc, &part_search_state->sum_rdc, 3472 &best_remain_rdcost); 3473 3474 // Obtain the best mode for the partition sub-block. 
3475 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &part_search_state->this_rdc, 3476 partition_type, bsize, cur_partition_ctx, best_remain_rdcost); 3477 av1_rd_cost_update(x->rdmult, &part_search_state->this_rdc); 3478 3479 // Update the partition rd cost with the current sub-block rd. 3480 if (part_search_state->this_rdc.rate == INT_MAX) { 3481 part_search_state->sum_rdc.rdcost = INT64_MAX; 3482 } else { 3483 part_search_state->sum_rdc.rate += part_search_state->this_rdc.rate; 3484 part_search_state->sum_rdc.dist += part_search_state->this_rdc.dist; 3485 av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc); 3486 } 3487 const RECT_PART_TYPE rect_part = 3488 partition_type == PARTITION_HORZ ? HORZ : VERT; 3489 part_search_state->rect_part_rd[rect_part][idx] = 3490 part_search_state->this_rdc.rdcost; 3491 } 3492 3493 typedef int (*active_edge_info)(const AV1_COMP *cpi, int mi_col, int mi_step); 3494 3495 // Checks if HORZ / VERT partition search is allowed. 3496 static inline int is_rect_part_allowed( 3497 const AV1_COMP *cpi, const PartitionSearchState *part_search_state, 3498 const active_edge_info *active_edge, RECT_PART_TYPE rect_part, 3499 const int mi_pos) { 3500 const PartitionBlkParams *blk_params = &part_search_state->part_blk_params; 3501 const int is_part_allowed = 3502 (!part_search_state->terminate_partition_search && 3503 part_search_state->partition_rect_allowed[rect_part] && 3504 !part_search_state->prune_rect_part[rect_part] && 3505 (part_search_state->do_rectangular_split || 3506 active_edge[rect_part](cpi, mi_pos, blk_params->mi_step))); 3507 return is_part_allowed; 3508 } 3509 3510 static void rectangular_partition_search( 3511 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, 3512 TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree, 3513 RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, 3514 PartitionSearchState *part_search_state, RD_STATS *best_rdc, 3515 RD_RECT_PART_WIN_INFO *rect_part_win_info, const RECT_PART_TYPE start_type, 3516 const 
RECT_PART_TYPE end_type) { 3517 const AV1_COMMON *const cm = &cpi->common; 3518 PartitionBlkParams blk_params = part_search_state->part_blk_params; 3519 RD_STATS *sum_rdc = &part_search_state->sum_rdc; 3520 const int rect_partition_type[NUM_RECT_PARTS] = { PARTITION_HORZ, 3521 PARTITION_VERT }; 3522 3523 // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][0]: mi_row postion of 3524 // HORZ and VERT partition types. 3525 // mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][1]: mi_col postion of 3526 // HORZ and VERT partition types. 3527 const int mi_pos_rect[NUM_RECT_PARTS][SUB_PARTITIONS_RECT][2] = { 3528 { { blk_params.mi_row, blk_params.mi_col }, 3529 { blk_params.mi_row_edge, blk_params.mi_col } }, 3530 { { blk_params.mi_row, blk_params.mi_col }, 3531 { blk_params.mi_row, blk_params.mi_col_edge } } 3532 }; 3533 3534 // Initialize active edge_type function pointer 3535 // for HOZR and VERT partition types. 3536 active_edge_info active_edge_type[NUM_RECT_PARTS] = { av1_active_h_edge, 3537 av1_active_v_edge }; 3538 3539 // Indicates edge blocks for HORZ and VERT partition types. 3540 const int is_not_edge_block[NUM_RECT_PARTS] = { blk_params.has_rows, 3541 blk_params.has_cols }; 3542 3543 // Initialize pc tree context for HORZ and VERT partition types. 3544 PICK_MODE_CONTEXT **cur_ctx[NUM_RECT_PARTS][SUB_PARTITIONS_RECT] = { 3545 { &pc_tree->horizontal[0], &pc_tree->horizontal[1] }, 3546 { &pc_tree->vertical[0], &pc_tree->vertical[1] } 3547 }; 3548 3549 // Loop over rectangular partition types. 3550 for (RECT_PART_TYPE i = start_type; i <= end_type; i++) { 3551 assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions, 3552 !part_search_state->partition_rect_allowed[i])); 3553 3554 // Check if the HORZ / VERT partition search is to be performed. 3555 if (!is_rect_part_allowed(cpi, part_search_state, active_edge_type, i, 3556 mi_pos_rect[i][0][i])) 3557 continue; 3558 3559 // Sub-partition idx. 
3560 int sub_part_idx = 0; 3561 PARTITION_TYPE partition_type = rect_partition_type[i]; 3562 blk_params.subsize = 3563 get_partition_subsize(blk_params.bsize, partition_type); 3564 assert(blk_params.subsize <= BLOCK_LARGEST); 3565 av1_init_rd_stats(sum_rdc); 3566 for (int j = 0; j < SUB_PARTITIONS_RECT; j++) { 3567 if (cur_ctx[i][j][0] == NULL) { 3568 cur_ctx[i][j][0] = 3569 av1_alloc_pmc(cpi, blk_params.subsize, &td->shared_coeff_buf); 3570 if (!cur_ctx[i][j][0]) 3571 aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, 3572 "Failed to allocate PICK_MODE_CONTEXT"); 3573 } 3574 } 3575 sum_rdc->rate = part_search_state->partition_cost[partition_type]; 3576 sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, 0); 3577 #if CONFIG_COLLECT_PARTITION_STATS 3578 PartitionTimingStats *part_timing_stats = 3579 &part_search_state->part_timing_stats; 3580 if (best_rdc->rdcost - sum_rdc->rdcost >= 0) { 3581 start_partition_block_timer(part_timing_stats, partition_type); 3582 } 3583 #endif 3584 3585 // First sub-partition evaluation in HORZ / VERT partition type. 3586 rd_pick_rect_partition( 3587 cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state, 3588 best_rdc, 0, mi_pos_rect[i][sub_part_idx][0], 3589 mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type); 3590 3591 // Start of second sub-partition evaluation. 3592 // Evaluate second sub-partition if the first sub-partition cost 3593 // is less than the best cost and if it is not an edge block. 3594 if (sum_rdc->rdcost < best_rdc->rdcost && is_not_edge_block[i]) { 3595 const MB_MODE_INFO *const mbmi = &cur_ctx[i][sub_part_idx][0]->mic; 3596 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 3597 // Neither palette mode nor cfl predicted. 
3598 if (pmi->palette_size[PLANE_TYPE_Y] == 0 && 3599 pmi->palette_size[PLANE_TYPE_UV] == 0) { 3600 if (mbmi->uv_mode != UV_CFL_PRED) 3601 part_search_state->is_rect_ctx_is_ready[i] = 1; 3602 } 3603 av1_update_state(cpi, td, cur_ctx[i][sub_part_idx][0], blk_params.mi_row, 3604 blk_params.mi_col, blk_params.subsize, DRY_RUN_NORMAL); 3605 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, 3606 blk_params.subsize, NULL); 3607 3608 // Second sub-partition evaluation in HORZ / VERT partition type. 3609 sub_part_idx = 1; 3610 rd_pick_rect_partition( 3611 cpi, tile_data, x, cur_ctx[i][sub_part_idx][0], part_search_state, 3612 best_rdc, 1, mi_pos_rect[i][sub_part_idx][0], 3613 mi_pos_rect[i][sub_part_idx][1], blk_params.subsize, partition_type); 3614 } 3615 // Update HORZ / VERT best partition. 3616 if (sum_rdc->rdcost < best_rdc->rdcost) { 3617 sum_rdc->rdcost = RDCOST(x->rdmult, sum_rdc->rate, sum_rdc->dist); 3618 if (sum_rdc->rdcost < best_rdc->rdcost) { 3619 *best_rdc = *sum_rdc; 3620 part_search_state->found_best_partition = true; 3621 pc_tree->partitioning = partition_type; 3622 } 3623 } else { 3624 // Update HORZ / VERT win flag. 3625 if (rect_part_win_info != NULL) 3626 rect_part_win_info->rect_part_win[i] = false; 3627 } 3628 #if CONFIG_COLLECT_PARTITION_STATS 3629 if (part_timing_stats->timer_is_on) { 3630 end_partition_block_timer(part_timing_stats, partition_type, 3631 sum_rdc->rdcost); 3632 } 3633 #endif 3634 av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col, 3635 blk_params.bsize, av1_num_planes(cm)); 3636 } 3637 } 3638 3639 // AB partition type evaluation. 
3640 static void rd_pick_ab_part( 3641 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, 3642 TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, 3643 PC_TREE *pc_tree, PICK_MODE_CONTEXT *dst_ctxs[SUB_PARTITIONS_AB], 3644 PartitionSearchState *part_search_state, RD_STATS *best_rdc, 3645 const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB], 3646 const int ab_mi_pos[SUB_PARTITIONS_AB][2], const PARTITION_TYPE part_type, 3647 const MB_MODE_INFO **mode_cache) { 3648 const AV1_COMMON *const cm = &cpi->common; 3649 PartitionBlkParams blk_params = part_search_state->part_blk_params; 3650 const int mi_row = blk_params.mi_row; 3651 const int mi_col = blk_params.mi_col; 3652 const BLOCK_SIZE bsize = blk_params.bsize; 3653 int64_t this_rdcost = 0; 3654 3655 #if CONFIG_COLLECT_PARTITION_STATS 3656 PartitionTimingStats *part_timing_stats = 3657 &part_search_state->part_timing_stats; 3658 { 3659 RD_STATS tmp_sum_rdc; 3660 av1_init_rd_stats(&tmp_sum_rdc); 3661 tmp_sum_rdc.rate = part_search_state->partition_cost[part_type]; 3662 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0); 3663 if (best_rdc->rdcost - tmp_sum_rdc.rdcost >= 0) { 3664 start_partition_block_timer(part_timing_stats, part_type); 3665 } 3666 } 3667 #endif 3668 3669 // Test this partition and update the best partition. 3670 const bool find_best_ab_part = rd_test_partition3( 3671 cpi, td, tile_data, tp, pc_tree, best_rdc, &this_rdcost, dst_ctxs, mi_row, 3672 mi_col, bsize, part_type, ab_subsize, ab_mi_pos, mode_cache); 3673 part_search_state->found_best_partition |= find_best_ab_part; 3674 3675 #if CONFIG_COLLECT_PARTITION_STATS 3676 if (part_timing_stats->timer_is_on) { 3677 if (!find_best_ab_part) this_rdcost = INT64_MAX; 3678 end_partition_block_timer(part_timing_stats, part_type, this_rdcost); 3679 } 3680 #endif 3681 av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); 3682 } 3683 3684 // Set mode search context. 
3685 static inline void set_mode_search_ctx( 3686 PC_TREE *pc_tree, const int is_ctx_ready[NUM_AB_PARTS][2], 3687 PICK_MODE_CONTEXT **mode_srch_ctx[NUM_AB_PARTS][2]) { 3688 mode_srch_ctx[HORZ_B][0] = &pc_tree->horizontal[0]; 3689 mode_srch_ctx[VERT_B][0] = &pc_tree->vertical[0]; 3690 3691 if (is_ctx_ready[HORZ_A][0]) 3692 mode_srch_ctx[HORZ_A][0] = &pc_tree->split[0]->none; 3693 3694 if (is_ctx_ready[VERT_A][0]) 3695 mode_srch_ctx[VERT_A][0] = &pc_tree->split[0]->none; 3696 3697 if (is_ctx_ready[HORZ_A][1]) 3698 mode_srch_ctx[HORZ_A][1] = &pc_tree->split[1]->none; 3699 } 3700 3701 static inline void copy_partition_mode_from_mode_context( 3702 const MB_MODE_INFO **dst_mode, const PICK_MODE_CONTEXT *ctx) { 3703 if (ctx && ctx->rd_stats.rate < INT_MAX) { 3704 *dst_mode = &ctx->mic; 3705 } else { 3706 *dst_mode = NULL; 3707 } 3708 } 3709 3710 static inline void copy_partition_mode_from_pc_tree( 3711 const MB_MODE_INFO **dst_mode, const PC_TREE *pc_tree) { 3712 if (pc_tree) { 3713 copy_partition_mode_from_mode_context(dst_mode, pc_tree->none); 3714 } else { 3715 *dst_mode = NULL; 3716 } 3717 } 3718 3719 static inline void set_mode_cache_for_partition_ab( 3720 const MB_MODE_INFO **mode_cache, const PC_TREE *pc_tree, 3721 AB_PART_TYPE ab_part_type) { 3722 switch (ab_part_type) { 3723 case HORZ_A: 3724 copy_partition_mode_from_pc_tree(&mode_cache[0], pc_tree->split[0]); 3725 copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[1]); 3726 copy_partition_mode_from_mode_context(&mode_cache[2], 3727 pc_tree->horizontal[1]); 3728 break; 3729 case HORZ_B: 3730 copy_partition_mode_from_mode_context(&mode_cache[0], 3731 pc_tree->horizontal[0]); 3732 copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[2]); 3733 copy_partition_mode_from_pc_tree(&mode_cache[2], pc_tree->split[3]); 3734 break; 3735 case VERT_A: 3736 copy_partition_mode_from_pc_tree(&mode_cache[0], pc_tree->split[0]); 3737 copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[2]); 3738 
copy_partition_mode_from_mode_context(&mode_cache[2], 3739 pc_tree->vertical[1]); 3740 break; 3741 case VERT_B: 3742 copy_partition_mode_from_mode_context(&mode_cache[0], 3743 pc_tree->vertical[0]); 3744 copy_partition_mode_from_pc_tree(&mode_cache[1], pc_tree->split[1]); 3745 copy_partition_mode_from_pc_tree(&mode_cache[2], pc_tree->split[3]); 3746 break; 3747 default: assert(0 && "Invalid ab partition type!\n"); 3748 } 3749 } 3750 3751 // AB Partitions type search. 3752 static void ab_partitions_search( 3753 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, 3754 TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, 3755 PC_TREE *pc_tree, PartitionSearchState *part_search_state, 3756 RD_STATS *best_rdc, RD_RECT_PART_WIN_INFO *rect_part_win_info, 3757 int pb_source_variance, int ext_partition_allowed, 3758 const AB_PART_TYPE start_type, const AB_PART_TYPE end_type) { 3759 PartitionBlkParams blk_params = part_search_state->part_blk_params; 3760 const int mi_row = blk_params.mi_row; 3761 const int mi_col = blk_params.mi_col; 3762 const BLOCK_SIZE bsize = blk_params.bsize; 3763 3764 if (part_search_state->terminate_partition_search) { 3765 return; 3766 } 3767 3768 int ab_partitions_allowed[NUM_AB_PARTS]; 3769 // Prune AB partitions 3770 av1_prune_ab_partitions(cpi, x, pc_tree, pb_source_variance, best_rdc->rdcost, 3771 rect_part_win_info, ext_partition_allowed, 3772 part_search_state, ab_partitions_allowed); 3773 3774 // Flags to indicate whether the mode search is done. 3775 const int is_ctx_ready[NUM_AB_PARTS][2] = { 3776 { part_search_state->is_split_ctx_is_ready[0], 3777 part_search_state->is_split_ctx_is_ready[1] }, 3778 { part_search_state->is_rect_ctx_is_ready[HORZ], 0 }, 3779 { part_search_state->is_split_ctx_is_ready[0], 0 }, 3780 { part_search_state->is_rect_ctx_is_ready[VERT], 0 } 3781 }; 3782 3783 // Current partition context. 
3784 PICK_MODE_CONTEXT **cur_part_ctxs[NUM_AB_PARTS] = { pc_tree->horizontala, 3785 pc_tree->horizontalb, 3786 pc_tree->verticala, 3787 pc_tree->verticalb }; 3788 3789 // Context of already evaluted partition types. 3790 PICK_MODE_CONTEXT **mode_srch_ctx[NUM_AB_PARTS][2]; 3791 // Set context of already evaluted partition types. 3792 set_mode_search_ctx(pc_tree, is_ctx_ready, mode_srch_ctx); 3793 3794 // Array of sub-partition size of AB partition types. 3795 const BLOCK_SIZE ab_subsize[NUM_AB_PARTS][SUB_PARTITIONS_AB] = { 3796 { blk_params.split_bsize2, blk_params.split_bsize2, 3797 get_partition_subsize(bsize, PARTITION_HORZ_A) }, 3798 { get_partition_subsize(bsize, PARTITION_HORZ_B), blk_params.split_bsize2, 3799 blk_params.split_bsize2 }, 3800 { blk_params.split_bsize2, blk_params.split_bsize2, 3801 get_partition_subsize(bsize, PARTITION_VERT_A) }, 3802 { get_partition_subsize(bsize, PARTITION_VERT_B), blk_params.split_bsize2, 3803 blk_params.split_bsize2 } 3804 }; 3805 3806 // Array of mi_row, mi_col positions corresponds to each sub-partition in AB 3807 // partition types. 3808 const int ab_mi_pos[NUM_AB_PARTS][SUB_PARTITIONS_AB][2] = { 3809 { { mi_row, mi_col }, 3810 { mi_row, blk_params.mi_col_edge }, 3811 { blk_params.mi_row_edge, mi_col } }, 3812 { { mi_row, mi_col }, 3813 { blk_params.mi_row_edge, mi_col }, 3814 { blk_params.mi_row_edge, blk_params.mi_col_edge } }, 3815 { { mi_row, mi_col }, 3816 { blk_params.mi_row_edge, mi_col }, 3817 { mi_row, blk_params.mi_col_edge } }, 3818 { { mi_row, mi_col }, 3819 { mi_row, blk_params.mi_col_edge }, 3820 { blk_params.mi_row_edge, blk_params.mi_col_edge } } 3821 }; 3822 3823 // Loop over AB partition types. 3824 for (AB_PART_TYPE ab_part_type = start_type; ab_part_type <= end_type; 3825 ab_part_type++) { 3826 const PARTITION_TYPE part_type = ab_part_type + PARTITION_HORZ_A; 3827 3828 // Check if the AB partition search is to be performed. 
3829 if (!ab_partitions_allowed[ab_part_type]) { 3830 continue; 3831 } 3832 3833 blk_params.subsize = get_partition_subsize(bsize, part_type); 3834 for (int i = 0; i < SUB_PARTITIONS_AB; i++) { 3835 // Set AB partition context. 3836 cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc( 3837 cpi, ab_subsize[ab_part_type][i], &td->shared_coeff_buf); 3838 if (!cur_part_ctxs[ab_part_type][i]) 3839 aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, 3840 "Failed to allocate PICK_MODE_CONTEXT"); 3841 // Set mode as not ready. 3842 cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0; 3843 } 3844 3845 if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab) { 3846 // We can copy directly the mode search results if we have already 3847 // searched the current block and the contexts match. 3848 if (is_ctx_ready[ab_part_type][0]) { 3849 av1_copy_tree_context(cur_part_ctxs[ab_part_type][0], 3850 mode_srch_ctx[ab_part_type][0][0]); 3851 cur_part_ctxs[ab_part_type][0]->mic.partition = part_type; 3852 cur_part_ctxs[ab_part_type][0]->rd_mode_is_ready = 1; 3853 if (is_ctx_ready[ab_part_type][1]) { 3854 av1_copy_tree_context(cur_part_ctxs[ab_part_type][1], 3855 mode_srch_ctx[ab_part_type][1][0]); 3856 cur_part_ctxs[ab_part_type][1]->mic.partition = part_type; 3857 cur_part_ctxs[ab_part_type][1]->rd_mode_is_ready = 1; 3858 } 3859 } 3860 } 3861 3862 // Even if the contexts don't match, we can still speed up by reusing the 3863 // previous prediction mode. 3864 const MB_MODE_INFO *mode_cache[3] = { NULL, NULL, NULL }; 3865 if (cpi->sf.part_sf.reuse_best_prediction_for_part_ab) { 3866 set_mode_cache_for_partition_ab(mode_cache, pc_tree, ab_part_type); 3867 } 3868 3869 // Evaluation of AB partition type. 
3870 rd_pick_ab_part(cpi, td, tile_data, tp, x, x_ctx, pc_tree, 3871 cur_part_ctxs[ab_part_type], part_search_state, best_rdc, 3872 ab_subsize[ab_part_type], ab_mi_pos[ab_part_type], 3873 part_type, mode_cache); 3874 } 3875 } 3876 3877 // Set mi positions for HORZ4 / VERT4 sub-block partitions. 3878 static void set_mi_pos_partition4(const int inc_step[NUM_PART4_TYPES], 3879 int mi_pos[SUB_PARTITIONS_PART4][2], 3880 const int mi_row, const int mi_col) { 3881 for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; i++) { 3882 mi_pos[i][0] = mi_row + i * inc_step[HORZ4]; 3883 mi_pos[i][1] = mi_col + i * inc_step[VERT4]; 3884 } 3885 } 3886 3887 // Set context and RD cost for HORZ4 / VERT4 partition types. 3888 static void set_4_part_ctx_and_rdcost( 3889 MACROBLOCK *x, const AV1_COMP *const cpi, ThreadData *td, 3890 PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4], 3891 PartitionSearchState *part_search_state, PARTITION_TYPE partition_type, 3892 BLOCK_SIZE bsize) { 3893 // Initialize sum_rdc RD cost structure. 3894 av1_init_rd_stats(&part_search_state->sum_rdc); 3895 const int subsize = get_partition_subsize(bsize, partition_type); 3896 part_search_state->sum_rdc.rate = 3897 part_search_state->partition_cost[partition_type]; 3898 part_search_state->sum_rdc.rdcost = 3899 RDCOST(x->rdmult, part_search_state->sum_rdc.rate, 0); 3900 for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) { 3901 cur_part_ctx[i] = av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf); 3902 if (!cur_part_ctx[i]) 3903 aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, 3904 "Failed to allocate PICK_MODE_CONTEXT"); 3905 } 3906 } 3907 3908 // Partition search of HORZ4 / VERT4 partition types. 
3909 static void rd_pick_4partition( 3910 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, 3911 TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, 3912 PC_TREE *pc_tree, PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4], 3913 PartitionSearchState *part_search_state, RD_STATS *best_rdc, 3914 const int inc_step[NUM_PART4_TYPES], PARTITION_TYPE partition_type) { 3915 const AV1_COMMON *const cm = &cpi->common; 3916 PartitionBlkParams blk_params = part_search_state->part_blk_params; 3917 // mi positions needed for HORZ4 and VERT4 partition types. 3918 int mi_pos_check[NUM_PART4_TYPES] = { cm->mi_params.mi_rows, 3919 cm->mi_params.mi_cols }; 3920 const PART4_TYPES part4_idx = (partition_type != PARTITION_HORZ_4); 3921 int mi_pos[SUB_PARTITIONS_PART4][2]; 3922 3923 blk_params.subsize = get_partition_subsize(blk_params.bsize, partition_type); 3924 // Set partition context and RD cost. 3925 set_4_part_ctx_and_rdcost(x, cpi, td, cur_part_ctx, part_search_state, 3926 partition_type, blk_params.bsize); 3927 // Set mi positions for sub-block sizes. 3928 set_mi_pos_partition4(inc_step, mi_pos, blk_params.mi_row, blk_params.mi_col); 3929 #if CONFIG_COLLECT_PARTITION_STATS 3930 PartitionTimingStats *part_timing_stats = 3931 &part_search_state->part_timing_stats; 3932 if (best_rdc->rdcost - part_search_state->sum_rdc.rdcost >= 0) { 3933 start_partition_block_timer(part_timing_stats, partition_type); 3934 } 3935 #endif 3936 // Loop over sub-block partitions. 3937 for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) { 3938 if (i > 0 && mi_pos[i][part4_idx] >= mi_pos_check[part4_idx]) break; 3939 3940 // Sub-block evaluation of Horz4 / Vert4 partition type. 
3941 cur_part_ctx[i]->rd_mode_is_ready = 0; 3942 if (!rd_try_subblock( 3943 cpi, td, tile_data, tp, (i == SUB_PARTITIONS_PART4 - 1), 3944 mi_pos[i][0], mi_pos[i][1], blk_params.subsize, *best_rdc, 3945 &part_search_state->sum_rdc, partition_type, cur_part_ctx[i])) { 3946 av1_invalid_rd_stats(&part_search_state->sum_rdc); 3947 break; 3948 } 3949 } 3950 3951 // Calculate the total cost and update the best partition. 3952 av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc); 3953 if (part_search_state->sum_rdc.rdcost < best_rdc->rdcost) { 3954 *best_rdc = part_search_state->sum_rdc; 3955 part_search_state->found_best_partition = true; 3956 pc_tree->partitioning = partition_type; 3957 } 3958 #if CONFIG_COLLECT_PARTITION_STATS 3959 if (part_timing_stats->timer_is_on) { 3960 end_partition_block_timer(part_timing_stats, partition_type, 3961 part_search_state->sum_rdc.rdcost); 3962 } 3963 #endif 3964 av1_restore_context(x, x_ctx, blk_params.mi_row, blk_params.mi_col, 3965 blk_params.bsize, av1_num_planes(cm)); 3966 } 3967 3968 // Do not evaluate extended partitions if NONE partition is skippable. 
3969 static inline int prune_ext_part_none_skippable( 3970 PICK_MODE_CONTEXT *part_none, int must_find_valid_partition, 3971 int skip_non_sq_part_based_on_none, BLOCK_SIZE bsize) { 3972 if ((skip_non_sq_part_based_on_none >= 1) && (part_none != NULL)) { 3973 if (part_none->skippable && !must_find_valid_partition && 3974 bsize >= BLOCK_16X16) { 3975 return 1; 3976 } 3977 } 3978 return 0; 3979 } 3980 3981 // Allow ab partition search 3982 static int allow_ab_partition_search(PartitionSearchState *part_search_state, 3983 PARTITION_SPEED_FEATURES *part_sf, 3984 PARTITION_TYPE curr_best_part, 3985 int must_find_valid_partition, 3986 int prune_ext_part_state, 3987 int64_t best_rdcost) { 3988 const PartitionBlkParams blk_params = part_search_state->part_blk_params; 3989 const BLOCK_SIZE bsize = blk_params.bsize; 3990 3991 // Do not prune if there is no valid partition 3992 if (best_rdcost == INT64_MAX) return 1; 3993 3994 // Determine bsize threshold to evaluate ab partitions 3995 BLOCK_SIZE ab_bsize_thresh = part_sf->ext_partition_eval_thresh; 3996 if (part_sf->ext_part_eval_based_on_cur_best && !must_find_valid_partition && 3997 !(curr_best_part == PARTITION_HORZ || curr_best_part == PARTITION_VERT)) 3998 ab_bsize_thresh = BLOCK_128X128; 3999 4000 // ab partitions are only allowed for square block sizes BLOCK_16X16 or 4001 // higher, so ab_bsize_thresh must be large enough to exclude BLOCK_4X4 and 4002 // BLOCK_8X8. 4003 assert(ab_bsize_thresh >= BLOCK_8X8); 4004 4005 int ab_partition_allowed = 4006 part_search_state->do_rectangular_split && bsize > ab_bsize_thresh && 4007 av1_blk_has_rows_and_cols(&blk_params) && !prune_ext_part_state; 4008 4009 return ab_partition_allowed; 4010 } 4011 4012 // Prune 4-way partitions based on the number of horz/vert wins 4013 // in the current block and sub-blocks in PARTITION_SPLIT. 
4014 static void prune_4_partition_using_split_info( 4015 AV1_COMP *const cpi, MACROBLOCK *x, PartitionSearchState *part_search_state, 4016 int part4_search_allowed[NUM_PART4_TYPES]) { 4017 PART4_TYPES cur_part[NUM_PART4_TYPES] = { HORZ4, VERT4 }; 4018 // Count of child blocks in which HORZ or VERT partition has won 4019 int num_child_rect_win[NUM_RECT_PARTS] = { 0, 0 }; 4020 // Prune HORZ4/VERT4 partitions based on number of HORZ/VERT winners of 4021 // split partiitons. 4022 // Conservative pruning for high quantizers. 4023 const int num_win_thresh = AOMMIN(3 * (MAXQ - x->qindex) / MAXQ + 1, 3); 4024 4025 for (RECT_PART_TYPE i = HORZ; i < NUM_RECT_PARTS; i++) { 4026 if (!(cpi->sf.part_sf.prune_ext_part_using_split_info && 4027 part4_search_allowed[cur_part[i]])) 4028 continue; 4029 // Loop over split partitions. 4030 // Get rectangular partitions winner info of split partitions. 4031 for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; idx++) 4032 num_child_rect_win[i] += 4033 (part_search_state->split_part_rect_win[idx].rect_part_win[i]) ? 1 4034 : 0; 4035 if (num_child_rect_win[i] < num_win_thresh) { 4036 part4_search_allowed[cur_part[i]] = 0; 4037 } 4038 } 4039 } 4040 4041 // Prune 4-way partition search. 
4042 static void prune_4_way_partition_search( 4043 AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, 4044 PartitionSearchState *part_search_state, RD_STATS *best_rdc, 4045 int pb_source_variance, int prune_ext_part_state, 4046 int part4_search_allowed[NUM_PART4_TYPES]) { 4047 const PartitionBlkParams blk_params = part_search_state->part_blk_params; 4048 const BLOCK_SIZE bsize = blk_params.bsize; 4049 4050 const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg; 4051 4052 // Do not prune if there is no valid partition 4053 if (best_rdc->rdcost == INT64_MAX && part_cfg->enable_1to4_partitions && 4054 bsize != BLOCK_128X128) 4055 return; 4056 4057 // Determine bsize threshold to evaluate 4-way partitions 4058 BLOCK_SIZE part4_bsize_thresh = cpi->sf.part_sf.ext_partition_eval_thresh; 4059 if (cpi->sf.part_sf.ext_part_eval_based_on_cur_best && 4060 !x->must_find_valid_partition && pc_tree->partitioning == PARTITION_NONE) 4061 part4_bsize_thresh = BLOCK_128X128; 4062 4063 // 4-way partitions are only allowed for BLOCK_16X16, BLOCK_32X32, and 4064 // BLOCK_64X64, so part4_bsize_thresh must be large enough to exclude 4065 // BLOCK_4X4 and BLOCK_8X8. 4066 assert(part4_bsize_thresh >= BLOCK_8X8); 4067 4068 bool partition4_allowed = 4069 part_search_state->do_rectangular_split && bsize > part4_bsize_thresh && 4070 av1_blk_has_rows_and_cols(&blk_params) && !prune_ext_part_state; 4071 4072 // Disable 4-way partition search flags for width less than a multiple of the 4073 // minimum partition width. 4074 if (blk_params.width < (blk_params.min_partition_size_1d 4075 << cpi->sf.part_sf.prune_part4_search)) { 4076 part4_search_allowed[HORZ4] = 0; 4077 part4_search_allowed[VERT4] = 0; 4078 return; 4079 } 4080 4081 PARTITION_TYPE cur_part[NUM_PART4_TYPES] = { PARTITION_HORZ_4, 4082 PARTITION_VERT_4 }; 4083 // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or 4084 // PARTITION_VERT_4 for this block. 
This is almost the same as 4085 // partition4_allowed, except that we don't allow 128x32 or 32x128 4086 // blocks, so we require that bsize is not BLOCK_128X128. 4087 partition4_allowed &= 4088 part_cfg->enable_1to4_partitions && bsize != BLOCK_128X128; 4089 4090 for (PART4_TYPES i = HORZ4; i < NUM_PART4_TYPES; i++) { 4091 part4_search_allowed[i] = 4092 partition4_allowed && part_search_state->partition_rect_allowed[i] && 4093 get_plane_block_size(get_partition_subsize(bsize, cur_part[i]), 4094 part_search_state->ss_x, 4095 part_search_state->ss_y) != BLOCK_INVALID; 4096 } 4097 // Pruning: pruning out 4-way partitions based on the current best partition. 4098 if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 2) { 4099 part4_search_allowed[HORZ4] &= (pc_tree->partitioning == PARTITION_HORZ || 4100 pc_tree->partitioning == PARTITION_HORZ_A || 4101 pc_tree->partitioning == PARTITION_HORZ_B || 4102 pc_tree->partitioning == PARTITION_SPLIT || 4103 pc_tree->partitioning == PARTITION_NONE); 4104 part4_search_allowed[VERT4] &= (pc_tree->partitioning == PARTITION_VERT || 4105 pc_tree->partitioning == PARTITION_VERT_A || 4106 pc_tree->partitioning == PARTITION_VERT_B || 4107 pc_tree->partitioning == PARTITION_SPLIT || 4108 pc_tree->partitioning == PARTITION_NONE); 4109 } 4110 4111 // Pruning: pruning out some 4-way partitions using a DNN taking rd costs of 4112 // sub-blocks from basic partition types. 4113 if (cpi->sf.part_sf.ml_prune_partition && partition4_allowed && 4114 part_search_state->partition_rect_allowed[HORZ] && 4115 part_search_state->partition_rect_allowed[VERT]) { 4116 av1_ml_prune_4_partition(cpi, x, pc_tree->partitioning, best_rdc->rdcost, 4117 part_search_state, part4_search_allowed, 4118 pb_source_variance); 4119 } 4120 4121 // Pruning: pruning out 4-way partitions based on the number of horz/vert wins 4122 // in the current block and sub-blocks in PARTITION_SPLIT. 
4123 prune_4_partition_using_split_info(cpi, x, part_search_state, 4124 part4_search_allowed); 4125 } 4126 4127 // Set params needed for PARTITION_NONE search. 4128 static void set_none_partition_params(const AV1_COMP *const cpi, ThreadData *td, 4129 MACROBLOCK *x, PC_TREE *pc_tree, 4130 PartitionSearchState *part_search_state, 4131 RD_STATS *best_remain_rdcost, 4132 RD_STATS *best_rdc, int *pt_cost) { 4133 PartitionBlkParams blk_params = part_search_state->part_blk_params; 4134 RD_STATS partition_rdcost; 4135 // Set PARTITION_NONE context. 4136 if (pc_tree->none == NULL) 4137 pc_tree->none = av1_alloc_pmc(cpi, blk_params.bsize, &td->shared_coeff_buf); 4138 if (!pc_tree->none) 4139 aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, 4140 "Failed to allocate PICK_MODE_CONTEXT"); 4141 4142 // Set PARTITION_NONE type cost. 4143 if (part_search_state->partition_none_allowed) { 4144 if (blk_params.bsize_at_least_8x8) { 4145 *pt_cost = part_search_state->partition_cost[PARTITION_NONE] < INT_MAX 4146 ? part_search_state->partition_cost[PARTITION_NONE] 4147 : 0; 4148 } 4149 4150 // Initialize the RD stats structure. 4151 av1_init_rd_stats(&partition_rdcost); 4152 partition_rdcost.rate = *pt_cost; 4153 av1_rd_cost_update(x->rdmult, &partition_rdcost); 4154 av1_rd_stats_subtraction(x->rdmult, best_rdc, &partition_rdcost, 4155 best_remain_rdcost); 4156 } 4157 } 4158 4159 // Skip other partitions based on PARTITION_NONE rd cost. 
4160 static void prune_partitions_after_none(AV1_COMP *const cpi, MACROBLOCK *x, 4161 SIMPLE_MOTION_DATA_TREE *sms_tree, 4162 PICK_MODE_CONTEXT *ctx_none, 4163 PartitionSearchState *part_search_state, 4164 RD_STATS *best_rdc, 4165 unsigned int *pb_source_variance) { 4166 const AV1_COMMON *const cm = &cpi->common; 4167 MACROBLOCKD *const xd = &x->e_mbd; 4168 const PartitionBlkParams blk_params = part_search_state->part_blk_params; 4169 RD_STATS *this_rdc = &part_search_state->this_rdc; 4170 const BLOCK_SIZE bsize = blk_params.bsize; 4171 assert(bsize < BLOCK_SIZES_ALL); 4172 4173 if (!frame_is_intra_only(cm) && 4174 (part_search_state->do_square_split || 4175 part_search_state->do_rectangular_split) && 4176 !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) { 4177 const int use_ml_based_breakout = 4178 bsize <= cpi->sf.part_sf.use_square_partition_only_threshold && 4179 bsize > BLOCK_4X4 && cpi->sf.part_sf.ml_predict_breakout_level >= 1; 4180 if (use_ml_based_breakout) { 4181 av1_ml_predict_breakout(cpi, x, this_rdc, *pb_source_variance, xd->bd, 4182 part_search_state); 4183 } 4184 4185 // Adjust dist breakout threshold according to the partition size. 4186 const int64_t dist_breakout_thr = 4187 cpi->sf.part_sf.partition_search_breakout_dist_thr >> 4188 ((2 * (MAX_SB_SIZE_LOG2 - 2)) - 4189 (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize])); 4190 const int rate_breakout_thr = 4191 cpi->sf.part_sf.partition_search_breakout_rate_thr * 4192 num_pels_log2_lookup[bsize]; 4193 // If all y, u, v transform blocks in this partition are skippable, 4194 // and the dist & rate are within the thresholds, the partition 4195 // search is terminated for current branch of the partition search 4196 // tree. The dist & rate thresholds are set to 0 at speed 0 to 4197 // disable the early termination at that speed. 
4198 if (best_rdc->dist < dist_breakout_thr && 4199 best_rdc->rate < rate_breakout_thr) { 4200 part_search_state->do_square_split = 0; 4201 part_search_state->do_rectangular_split = 0; 4202 } 4203 } 4204 4205 // Early termination: using simple_motion_search features and the 4206 // rate, distortion, and rdcost of PARTITION_NONE, a DNN will make a 4207 // decision on early terminating at PARTITION_NONE. 4208 if (cpi->sf.part_sf.simple_motion_search_early_term_none && cm->show_frame && 4209 !frame_is_intra_only(cm) && bsize >= BLOCK_16X16 && 4210 av1_blk_has_rows_and_cols(&blk_params) && this_rdc->rdcost < INT64_MAX && 4211 this_rdc->rdcost >= 0 && this_rdc->rate < INT_MAX && 4212 this_rdc->rate >= 0 && 4213 (part_search_state->do_square_split || 4214 part_search_state->do_rectangular_split)) { 4215 av1_simple_motion_search_early_term_none(cpi, x, sms_tree, this_rdc, 4216 part_search_state); 4217 } 4218 } 4219 4220 // Decide early termination and rectangular partition pruning 4221 // based on PARTITION_NONE and PARTITION_SPLIT costs. 4222 static void prune_partitions_after_split( 4223 AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, 4224 PartitionSearchState *part_search_state, RD_STATS *best_rdc, 4225 int64_t part_none_rd, int64_t part_split_rd) { 4226 const AV1_COMMON *const cm = &cpi->common; 4227 PartitionBlkParams blk_params = part_search_state->part_blk_params; 4228 const int mi_row = blk_params.mi_row; 4229 const int mi_col = blk_params.mi_col; 4230 const BLOCK_SIZE bsize = blk_params.bsize; 4231 assert(bsize < BLOCK_SIZES_ALL); 4232 4233 // Early termination: using the rd costs of PARTITION_NONE and subblocks 4234 // from PARTITION_SPLIT to determine an early breakout. 
4235 if (cpi->sf.part_sf.ml_early_term_after_part_split_level && 4236 !frame_is_intra_only(cm) && 4237 !part_search_state->terminate_partition_search && 4238 part_search_state->do_rectangular_split && 4239 (part_search_state->partition_rect_allowed[HORZ] || 4240 part_search_state->partition_rect_allowed[VERT])) { 4241 av1_ml_early_term_after_split( 4242 cpi, x, sms_tree, best_rdc->rdcost, part_none_rd, part_split_rd, 4243 part_search_state->split_rd, part_search_state); 4244 } 4245 4246 // Use the rd costs of PARTITION_NONE and subblocks from PARTITION_SPLIT 4247 // to prune out rectangular partitions in some directions. 4248 if (!cpi->sf.part_sf.ml_early_term_after_part_split_level && 4249 cpi->sf.part_sf.ml_prune_partition && !frame_is_intra_only(cm) && 4250 (part_search_state->partition_rect_allowed[HORZ] || 4251 part_search_state->partition_rect_allowed[VERT]) && 4252 !(part_search_state->prune_rect_part[HORZ] || 4253 part_search_state->prune_rect_part[VERT]) && 4254 !part_search_state->terminate_partition_search) { 4255 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, av1_num_planes(cm), 4256 bsize); 4257 av1_ml_prune_rect_partition(cpi, x, best_rdc->rdcost, 4258 part_search_state->none_rd, 4259 part_search_state->split_rd, part_search_state); 4260 } 4261 } 4262 4263 // Returns true if either of the left and top neighbor blocks is larger than 4264 // the current block; false otherwise. 
4265 static inline bool is_neighbor_blk_larger_than_cur_blk(const MACROBLOCKD *xd, 4266 BLOCK_SIZE bsize) { 4267 const int cur_blk_area = (block_size_high[bsize] * block_size_wide[bsize]); 4268 if (xd->left_available) { 4269 const BLOCK_SIZE left_bsize = xd->left_mbmi->bsize; 4270 if (block_size_high[left_bsize] * block_size_wide[left_bsize] > 4271 cur_blk_area) 4272 return true; 4273 } 4274 4275 if (xd->up_available) { 4276 const BLOCK_SIZE above_bsize = xd->above_mbmi->bsize; 4277 if (block_size_high[above_bsize] * block_size_wide[above_bsize] > 4278 cur_blk_area) 4279 return true; 4280 } 4281 return false; 4282 } 4283 4284 static inline void prune_rect_part_using_none_pred_mode( 4285 const MACROBLOCKD *xd, PartitionSearchState *part_state, 4286 PREDICTION_MODE mode, BLOCK_SIZE bsize) { 4287 if (mode == DC_PRED || mode == SMOOTH_PRED) { 4288 // If the prediction mode of NONE partition is either DC_PRED or 4289 // SMOOTH_PRED, it indicates that the current block has less variation. In 4290 // this case, HORZ and VERT partitions are pruned if at least one of left 4291 // and top neighbor blocks is larger than the current block. 4292 if (is_neighbor_blk_larger_than_cur_blk(xd, bsize)) { 4293 part_state->prune_rect_part[HORZ] = 1; 4294 part_state->prune_rect_part[VERT] = 1; 4295 } 4296 } else if (mode == D67_PRED || mode == V_PRED || mode == D113_PRED) { 4297 // If the prediction mode chosen by NONE partition is close to 90 degrees, 4298 // it implies a dominant vertical pattern, and the chance of choosing a 4299 // vertical rectangular partition is high. Hence, horizontal partition is 4300 // pruned in these cases. 4301 part_state->prune_rect_part[HORZ] = 1; 4302 } else if (mode == D157_PRED || mode == H_PRED || mode == D203_PRED) { 4303 // If the prediction mode chosen by NONE partition is close to 180 degrees, 4304 // it implies a dominant horizontal pattern, and the chance of choosing a 4305 // horizontal rectangular partition is high. 
Hence, vertical partition is 4306 // pruned in these cases. 4307 part_state->prune_rect_part[VERT] = 1; 4308 } 4309 } 4310 4311 // PARTITION_NONE search. 4312 static void none_partition_search( 4313 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, MACROBLOCK *x, 4314 PC_TREE *pc_tree, SIMPLE_MOTION_DATA_TREE *sms_tree, 4315 RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, 4316 PartitionSearchState *part_search_state, RD_STATS *best_rdc, 4317 unsigned int *pb_source_variance, int64_t *none_rd, int64_t *part_none_rd) { 4318 const AV1_COMMON *const cm = &cpi->common; 4319 PartitionBlkParams blk_params = part_search_state->part_blk_params; 4320 RD_STATS *this_rdc = &part_search_state->this_rdc; 4321 const int mi_row = blk_params.mi_row; 4322 const int mi_col = blk_params.mi_col; 4323 const BLOCK_SIZE bsize = blk_params.bsize; 4324 assert(bsize < BLOCK_SIZES_ALL); 4325 4326 if (part_search_state->terminate_partition_search || 4327 !part_search_state->partition_none_allowed) 4328 return; 4329 4330 int pt_cost = 0; 4331 RD_STATS best_remain_rdcost; 4332 av1_invalid_rd_stats(&best_remain_rdcost); 4333 4334 // Set PARTITION_NONE context and cost. 4335 set_none_partition_params(cpi, td, x, pc_tree, part_search_state, 4336 &best_remain_rdcost, best_rdc, &pt_cost); 4337 4338 #if CONFIG_COLLECT_PARTITION_STATS 4339 // Timer start for partition None. 4340 PartitionTimingStats *part_timing_stats = 4341 &part_search_state->part_timing_stats; 4342 if (best_remain_rdcost.rdcost >= 0) { 4343 start_partition_block_timer(part_timing_stats, PARTITION_NONE); 4344 } 4345 #endif 4346 // PARTITION_NONE evaluation and cost update. 4347 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, PARTITION_NONE, 4348 bsize, pc_tree->none, best_remain_rdcost); 4349 4350 av1_rd_cost_update(x->rdmult, this_rdc); 4351 4352 #if CONFIG_COLLECT_PARTITION_STATS 4353 // Timer end for partition None. 
4354 if (part_timing_stats->timer_is_on) { 4355 RD_STATS tmp_rdc; 4356 av1_init_rd_stats(&tmp_rdc); 4357 if (this_rdc->rate != INT_MAX) { 4358 tmp_rdc.rate = this_rdc->rate; 4359 tmp_rdc.dist = this_rdc->dist; 4360 tmp_rdc.rdcost = this_rdc->rdcost; 4361 if (blk_params.bsize_at_least_8x8) { 4362 tmp_rdc.rate += pt_cost; 4363 tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); 4364 } 4365 } 4366 end_partition_block_timer(part_timing_stats, PARTITION_NONE, 4367 tmp_rdc.rdcost); 4368 } 4369 #endif 4370 *pb_source_variance = x->source_variance; 4371 if (none_rd) *none_rd = this_rdc->rdcost; 4372 part_search_state->none_rd = this_rdc->rdcost; 4373 if (this_rdc->rate != INT_MAX) { 4374 // Record picked ref frame to prune ref frames for other partition types. 4375 if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions) { 4376 const int ref_type = av1_ref_frame_type(pc_tree->none->mic.ref_frame); 4377 av1_update_picked_ref_frames_mask( 4378 x, ref_type, bsize, cm->seq_params->mib_size, mi_row, mi_col); 4379 } 4380 4381 // Calculate the total cost and update the best partition. 4382 if (blk_params.bsize_at_least_8x8) { 4383 this_rdc->rate += pt_cost; 4384 this_rdc->rdcost = RDCOST(x->rdmult, this_rdc->rate, this_rdc->dist); 4385 } 4386 *part_none_rd = this_rdc->rdcost; 4387 if (this_rdc->rdcost < best_rdc->rdcost) { 4388 *best_rdc = *this_rdc; 4389 part_search_state->found_best_partition = true; 4390 if (blk_params.bsize_at_least_8x8) { 4391 pc_tree->partitioning = PARTITION_NONE; 4392 } 4393 4394 // Disable split and rectangular partition search 4395 // based on PARTITION_NONE cost. 
4396 prune_partitions_after_none(cpi, x, sms_tree, pc_tree->none, 4397 part_search_state, best_rdc, 4398 pb_source_variance); 4399 } 4400 4401 if (cpi->sf.part_sf.prune_rect_part_using_none_pred_mode) 4402 prune_rect_part_using_none_pred_mode(&x->e_mbd, part_search_state, 4403 pc_tree->none->mic.mode, bsize); 4404 } 4405 av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); 4406 } 4407 4408 static inline double get_split_partition_penalty( 4409 BLOCK_SIZE bsize, int split_partition_penalty_level) { 4410 if (!split_partition_penalty_level) return 1.00; 4411 4412 // Higher penalty for smaller block sizes. 4413 static const double penalty_factors[2][SQR_BLOCK_SIZES - 1] = { 4414 { 1.080, 1.040, 1.020, 1.010, 1.000 }, 4415 { 1.100, 1.075, 1.050, 1.025, 1.000 }, 4416 }; 4417 const int sqr_bsize_idx = get_sqr_bsize_idx(bsize); 4418 assert(sqr_bsize_idx > 0 && sqr_bsize_idx < SQR_BLOCK_SIZES); 4419 const double this_penalty_factor = 4420 penalty_factors[split_partition_penalty_level - 1][sqr_bsize_idx - 1]; 4421 return this_penalty_factor; 4422 } 4423 4424 // PARTITION_SPLIT search. 
4425 static void split_partition_search( 4426 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, 4427 TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree, 4428 SIMPLE_MOTION_DATA_TREE *sms_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, 4429 PartitionSearchState *part_search_state, RD_STATS *best_rdc, 4430 SB_MULTI_PASS_MODE multi_pass_mode, int64_t *part_split_rd) { 4431 const AV1_COMMON *const cm = &cpi->common; 4432 PartitionBlkParams blk_params = part_search_state->part_blk_params; 4433 const CommonModeInfoParams *const mi_params = &cm->mi_params; 4434 const int mi_row = blk_params.mi_row; 4435 const int mi_col = blk_params.mi_col; 4436 const BLOCK_SIZE bsize = blk_params.bsize; 4437 assert(bsize < BLOCK_SIZES_ALL); 4438 RD_STATS sum_rdc = part_search_state->sum_rdc; 4439 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 4440 4441 // Check if partition split is allowed. 4442 if (part_search_state->terminate_partition_search || 4443 !part_search_state->do_square_split) 4444 return; 4445 4446 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 4447 if (pc_tree->split[i] == NULL) 4448 pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); 4449 if (!pc_tree->split[i]) 4450 aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, 4451 "Failed to allocate PC_TREE"); 4452 pc_tree->split[i]->index = i; 4453 } 4454 4455 // Initialization of this partition RD stats. 4456 av1_init_rd_stats(&sum_rdc); 4457 sum_rdc.rate = part_search_state->partition_cost[PARTITION_SPLIT]; 4458 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); 4459 4460 int idx; 4461 #if CONFIG_COLLECT_PARTITION_STATS 4462 PartitionTimingStats *part_timing_stats = 4463 &part_search_state->part_timing_stats; 4464 if (best_rdc->rdcost - sum_rdc.rdcost >= 0) { 4465 start_partition_block_timer(part_timing_stats, PARTITION_SPLIT); 4466 } 4467 #endif 4468 // Recursive partition search on 4 sub-blocks. 
4469 for (idx = 0; idx < SUB_PARTITIONS_SPLIT && sum_rdc.rdcost < best_rdc->rdcost; 4470 ++idx) { 4471 const int x_idx = (idx & 1) * blk_params.mi_step; 4472 const int y_idx = (idx >> 1) * blk_params.mi_step; 4473 4474 if (mi_row + y_idx >= mi_params->mi_rows || 4475 mi_col + x_idx >= mi_params->mi_cols) 4476 continue; 4477 4478 pc_tree->split[idx]->index = idx; 4479 int64_t *p_split_rd = &part_search_state->split_rd[idx]; 4480 RD_STATS best_remain_rdcost; 4481 av1_rd_stats_subtraction(x->rdmult, best_rdc, &sum_rdc, 4482 &best_remain_rdcost); 4483 4484 int curr_quad_tree_idx = 0; 4485 if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) { 4486 curr_quad_tree_idx = part_search_state->intra_part_info->quad_tree_idx; 4487 part_search_state->intra_part_info->quad_tree_idx = 4488 4 * curr_quad_tree_idx + idx + 1; 4489 } 4490 // Split partition evaluation of corresponding idx. 4491 // If the RD cost exceeds the best cost then do not 4492 // evaluate other split sub-partitions. 4493 SIMPLE_MOTION_DATA_TREE *const sms_tree_split = 4494 (sms_tree == NULL) ? NULL : sms_tree->split[idx]; 4495 if (!av1_rd_pick_partition( 4496 cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, 4497 &part_search_state->this_rdc, best_remain_rdcost, 4498 pc_tree->split[idx], sms_tree_split, p_split_rd, multi_pass_mode, 4499 &part_search_state->split_part_rect_win[idx])) { 4500 av1_invalid_rd_stats(&sum_rdc); 4501 break; 4502 } 4503 if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) { 4504 part_search_state->intra_part_info->quad_tree_idx = curr_quad_tree_idx; 4505 } 4506 4507 sum_rdc.rate += part_search_state->this_rdc.rate; 4508 sum_rdc.dist += part_search_state->this_rdc.dist; 4509 av1_rd_cost_update(x->rdmult, &sum_rdc); 4510 4511 // Set split ctx as ready for use. 
4512 if (idx <= 1 && (bsize <= BLOCK_8X8 || 4513 pc_tree->split[idx]->partitioning == PARTITION_NONE)) { 4514 const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none->mic; 4515 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 4516 // Neither palette mode nor cfl predicted. 4517 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { 4518 if (mbmi->uv_mode != UV_CFL_PRED) 4519 part_search_state->is_split_ctx_is_ready[idx] = 1; 4520 } 4521 } 4522 } 4523 #if CONFIG_COLLECT_PARTITION_STATS 4524 if (part_timing_stats->timer_is_on) { 4525 end_partition_block_timer(part_timing_stats, PARTITION_SPLIT, 4526 sum_rdc.rdcost); 4527 } 4528 #endif 4529 const int reached_last_index = (idx == SUB_PARTITIONS_SPLIT); 4530 4531 // Calculate the total cost and update the best partition. 4532 *part_split_rd = sum_rdc.rdcost; 4533 if (reached_last_index && sum_rdc.rdcost < best_rdc->rdcost) { 4534 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); 4535 const double penalty_factor = get_split_partition_penalty( 4536 bsize, cpi->sf.part_sf.split_partition_penalty_level); 4537 const int64_t this_rdcost = (int64_t)(sum_rdc.rdcost * penalty_factor); 4538 if (this_rdcost < best_rdc->rdcost) { 4539 *best_rdc = sum_rdc; 4540 part_search_state->found_best_partition = true; 4541 pc_tree->partitioning = PARTITION_SPLIT; 4542 } 4543 } else if (cpi->sf.part_sf.less_rectangular_check_level > 0) { 4544 // Skip rectangular partition test when partition type none gives better 4545 // rd than partition type split. 
4546 if (cpi->sf.part_sf.less_rectangular_check_level == 2 || idx <= 2) { 4547 const int partition_none_valid = part_search_state->none_rd > 0; 4548 const int partition_none_better = 4549 part_search_state->none_rd < sum_rdc.rdcost; 4550 part_search_state->do_rectangular_split &= 4551 !(partition_none_valid && partition_none_better); 4552 } 4553 } 4554 // Restore the context for the following cases: 4555 // 1) Current block size not more than maximum partition size as dry run 4556 // encode happens for these cases 4557 // 2) Current block size same as superblock size as the final encode 4558 // happens for this case 4559 if (bsize <= x->sb_enc.max_partition_size || bsize == cm->seq_params->sb_size) 4560 av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); 4561 } 4562 4563 // The max number of nodes in the partition tree. 4564 // The number of leaf nodes is (128x128) / (4x4) = 1024. 4565 // The number of All possible parent nodes is 1 + 2 + ... + 512 = 1023. 4566 #define NUM_NODES 2048 4567 4568 static void write_partition_tree(AV1_COMP *const cpi, 4569 const PC_TREE *const pc_tree, 4570 const BLOCK_SIZE bsize, const int mi_row, 4571 const int mi_col) { 4572 (void)mi_row; 4573 (void)mi_col; 4574 const char *path = cpi->oxcf.partition_info_path; 4575 char filename[256]; 4576 snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path, 4577 cpi->sb_counter, 0); 4578 FILE *pfile = fopen(filename, "w"); 4579 fprintf(pfile, "%d", bsize); 4580 4581 // Write partition type with BFS order. 4582 const PC_TREE *tree_node_queue[NUM_NODES] = { NULL }; 4583 int q_idx = 0; 4584 int last_idx = 1; 4585 int num_nodes = 1; 4586 4587 // First traversal to get number of leaf nodes. 
4588 tree_node_queue[q_idx] = pc_tree; 4589 while (num_nodes > 0) { 4590 const PC_TREE *node = tree_node_queue[q_idx]; 4591 if (node->partitioning == PARTITION_SPLIT) { 4592 for (int i = 0; i < 4; ++i) { 4593 tree_node_queue[last_idx] = node->split[i]; 4594 ++last_idx; 4595 } 4596 num_nodes += 4; 4597 } 4598 --num_nodes; 4599 ++q_idx; 4600 } 4601 const int num_leafs = last_idx; 4602 fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1); 4603 4604 // Write partitions for each node. 4605 q_idx = 0; 4606 last_idx = 1; 4607 num_nodes = 1; 4608 tree_node_queue[q_idx] = pc_tree; 4609 while (num_nodes > 0) { 4610 const PC_TREE *node = tree_node_queue[q_idx]; 4611 fprintf(pfile, ",%d", node->partitioning); 4612 if (node->partitioning == PARTITION_SPLIT) { 4613 for (int i = 0; i < 4; ++i) { 4614 tree_node_queue[last_idx] = node->split[i]; 4615 ++last_idx; 4616 } 4617 num_nodes += 4; 4618 } 4619 --num_nodes; 4620 ++q_idx; 4621 } 4622 fprintf(pfile, "\n"); 4623 4624 fclose(pfile); 4625 } 4626 4627 #if CONFIG_PARTITION_SEARCH_ORDER 4628 static void verify_write_partition_tree(const AV1_COMP *const cpi, 4629 const PC_TREE *const pc_tree, 4630 const BLOCK_SIZE bsize, 4631 const int config_id, const int mi_row, 4632 const int mi_col) { 4633 (void)mi_row; 4634 (void)mi_col; 4635 const char *path = cpi->oxcf.partition_info_path; 4636 char filename[256]; 4637 snprintf(filename, sizeof(filename), "%s/verify_partition_tree_sb%d_c%d", 4638 path, cpi->sb_counter, config_id); 4639 FILE *pfile = fopen(filename, "w"); 4640 fprintf(pfile, "%d", bsize); 4641 4642 // Write partition type with BFS order. 4643 const PC_TREE *tree_node_queue[NUM_NODES] = { NULL }; 4644 int q_idx = 0; 4645 int last_idx = 1; 4646 int num_nodes = 1; 4647 4648 // First traversal to get number of leaf nodes. 
4649 tree_node_queue[q_idx] = pc_tree; 4650 while (num_nodes > 0) { 4651 const PC_TREE *node = tree_node_queue[q_idx]; 4652 if (node != NULL && node->partitioning == PARTITION_SPLIT) { 4653 for (int i = 0; i < 4; ++i) { 4654 tree_node_queue[last_idx] = node->split[i]; 4655 ++last_idx; 4656 } 4657 num_nodes += 4; 4658 } 4659 --num_nodes; 4660 ++q_idx; 4661 } 4662 const int num_leafs = last_idx; 4663 fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1); 4664 4665 // Write partitions for each node. 4666 q_idx = 0; 4667 last_idx = 1; 4668 num_nodes = 1; 4669 tree_node_queue[q_idx] = pc_tree; 4670 while (num_nodes > 0) { 4671 const PC_TREE *node = tree_node_queue[q_idx]; 4672 if (node != NULL) { // suppress warning 4673 fprintf(pfile, ",%d", node->partitioning); 4674 if (node->partitioning == PARTITION_SPLIT) { 4675 for (int i = 0; i < 4; ++i) { 4676 tree_node_queue[last_idx] = node->split[i]; 4677 ++last_idx; 4678 } 4679 num_nodes += 4; 4680 } 4681 } 4682 --num_nodes; 4683 ++q_idx; 4684 } 4685 fprintf(pfile, "\n"); 4686 4687 fclose(pfile); 4688 } 4689 4690 static int read_partition_tree(AV1_COMP *const cpi, PC_TREE *const pc_tree, 4691 struct aom_internal_error_info *error_info, 4692 const int config_id) { 4693 const AV1_COMMON *const cm = &cpi->common; 4694 const char *path = cpi->oxcf.partition_info_path; 4695 char filename[256]; 4696 snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path, 4697 cpi->sb_counter, config_id); 4698 FILE *pfile = fopen(filename, "r"); 4699 if (pfile == NULL) { 4700 aom_internal_error(cm->error, AOM_CODEC_ERROR, "Can't find input file: %s.", 4701 filename); 4702 } 4703 4704 int read_bsize; 4705 int num_nodes; 4706 int num_configs; 4707 fscanf(pfile, "%d,%d,%d", &read_bsize, &num_nodes, &num_configs); 4708 assert(read_bsize == cpi->common.seq_params->sb_size); 4709 BLOCK_SIZE bsize = (BLOCK_SIZE)read_bsize; 4710 assert(bsize == pc_tree->block_size); 4711 4712 PC_TREE *tree_node_queue[NUM_NODES] = { NULL }; 4713 int 
last_idx = 1; 4714 int q_idx = 0; 4715 tree_node_queue[q_idx] = pc_tree; 4716 while (num_nodes > 0) { 4717 int partitioning; 4718 fscanf(pfile, ",%d", &partitioning); 4719 assert(partitioning >= PARTITION_NONE && 4720 partitioning < EXT_PARTITION_TYPES); 4721 PC_TREE *node = tree_node_queue[q_idx]; 4722 if (node != NULL) { 4723 node->partitioning = partitioning; 4724 bsize = node->block_size; 4725 } 4726 if (partitioning == PARTITION_SPLIT) { 4727 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 4728 for (int i = 0; i < 4; ++i) { 4729 if (node != NULL) { // Suppress warning 4730 node->split[i] = av1_alloc_pc_tree_node(subsize); 4731 if (!node->split[i]) 4732 aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, 4733 "Failed to allocate PC_TREE"); 4734 node->split[i]->index = i; 4735 tree_node_queue[last_idx] = node->split[i]; 4736 ++last_idx; 4737 } 4738 } 4739 } 4740 --num_nodes; 4741 ++q_idx; 4742 } 4743 fclose(pfile); 4744 4745 return num_configs; 4746 } 4747 4748 static RD_STATS rd_search_for_fixed_partition( 4749 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, 4750 TokenExtra **tp, SIMPLE_MOTION_DATA_TREE *sms_tree, int mi_row, int mi_col, 4751 const BLOCK_SIZE bsize, PC_TREE *pc_tree) { 4752 const PARTITION_TYPE partition = pc_tree->partitioning; 4753 const AV1_COMMON *const cm = &cpi->common; 4754 const int num_planes = av1_num_planes(cm); 4755 MACROBLOCK *const x = &td->mb; 4756 MACROBLOCKD *const xd = &x->e_mbd; 4757 TileInfo *const tile_info = &tile_data->tile_info; 4758 RD_STATS best_rdc; 4759 av1_invalid_rd_stats(&best_rdc); 4760 int sum_subblock_rate = 0; 4761 int64_t sum_subblock_dist = 0; 4762 PartitionSearchState part_search_state; 4763 init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col, 4764 bsize); 4765 // Override partition costs at the edges of the frame in the same 4766 // way as in read_partition (see decodeframe.c). 
4767 PartitionBlkParams blk_params = part_search_state.part_blk_params; 4768 if (!av1_blk_has_rows_and_cols(&blk_params)) 4769 set_partition_cost_for_edge_blk(cm, &part_search_state); 4770 4771 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 4772 4773 // Save rdmult before it might be changed, so it can be restored later. 4774 const int orig_rdmult = x->rdmult; 4775 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); 4776 (void)orig_rdmult; 4777 4778 // Set the context. 4779 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; 4780 xd->above_txfm_context = 4781 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 4782 xd->left_txfm_context = 4783 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 4784 av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 4785 4786 assert(bsize < BLOCK_SIZES_ALL); 4787 unsigned int pb_source_variance = UINT_MAX; 4788 int64_t part_none_rd = INT64_MAX; 4789 int64_t none_rd = INT64_MAX; 4790 int inc_step[NUM_PART4_TYPES] = { 0 }; 4791 if (partition == PARTITION_HORZ_4) inc_step[HORZ4] = mi_size_high[bsize] / 4; 4792 if (partition == PARTITION_VERT_4) inc_step[VERT4] = mi_size_wide[bsize] / 4; 4793 4794 switch (partition) { 4795 case PARTITION_NONE: 4796 none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx, 4797 &part_search_state, &best_rdc, &pb_source_variance, 4798 &none_rd, &part_none_rd); 4799 break; 4800 case PARTITION_HORZ: 4801 rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx, 4802 &part_search_state, &best_rdc, NULL, HORZ, 4803 HORZ); 4804 break; 4805 case PARTITION_VERT: 4806 rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx, 4807 &part_search_state, &best_rdc, NULL, VERT, 4808 VERT); 4809 break; 4810 case PARTITION_HORZ_A: 4811 ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 4812 &part_search_state, &best_rdc, NULL, 4813 pb_source_variance, 1, HORZ_A, HORZ_A); 4814 break; 4815 case PARTITION_HORZ_B: 4816 
ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 4817 &part_search_state, &best_rdc, NULL, 4818 pb_source_variance, 1, HORZ_B, HORZ_B); 4819 break; 4820 case PARTITION_VERT_A: 4821 ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 4822 &part_search_state, &best_rdc, NULL, 4823 pb_source_variance, 1, VERT_A, VERT_A); 4824 break; 4825 case PARTITION_VERT_B: 4826 ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 4827 &part_search_state, &best_rdc, NULL, 4828 pb_source_variance, 1, VERT_B, VERT_B); 4829 break; 4830 case PARTITION_HORZ_4: 4831 rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 4832 pc_tree->horizontal4, &part_search_state, &best_rdc, 4833 inc_step, PARTITION_HORZ_4); 4834 break; 4835 case PARTITION_VERT_4: 4836 rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 4837 pc_tree->vertical4, &part_search_state, &best_rdc, 4838 inc_step, PARTITION_VERT_4); 4839 break; 4840 case PARTITION_SPLIT: 4841 for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; ++idx) { 4842 const BLOCK_SIZE subsize = 4843 get_partition_subsize(bsize, PARTITION_SPLIT); 4844 assert(subsize < BLOCK_SIZES_ALL); 4845 const int next_mi_row = 4846 idx < 2 ? mi_row : mi_row + mi_size_high[subsize]; 4847 const int next_mi_col = 4848 idx % 2 == 0 ? 
mi_col : mi_col + mi_size_wide[subsize]; 4849 if (next_mi_row >= cm->mi_params.mi_rows || 4850 next_mi_col >= cm->mi_params.mi_cols) { 4851 continue; 4852 } 4853 const RD_STATS subblock_rdc = rd_search_for_fixed_partition( 4854 cpi, td, tile_data, tp, sms_tree->split[idx], next_mi_row, 4855 next_mi_col, subsize, pc_tree->split[idx]); 4856 sum_subblock_rate += subblock_rdc.rate; 4857 sum_subblock_dist += subblock_rdc.dist; 4858 } 4859 best_rdc.rate = sum_subblock_rate; 4860 best_rdc.rate += part_search_state.partition_cost[PARTITION_SPLIT]; 4861 best_rdc.dist = sum_subblock_dist; 4862 best_rdc.rdcost = RDCOST(x->rdmult, best_rdc.rate, best_rdc.dist); 4863 break; 4864 default: 4865 assert(0 && "invalid partition type."); 4866 aom_internal_error(cm->error, AOM_CODEC_ERROR, "Invalid partition type."); 4867 } 4868 // Note: it is necessary to restore context information. 4869 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 4870 4871 if (bsize != cm->seq_params->sb_size) { 4872 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, 4873 pc_tree, NULL); 4874 } 4875 x->rdmult = orig_rdmult; 4876 4877 return best_rdc; 4878 } 4879 4880 static void prepare_sb_features_before_search( 4881 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row, 4882 int mi_col, const BLOCK_SIZE bsize, aom_partition_features_t *features) { 4883 av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col, 4884 bsize, features); 4885 collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, features); 4886 } 4887 4888 static void update_partition_stats(const RD_STATS *const this_rdcost, 4889 aom_partition_stats_t *stats) { 4890 stats->rate = this_rdcost->rate; 4891 stats->dist = this_rdcost->dist; 4892 stats->rdcost = this_rdcost->rdcost; 4893 } 4894 4895 static void build_pc_tree_from_part_decision( 4896 const aom_partition_decision_t *partition_decision, 4897 const BLOCK_SIZE this_bsize, PC_TREE *pc_tree, 4898 struct 
aom_internal_error_info *error_info) { 4899 BLOCK_SIZE bsize = this_bsize; 4900 int num_nodes = partition_decision->num_nodes; 4901 PC_TREE *tree_node_queue[NUM_NODES] = { NULL }; 4902 int last_idx = 1; 4903 int q_idx = 0; 4904 tree_node_queue[q_idx] = pc_tree; 4905 while (num_nodes > 0) { 4906 const int partitioning = partition_decision->partition_decision[q_idx]; 4907 assert(partitioning >= PARTITION_NONE && 4908 partitioning < EXT_PARTITION_TYPES); 4909 PC_TREE *node = tree_node_queue[q_idx]; 4910 if (node != NULL) { 4911 node->partitioning = partitioning; 4912 bsize = node->block_size; 4913 } 4914 if (partitioning == PARTITION_SPLIT) { 4915 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 4916 for (int i = 0; i < 4; ++i) { 4917 if (node != NULL) { // Suppress warning 4918 node->split[i] = av1_alloc_pc_tree_node(subsize); 4919 if (!node->split[i]) 4920 aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, 4921 "Failed to allocate PC_TREE"); 4922 node->split[i]->index = i; 4923 tree_node_queue[last_idx] = node->split[i]; 4924 ++last_idx; 4925 } 4926 } 4927 } 4928 --num_nodes; 4929 ++q_idx; 4930 } 4931 } 4932 4933 // The ML model needs to provide the whole decision tree for the superblock. 
4934 static bool ml_partition_search_whole_tree(AV1_COMP *const cpi, ThreadData *td, 4935 TileDataEnc *tile_data, 4936 TokenExtra **tp, 4937 SIMPLE_MOTION_DATA_TREE *sms_root, 4938 int mi_row, int mi_col, 4939 const BLOCK_SIZE bsize) { 4940 AV1_COMMON *const cm = &cpi->common; 4941 MACROBLOCK *const x = &td->mb; 4942 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 4943 struct aom_internal_error_info *error_info = x->e_mbd.error_info; 4944 aom_partition_features_t features; 4945 prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize, 4946 &features); 4947 features.mi_row = mi_row; 4948 features.mi_col = mi_col; 4949 features.frame_width = cpi->frame_info.frame_width; 4950 features.frame_height = cpi->frame_info.frame_height; 4951 features.block_size = bsize; 4952 av1_ext_part_send_features(ext_part_controller, &features); 4953 4954 // rd mode search (dry run) for a valid partition decision from the ml model. 4955 aom_partition_decision_t partition_decision; 4956 do { 4957 const bool valid_decision = av1_ext_part_get_partition_decision( 4958 ext_part_controller, &partition_decision); 4959 if (!valid_decision) return false; 4960 4961 // First, let's take the easy approach. 4962 // We require that the ml model has to provide partition decisions for the 4963 // whole superblock. 
4964 td->pc_root = av1_alloc_pc_tree_node(bsize); 4965 if (!td->pc_root) 4966 aom_internal_error(error_info, AOM_CODEC_MEM_ERROR, 4967 "Failed to allocate PC_TREE"); 4968 build_pc_tree_from_part_decision(&partition_decision, bsize, td->pc_root, 4969 error_info); 4970 4971 const RD_STATS this_rdcost = rd_search_for_fixed_partition( 4972 cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, td->pc_root); 4973 aom_partition_stats_t stats; 4974 update_partition_stats(&this_rdcost, &stats); 4975 av1_ext_part_send_partition_stats(ext_part_controller, &stats); 4976 if (!partition_decision.is_final_decision) { 4977 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0, 4978 cpi->sf.part_sf.partition_search_type); 4979 td->pc_root = NULL; 4980 } 4981 } while (!partition_decision.is_final_decision); 4982 4983 // Encode with the selected mode and partition. 4984 set_cb_offsets(x->cb_offset, 0, 0); 4985 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, 4986 td->pc_root, NULL); 4987 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0, 4988 cpi->sf.part_sf.partition_search_type); 4989 td->pc_root = NULL; 4990 4991 return true; 4992 } 4993 4994 // Use a bitmask to represent the valid partition types for the current 4995 // block. "1" represents the corresponding partition type is vaild. 
4996 // The least significant bit represents "PARTITION_NONE", the 4997 // largest significant bit represents "PARTITION_VERT_4", follow 4998 // the enum order for PARTITION_TYPE in "enums.h" 4999 static int get_valid_partition_types( 5000 const AV1_COMP *const cpi, 5001 const PartitionSearchState *const part_search_state, 5002 const BLOCK_SIZE bsize) { 5003 const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg; 5004 const PartitionBlkParams blk_params = part_search_state->part_blk_params; 5005 int valid_types = 0; 5006 // PARTITION_NONE 5007 valid_types |= (part_search_state->partition_none_allowed << 0); 5008 // PARTITION_HORZ 5009 valid_types |= (part_search_state->partition_rect_allowed[HORZ] << 1); 5010 // PARTITION_VERT 5011 valid_types |= (part_search_state->partition_rect_allowed[VERT] << 2); 5012 // PARTITION_SPLIT 5013 valid_types |= (part_search_state->do_square_split << 3); 5014 // PARTITION_HORZ_A 5015 const int ext_partition_allowed = part_search_state->do_rectangular_split && 5016 av1_blk_has_rows_and_cols(&blk_params); 5017 const int horzab_partition_allowed = 5018 ext_partition_allowed && part_cfg->enable_ab_partitions && 5019 part_search_state->partition_rect_allowed[HORZ]; 5020 valid_types |= (horzab_partition_allowed << 4); 5021 // PARTITION_HORZ_B 5022 valid_types |= (horzab_partition_allowed << 5); 5023 // PARTITION_VERT_A 5024 const int vertab_partition_allowed = 5025 ext_partition_allowed && part_cfg->enable_ab_partitions && 5026 part_search_state->partition_rect_allowed[VERT]; 5027 valid_types |= (vertab_partition_allowed << 6); 5028 // PARTITION_VERT_B 5029 valid_types |= (vertab_partition_allowed << 7); 5030 // PARTITION_HORZ_4 5031 const int partition4_allowed = part_cfg->enable_1to4_partitions && 5032 ext_partition_allowed && 5033 bsize != BLOCK_128X128; 5034 const int horz4_allowed = 5035 partition4_allowed && part_search_state->partition_rect_allowed[HORZ] && 5036 get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ_4), 
5037 part_search_state->ss_x, 5038 part_search_state->ss_y) != BLOCK_INVALID; 5039 valid_types |= (horz4_allowed << 8); 5040 // PARTITION_VERT_4 5041 const int vert4_allowed = 5042 partition4_allowed && part_search_state->partition_rect_allowed[HORZ] && 5043 get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT_4), 5044 part_search_state->ss_x, 5045 part_search_state->ss_y) != BLOCK_INVALID; 5046 valid_types |= (vert4_allowed << 9); 5047 5048 return valid_types; 5049 } 5050 5051 static void prepare_tpl_stats_block(const AV1_COMP *const cpi, 5052 const BLOCK_SIZE bsize, const int mi_row, 5053 const int mi_col, int64_t *intra_cost, 5054 int64_t *inter_cost, int64_t *mc_dep_cost) { 5055 const AV1_COMMON *const cm = &cpi->common; 5056 GF_GROUP *gf_group = &cpi->ppi->gf_group; 5057 if (gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE || 5058 gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) { 5059 return; 5060 } 5061 5062 TplParams *const tpl_data = &cpi->ppi->tpl_data; 5063 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cpi->gf_frame_index]; 5064 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 5065 // If tpl stats is not established, early return 5066 if (!tpl_data->ready || gf_group->max_layer_depth_allowed == 0) { 5067 return; 5068 } 5069 5070 const int tpl_stride = tpl_frame->stride; 5071 const int step = 1 << tpl_data->tpl_stats_block_mis_log2; 5072 const int mi_width = 5073 AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col); 5074 const int mi_height = 5075 AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row); 5076 5077 int64_t sum_intra_cost = 0; 5078 int64_t sum_inter_cost = 0; 5079 int64_t sum_mc_dep_cost = 0; 5080 for (int row = 0; row < mi_height; row += step) { 5081 for (int col = 0; col < mi_width; col += step) { 5082 TplDepStats *this_stats = 5083 &tpl_stats[av1_tpl_ptr_pos(mi_row + row, mi_col + col, tpl_stride, 5084 tpl_data->tpl_stats_block_mis_log2)]; 5085 sum_intra_cost += 
this_stats->intra_cost; 5086 sum_inter_cost += this_stats->inter_cost; 5087 const int64_t mc_dep_delta = 5088 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, 5089 this_stats->mc_dep_dist); 5090 sum_mc_dep_cost += mc_dep_delta; 5091 } 5092 } 5093 5094 *intra_cost = sum_intra_cost; 5095 *inter_cost = sum_inter_cost; 5096 *mc_dep_cost = sum_mc_dep_cost; 5097 } 5098 5099 static bool recursive_partition(AV1_COMP *const cpi, ThreadData *td, 5100 TileDataEnc *tile_data, TokenExtra **tp, 5101 SIMPLE_MOTION_DATA_TREE *sms_root, 5102 PC_TREE *pc_tree, int mi_row, int mi_col, 5103 const BLOCK_SIZE bsize, RD_STATS *this_rdcost) { 5104 const AV1_COMMON *const cm = &cpi->common; 5105 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 5106 MACROBLOCK *const x = &td->mb; 5107 MACROBLOCKD *const xd = &x->e_mbd; 5108 if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) { 5109 return false; 5110 } 5111 aom_partition_decision_t partition_decision; 5112 do { 5113 PartitionSearchState part_search_state; 5114 // Initialization of state variables used in partition search. 5115 // TODO(chengchen): check if there is hidden conditions that don't allow 5116 // all possible partition types. 5117 init_partition_search_state_params(x, cpi, &part_search_state, mi_row, 5118 mi_col, bsize); 5119 // Override partition costs at the edges of the frame in the same 5120 // way as in read_partition (see decodeframe.c). 
5121 PartitionBlkParams blk_params = part_search_state.part_blk_params; 5122 if (!av1_blk_has_rows_and_cols(&blk_params)) 5123 set_partition_cost_for_edge_blk(cm, &part_search_state); 5124 const int orig_rdmult = x->rdmult; 5125 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); 5126 const int valid_partition_types = 5127 get_valid_partition_types(cpi, &part_search_state, bsize); 5128 const FRAME_UPDATE_TYPE update_type = 5129 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index); 5130 const int qindex = av1_get_qindex(&cm->seg, xd->mi[0]->segment_id, 5131 cm->quant_params.base_qindex); 5132 // RD multiplier 5133 const int rdmult = x->rdmult; 5134 // pyramid level 5135 const int pyramid_level = 5136 cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index]; 5137 x->rdmult = orig_rdmult; 5138 // Neighbor information 5139 const int has_above = !!xd->above_mbmi; 5140 const int has_left = !!xd->left_mbmi; 5141 const BLOCK_SIZE above_bsize = 5142 has_above ? xd->above_mbmi->bsize : BLOCK_INVALID; 5143 const BLOCK_SIZE left_bsize = 5144 has_left ? xd->left_mbmi->bsize : BLOCK_INVALID; 5145 const int above_block_width = 5146 above_bsize == BLOCK_INVALID ? -1 : block_size_wide[above_bsize]; 5147 const int above_block_height = 5148 above_bsize == BLOCK_INVALID ? -1 : block_size_high[above_bsize]; 5149 const int left_block_width = 5150 left_bsize == BLOCK_INVALID ? -1 : block_size_wide[left_bsize]; 5151 const int left_block_height = 5152 left_bsize == BLOCK_INVALID ? 
-1 : block_size_high[left_bsize]; 5153 // Prepare simple motion search stats as features 5154 unsigned int block_sse = -1; 5155 unsigned int block_var = -1; 5156 unsigned int sub_block_sse[4] = { -1, -1, -1, -1 }; 5157 unsigned int sub_block_var[4] = { -1, -1, -1, -1 }; 5158 unsigned int horz_block_sse[2] = { -1, -1 }; 5159 unsigned int horz_block_var[2] = { -1, -1 }; 5160 unsigned int vert_block_sse[2] = { -1, -1 }; 5161 unsigned int vert_block_var[2] = { -1, -1 }; 5162 av1_prepare_motion_search_features_block( 5163 cpi, td, tile_data, mi_row, mi_col, bsize, valid_partition_types, 5164 &block_sse, &block_var, sub_block_sse, sub_block_var, horz_block_sse, 5165 horz_block_var, vert_block_sse, vert_block_var); 5166 // Prepare tpl stats for the current block as features 5167 int64_t tpl_intra_cost = -1; 5168 int64_t tpl_inter_cost = -1; 5169 int64_t tpl_mc_dep_cost = -1; 5170 prepare_tpl_stats_block(cpi, bsize, mi_row, mi_col, &tpl_intra_cost, 5171 &tpl_inter_cost, &tpl_mc_dep_cost); 5172 5173 aom_partition_features_t features; 5174 features.mi_row = mi_row; 5175 features.mi_col = mi_col; 5176 features.frame_width = cpi->frame_info.frame_width; 5177 features.frame_height = cpi->frame_info.frame_height; 5178 features.block_size = bsize; 5179 features.valid_partition_types = valid_partition_types; 5180 features.update_type = update_type; 5181 features.qindex = qindex; 5182 features.rdmult = rdmult; 5183 features.pyramid_level = pyramid_level; 5184 features.has_above_block = has_above; 5185 features.above_block_width = above_block_width; 5186 features.above_block_height = above_block_height; 5187 features.has_left_block = has_left; 5188 features.left_block_width = left_block_width; 5189 features.left_block_height = left_block_height; 5190 features.block_sse = block_sse; 5191 features.block_var = block_var; 5192 for (int i = 0; i < 4; ++i) { 5193 features.sub_block_sse[i] = sub_block_sse[i]; 5194 features.sub_block_var[i] = sub_block_var[i]; 5195 } 5196 for (int i = 0; i 
< 2; ++i) { 5197 features.horz_block_sse[i] = horz_block_sse[i]; 5198 features.horz_block_var[i] = horz_block_var[i]; 5199 features.vert_block_sse[i] = vert_block_sse[i]; 5200 features.vert_block_var[i] = vert_block_var[i]; 5201 } 5202 features.tpl_intra_cost = tpl_intra_cost; 5203 features.tpl_inter_cost = tpl_inter_cost; 5204 features.tpl_mc_dep_cost = tpl_mc_dep_cost; 5205 av1_ext_part_send_features(ext_part_controller, &features); 5206 const bool valid_decision = av1_ext_part_get_partition_decision( 5207 ext_part_controller, &partition_decision); 5208 if (!valid_decision) return false; 5209 pc_tree->partitioning = partition_decision.current_decision; 5210 5211 av1_init_rd_stats(this_rdcost); 5212 if (partition_decision.current_decision == PARTITION_SPLIT) { 5213 assert(block_size_wide[bsize] >= 8 && block_size_high[bsize] >= 8); 5214 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 5215 RD_STATS split_rdc[SUB_PARTITIONS_SPLIT]; 5216 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 5217 av1_init_rd_stats(&split_rdc[i]); 5218 if (pc_tree->split[i] == NULL) 5219 pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); 5220 if (!pc_tree->split[i]) 5221 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 5222 "Failed to allocate PC_TREE"); 5223 pc_tree->split[i]->index = i; 5224 } 5225 const int orig_rdmult_tmp = x->rdmult; 5226 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); 5227 // TODO(chengchen): check boundary conditions 5228 // top-left 5229 recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[0], 5230 mi_row, mi_col, subsize, &split_rdc[0]); 5231 // top-right 5232 recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[1], 5233 mi_row, mi_col + mi_size_wide[subsize], subsize, 5234 &split_rdc[1]); 5235 // bottom-left 5236 recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[2], 5237 mi_row + mi_size_high[subsize], mi_col, subsize, 5238 &split_rdc[2]); 5239 // bottom_right 
5240 recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[3], 5241 mi_row + mi_size_high[subsize], 5242 mi_col + mi_size_wide[subsize], subsize, 5243 &split_rdc[3]); 5244 this_rdcost->rate += part_search_state.partition_cost[PARTITION_SPLIT]; 5245 // problem is here, the rdmult is different from the rdmult in sub block. 5246 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 5247 this_rdcost->rate += split_rdc[i].rate; 5248 this_rdcost->dist += split_rdc[i].dist; 5249 av1_rd_cost_update(x->rdmult, this_rdcost); 5250 } 5251 x->rdmult = orig_rdmult_tmp; 5252 } else { 5253 *this_rdcost = rd_search_for_fixed_partition( 5254 cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, pc_tree); 5255 } 5256 5257 aom_partition_stats_t stats; 5258 update_partition_stats(this_rdcost, &stats); 5259 av1_ext_part_send_partition_stats(ext_part_controller, &stats); 5260 if (!partition_decision.is_final_decision) { 5261 if (partition_decision.current_decision == PARTITION_SPLIT) { 5262 for (int i = 0; i < 4; ++i) { 5263 if (pc_tree->split[i] != NULL) { 5264 av1_free_pc_tree_recursive(pc_tree->split[i], av1_num_planes(cm), 0, 5265 0, 5266 cpi->sf.part_sf.partition_search_type); 5267 pc_tree->split[i] = NULL; 5268 } 5269 } 5270 } 5271 } 5272 } while (!partition_decision.is_final_decision); 5273 5274 return true; 5275 } 5276 5277 // The ML model only needs to make decisions for the current block each time. 
5278 static bool ml_partition_search_partial(AV1_COMP *const cpi, ThreadData *td, 5279 TileDataEnc *tile_data, TokenExtra **tp, 5280 SIMPLE_MOTION_DATA_TREE *sms_root, 5281 int mi_row, int mi_col, 5282 const BLOCK_SIZE bsize) { 5283 AV1_COMMON *const cm = &cpi->common; 5284 MACROBLOCK *const x = &td->mb; 5285 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 5286 aom_partition_features_t features; 5287 prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize, 5288 &features); 5289 features.mi_row = mi_row; 5290 features.mi_col = mi_col; 5291 features.frame_width = cpi->frame_info.frame_width; 5292 features.frame_height = cpi->frame_info.frame_height; 5293 features.block_size = bsize; 5294 av1_ext_part_send_features(ext_part_controller, &features); 5295 td->pc_root = av1_alloc_pc_tree_node(bsize); 5296 if (!td->pc_root) 5297 aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR, 5298 "Failed to allocate PC_TREE"); 5299 5300 RD_STATS rdcost; 5301 const bool valid_partition = 5302 recursive_partition(cpi, td, tile_data, tp, sms_root, td->pc_root, mi_row, 5303 mi_col, bsize, &rdcost); 5304 if (!valid_partition) { 5305 return false; 5306 } 5307 5308 // Encode with the selected mode and partition. 
5309 set_cb_offsets(x->cb_offset, 0, 0); 5310 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, 5311 td->pc_root, NULL); 5312 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0, 5313 cpi->sf.part_sf.partition_search_type); 5314 td->pc_root = NULL; 5315 5316 return true; 5317 } 5318 5319 bool av1_rd_partition_search(AV1_COMP *const cpi, ThreadData *td, 5320 TileDataEnc *tile_data, TokenExtra **tp, 5321 SIMPLE_MOTION_DATA_TREE *sms_root, int mi_row, 5322 int mi_col, const BLOCK_SIZE bsize, 5323 RD_STATS *best_rd_cost) { 5324 AV1_COMMON *const cm = &cpi->common; 5325 if (cpi->ext_part_controller.ready) { 5326 bool valid_search = true; 5327 const aom_ext_part_decision_mode_t decision_mode = 5328 av1_get_ext_part_decision_mode(&cpi->ext_part_controller); 5329 if (decision_mode == AOM_EXT_PART_WHOLE_TREE) { 5330 valid_search = ml_partition_search_whole_tree( 5331 cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize); 5332 } else if (decision_mode == AOM_EXT_PART_RECURSIVE) { 5333 valid_search = ml_partition_search_partial( 5334 cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize); 5335 } else { 5336 assert(0 && "Unknown decision mode."); 5337 return false; 5338 } 5339 if (!valid_search) { 5340 aom_internal_error( 5341 cm->error, AOM_CODEC_ERROR, 5342 "Invalid search from ML model, partition search failed"); 5343 } 5344 return true; 5345 } 5346 5347 MACROBLOCK *const x = &td->mb; 5348 MACROBLOCKD *const xd = &x->e_mbd; 5349 int best_idx = 0; 5350 int64_t min_rdcost = INT64_MAX; 5351 int num_configs; 5352 int i = 0; 5353 do { 5354 td->pc_root = av1_alloc_pc_tree_node(bsize); 5355 if (!td->pc_root) 5356 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 5357 "Failed to allocate PC_TREE"); 5358 num_configs = read_partition_tree(cpi, td->pc_root, xd->error_info, i); 5359 if (num_configs <= 0) { 5360 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0, 5361 cpi->sf.part_sf.partition_search_type); 5362 td->pc_root = 
NULL; 5363 aom_internal_error(xd->error_info, AOM_CODEC_ERROR, "Invalid configs."); 5364 } 5365 verify_write_partition_tree(cpi, td->pc_root, bsize, i, mi_row, mi_col); 5366 if (i == 0) { 5367 AOM_CHECK_MEM_ERROR(xd->error_info, x->rdcost, 5368 aom_calloc(num_configs, sizeof(*x->rdcost))); 5369 } 5370 // Encode the block with the given partition tree. Get rdcost and encoding 5371 // time. 5372 x->rdcost[i] = rd_search_for_fixed_partition( 5373 cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, td->pc_root); 5374 5375 if (x->rdcost[i].rdcost < min_rdcost) { 5376 min_rdcost = x->rdcost[i].rdcost; 5377 best_idx = i; 5378 *best_rd_cost = x->rdcost[i]; 5379 } 5380 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0, 5381 cpi->sf.part_sf.partition_search_type); 5382 td->pc_root = NULL; 5383 ++i; 5384 } while (i < num_configs); 5385 5386 aom_free(x->rdcost); 5387 x->rdcost = NULL; 5388 // Encode with the partition configuration with the smallest rdcost. 5389 td->pc_root = av1_alloc_pc_tree_node(bsize); 5390 if (!td->pc_root) 5391 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 5392 "Failed to allocate PC_TREE"); 5393 read_partition_tree(cpi, td->pc_root, xd->error_info, best_idx); 5394 rd_search_for_fixed_partition(cpi, td, tile_data, tp, sms_root, mi_row, 5395 mi_col, bsize, td->pc_root); 5396 set_cb_offsets(x->cb_offset, 0, 0); 5397 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, 5398 td->pc_root, NULL); 5399 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0, 5400 cpi->sf.part_sf.partition_search_type); 5401 td->pc_root = NULL; 5402 ++cpi->sb_counter; 5403 5404 return true; 5405 } 5406 #endif // CONFIG_PARTITION_SEARCH_ORDER 5407 5408 static inline bool should_do_dry_run_encode_for_current_block( 5409 BLOCK_SIZE sb_size, BLOCK_SIZE max_partition_size, int curr_block_index, 5410 BLOCK_SIZE bsize) { 5411 if (bsize > max_partition_size) return false; 5412 5413 // Enable the reconstruction with dry-run for 
the 4th sub-block only if its 5414 // parent block's reconstruction with dry-run is skipped. If 5415 // max_partition_size is the same as immediate split of superblock, then avoid 5416 // reconstruction of the 4th sub-block, as this data is not consumed. 5417 if (curr_block_index != 3) return true; 5418 5419 const BLOCK_SIZE sub_sb_size = 5420 get_partition_subsize(sb_size, PARTITION_SPLIT); 5421 return bsize == max_partition_size && sub_sb_size != max_partition_size; 5422 } 5423 5424 static void log_sub_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, 5425 double *var_min, double *var_max) { 5426 // This functions returns a the minimum and maximum log variances for 4x4 5427 // sub blocks in the current block. 5428 5429 const MACROBLOCKD *const xd = &x->e_mbd; 5430 const int is_hbd = is_cur_buf_hbd(xd); 5431 const int right_overflow = 5432 (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0; 5433 const int bottom_overflow = 5434 (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0; 5435 const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow; 5436 const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow; 5437 5438 // Initialize minimum variance to a large value and maximum variance to 0. 5439 double min_var_4x4 = (double)INT_MAX; 5440 double max_var_4x4 = 0.0; 5441 5442 aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf; 5443 for (int i = 0; i < bh; i += MI_SIZE) { 5444 for (int j = 0; j < bw; j += MI_SIZE) { 5445 int var; 5446 // Calculate the 4x4 sub-block variance. 
5447 var = av1_calc_normalized_variance( 5448 vf, x->plane[0].src.buf + (i * x->plane[0].src.stride) + j, 5449 x->plane[0].src.stride, is_hbd); 5450 5451 // Record min and max for over-arching block 5452 min_var_4x4 = AOMMIN(min_var_4x4, var); 5453 max_var_4x4 = AOMMAX(max_var_4x4, var); 5454 } 5455 } 5456 *var_min = log1p(min_var_4x4 / 16.0); 5457 *var_max = log1p(max_var_4x4 / 16.0); 5458 } 5459 5460 static inline void set_sms_tree_partitioning(SIMPLE_MOTION_DATA_TREE *sms_tree, 5461 PARTITION_TYPE partition) { 5462 if (sms_tree == NULL) return; 5463 sms_tree->partitioning = partition; 5464 } 5465 5466 /*!\brief AV1 block partition search (full search). 5467 * 5468 * \ingroup partition_search 5469 * \callgraph 5470 * Searches for the best partition pattern for a block based on the 5471 * rate-distortion cost, and returns a bool value to indicate whether a valid 5472 * partition pattern is found. The partition can recursively go down to the 5473 * smallest block size. 5474 * 5475 * \param[in] cpi Top-level encoder structure 5476 * \param[in] td Pointer to thread data 5477 * \param[in] tile_data Pointer to struct holding adaptive 5478 data/contexts/models for the tile during 5479 encoding 5480 * \param[in] tp Pointer to the starting token 5481 * \param[in] mi_row Row coordinate of the block in a step size 5482 of MI_SIZE 5483 * \param[in] mi_col Column coordinate of the block in a step 5484 size of MI_SIZE 5485 * \param[in] bsize Current block size 5486 * \param[in] rd_cost Pointer to the final rd cost of the block 5487 * \param[in] best_rdc Upper bound of rd cost of a valid partition 5488 * \param[in] pc_tree Pointer to the PC_TREE node storing the 5489 picked partitions and mode info for the 5490 current block 5491 * \param[in] sms_tree Pointer to struct holding simple motion 5492 search data for the current block 5493 * \param[in] none_rd Pointer to the rd cost in the case of not 5494 splitting the current block 5495 * \param[in] multi_pass_mode 
SB_SINGLE_PASS/SB_DRY_PASS/SB_WET_PASS 5496 * \param[in] rect_part_win_info Pointer to struct storing whether horz/vert 5497 partition outperforms previously tested 5498 partitions 5499 * 5500 * \return A bool value is returned indicating if a valid partition is found. 5501 * The pc_tree struct is modified to store the picked partition and modes. 5502 * The rd_cost struct is also updated with the RD stats corresponding to the 5503 * best partition found. 5504 */ 5505 bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td, 5506 TileDataEnc *tile_data, TokenExtra **tp, int mi_row, 5507 int mi_col, BLOCK_SIZE bsize, RD_STATS *rd_cost, 5508 RD_STATS best_rdc, PC_TREE *pc_tree, 5509 SIMPLE_MOTION_DATA_TREE *sms_tree, int64_t *none_rd, 5510 SB_MULTI_PASS_MODE multi_pass_mode, 5511 RD_RECT_PART_WIN_INFO *rect_part_win_info) { 5512 const AV1_COMMON *const cm = &cpi->common; 5513 const int num_planes = av1_num_planes(cm); 5514 TileInfo *const tile_info = &tile_data->tile_info; 5515 MACROBLOCK *const x = &td->mb; 5516 MACROBLOCKD *const xd = &x->e_mbd; 5517 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; 5518 const TokenExtra *const tp_orig = *tp; 5519 PartitionSearchState part_search_state; 5520 5521 // Initialization of state variables used in partition search. 5522 init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col, 5523 bsize); 5524 PartitionBlkParams blk_params = part_search_state.part_blk_params; 5525 5526 set_sms_tree_partitioning(sms_tree, PARTITION_NONE); 5527 if (best_rdc.rdcost < 0) { 5528 av1_invalid_rd_stats(rd_cost); 5529 return part_search_state.found_best_partition; 5530 } 5531 if (bsize == cm->seq_params->sb_size) x->must_find_valid_partition = 0; 5532 5533 // Override skipping rectangular partition operations for edge blocks. 
5534 if (none_rd) *none_rd = 0; 5535 (void)*tp_orig; 5536 5537 #if CONFIG_COLLECT_PARTITION_STATS 5538 // Stats at the current quad tree 5539 PartitionTimingStats *part_timing_stats = 5540 &part_search_state.part_timing_stats; 5541 // Stats aggregated at frame level 5542 FramePartitionTimingStats *fr_part_timing_stats = &cpi->partition_stats; 5543 #endif // CONFIG_COLLECT_PARTITION_STATS 5544 5545 // Override partition costs at the edges of the frame in the same 5546 // way as in read_partition (see decodeframe.c). 5547 if (!av1_blk_has_rows_and_cols(&blk_params)) 5548 set_partition_cost_for_edge_blk(cm, &part_search_state); 5549 5550 // Disable rectangular partitions for inner blocks when the current block is 5551 // forced to only use square partitions. 5552 if (bsize > cpi->sf.part_sf.use_square_partition_only_threshold) { 5553 part_search_state.partition_rect_allowed[HORZ] &= !blk_params.has_rows; 5554 part_search_state.partition_rect_allowed[VERT] &= !blk_params.has_cols; 5555 } 5556 5557 #ifndef NDEBUG 5558 // Nothing should rely on the default value of this array (which is just 5559 // leftover from encoding the previous block. Setting it to fixed pattern 5560 // when debugging. 5561 // bit 0, 1, 2 are blk_skip of each plane 5562 // bit 4, 5, 6 are initialization checking of each plane 5563 memset(x->txfm_search_info.blk_skip, 0x77, 5564 sizeof(x->txfm_search_info.blk_skip)); 5565 #endif // NDEBUG 5566 5567 assert(mi_size_wide[bsize] == mi_size_high[bsize]); 5568 5569 // Set buffers and offsets. 
5570 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 5571 5572 if (cpi->oxcf.mode == ALLINTRA) { 5573 if (bsize == cm->seq_params->sb_size) { 5574 double var_min, var_max; 5575 log_sub_block_var(cpi, x, bsize, &var_min, &var_max); 5576 5577 x->intra_sb_rdmult_modifier = 128; 5578 if ((var_min < 2.0) && (var_max > 4.0)) { 5579 if ((var_max - var_min) > 8.0) { 5580 x->intra_sb_rdmult_modifier -= 48; 5581 } else { 5582 x->intra_sb_rdmult_modifier -= (int)((var_max - var_min) * 6); 5583 } 5584 } 5585 } 5586 } 5587 5588 // Save rdmult before it might be changed, so it can be restored later. 5589 const int orig_rdmult = x->rdmult; 5590 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); 5591 5592 // Apply simple motion search for the entire super block with fixed block 5593 // size, e.g., 16x16, to collect features and write to files for the 5594 // external ML model. 5595 // TODO(chengchen): reduce motion search. This function is similar to 5596 // av1_get_max_min_partition_features(). 5597 if (COLLECT_MOTION_SEARCH_FEATURE_SB && !frame_is_intra_only(cm) && 5598 bsize == cm->seq_params->sb_size) { 5599 av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col, 5600 bsize, /*features=*/NULL); 5601 collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, /*features=*/NULL); 5602 } 5603 5604 // Update rd cost of the bound using the current multiplier. 5605 av1_rd_cost_update(x->rdmult, &best_rdc); 5606 5607 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) 5608 x->mb_energy = av1_log_block_var(cpi, x, bsize); 5609 5610 // Set the context. 
5611 xd->above_txfm_context = 5612 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 5613 xd->left_txfm_context = 5614 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 5615 av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); 5616 5617 #if CONFIG_COLLECT_COMPONENT_TIMING 5618 start_timing(cpi, av1_prune_partitions_time); 5619 #endif 5620 // Pruning: before searching any partition type, using source and simple 5621 // motion search results to prune out unlikely partitions. 5622 av1_prune_partitions_before_search(cpi, x, sms_tree, &part_search_state); 5623 5624 // Pruning: eliminating partition types leading to coding block sizes outside 5625 // the min and max bsize limitations set from the encoder. 5626 av1_prune_partitions_by_max_min_bsize(&x->sb_enc, &part_search_state); 5627 #if CONFIG_COLLECT_COMPONENT_TIMING 5628 end_timing(cpi, av1_prune_partitions_time); 5629 #endif 5630 5631 // Partition search 5632 BEGIN_PARTITION_SEARCH: 5633 // If a valid partition is required, usually when the first round cannot find 5634 // a valid one under the cost limit after pruning, reset the limitations on 5635 // partition types and intra cnn output. 5636 if (x->must_find_valid_partition) { 5637 reset_part_limitations(cpi, &part_search_state); 5638 av1_prune_partitions_by_max_min_bsize(&x->sb_enc, &part_search_state); 5639 // Invalidate intra cnn output for key frames. 5640 if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) { 5641 part_search_state.intra_part_info->quad_tree_idx = 0; 5642 part_search_state.intra_part_info->cnn_output_valid = 0; 5643 } 5644 } 5645 // Partition block source pixel variance. 
5646 unsigned int pb_source_variance = UINT_MAX; 5647 5648 #if CONFIG_COLLECT_COMPONENT_TIMING 5649 start_timing(cpi, none_partition_search_time); 5650 #endif 5651 5652 if (cpi->oxcf.mode == ALLINTRA) { 5653 const bool bsize_at_least_16x16 = (bsize >= BLOCK_16X16); 5654 const bool prune_rect_part_using_4x4_var_deviation = 5655 (cpi->sf.part_sf.prune_rect_part_using_4x4_var_deviation && 5656 !x->must_find_valid_partition); 5657 5658 if (bsize_at_least_16x16 || prune_rect_part_using_4x4_var_deviation) { 5659 double var_min, var_max; 5660 log_sub_block_var(cpi, x, bsize, &var_min, &var_max); 5661 5662 // Further pruning or in some cases reverse pruning when allintra is set. 5663 // This code helps visual and in some cases metrics quality where the 5664 // current block comprises at least one very low variance sub-block and at 5665 // least one where the variance is much higher. 5666 // 5667 // The idea is that in such cases there is danger of ringing and other 5668 // visual artifacts from a high variance feature such as an edge into a 5669 // very low variance region. 5670 // 5671 // The approach taken is to force break down / split to a smaller block 5672 // size to try and separate out the low variance and well predicted blocks 5673 // from the more complex ones and to prevent propagation of ringing over a 5674 // large region. 5675 if (bsize_at_least_16x16 && (var_min < 0.272) && 5676 ((var_max - var_min) > 3.0)) { 5677 part_search_state.partition_none_allowed = 0; 5678 part_search_state.terminate_partition_search = 0; 5679 part_search_state.do_square_split = 1; 5680 } else if (prune_rect_part_using_4x4_var_deviation && 5681 (var_max - var_min < 3.0)) { 5682 // Prune rectangular partitions if the variance deviation of 4x4 5683 // sub-blocks within the block is less than a threshold (derived 5684 // empirically). 5685 part_search_state.do_rectangular_split = 0; 5686 } 5687 } 5688 } 5689 5690 // PARTITION_NONE search stage. 
5691 int64_t part_none_rd = INT64_MAX; 5692 none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx, 5693 &part_search_state, &best_rdc, &pb_source_variance, 5694 none_rd, &part_none_rd); 5695 5696 #if CONFIG_COLLECT_COMPONENT_TIMING 5697 end_timing(cpi, none_partition_search_time); 5698 #endif 5699 #if CONFIG_COLLECT_COMPONENT_TIMING 5700 start_timing(cpi, split_partition_search_time); 5701 #endif 5702 // PARTITION_SPLIT search stage. 5703 int64_t part_split_rd = INT64_MAX; 5704 split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx, 5705 &part_search_state, &best_rdc, multi_pass_mode, 5706 &part_split_rd); 5707 #if CONFIG_COLLECT_COMPONENT_TIMING 5708 end_timing(cpi, split_partition_search_time); 5709 #endif 5710 // Terminate partition search for child partition, 5711 // when NONE and SPLIT partition rd_costs are INT64_MAX. 5712 if (cpi->sf.part_sf.early_term_after_none_split && 5713 part_none_rd == INT64_MAX && part_split_rd == INT64_MAX && 5714 !x->must_find_valid_partition && (bsize != cm->seq_params->sb_size)) { 5715 part_search_state.terminate_partition_search = 1; 5716 } 5717 5718 // Do not evaluate non-square partitions if NONE partition did not choose a 5719 // newmv mode and is skippable. 5720 if ((cpi->sf.part_sf.skip_non_sq_part_based_on_none >= 2) && 5721 (pc_tree->none != NULL)) { 5722 if (x->qindex <= 200 && is_inter_mode(pc_tree->none->mic.mode) && 5723 !have_newmv_in_inter_mode(pc_tree->none->mic.mode) && 5724 pc_tree->none->skippable && !x->must_find_valid_partition && 5725 bsize >= BLOCK_16X16) 5726 part_search_state.do_rectangular_split = 0; 5727 } 5728 5729 // Prune partitions based on PARTITION_NONE and PARTITION_SPLIT. 5730 prune_partitions_after_split(cpi, x, sms_tree, &part_search_state, &best_rdc, 5731 part_none_rd, part_split_rd); 5732 #if CONFIG_COLLECT_COMPONENT_TIMING 5733 start_timing(cpi, rectangular_partition_search_time); 5734 #endif 5735 // Rectangular partitions search stage. 
5736 rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx, 5737 &part_search_state, &best_rdc, 5738 rect_part_win_info, HORZ, VERT); 5739 #if CONFIG_COLLECT_COMPONENT_TIMING 5740 end_timing(cpi, rectangular_partition_search_time); 5741 #endif 5742 5743 if (pb_source_variance == UINT_MAX) { 5744 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize); 5745 pb_source_variance = av1_get_perpixel_variance_facade( 5746 cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y); 5747 } 5748 5749 assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions, 5750 !part_search_state.do_rectangular_split)); 5751 5752 const int prune_ext_part_state = prune_ext_part_none_skippable( 5753 pc_tree->none, x->must_find_valid_partition, 5754 cpi->sf.part_sf.skip_non_sq_part_based_on_none, bsize); 5755 5756 const int ab_partition_allowed = allow_ab_partition_search( 5757 &part_search_state, &cpi->sf.part_sf, pc_tree->partitioning, 5758 x->must_find_valid_partition, prune_ext_part_state, best_rdc.rdcost); 5759 5760 #if CONFIG_COLLECT_COMPONENT_TIMING 5761 start_timing(cpi, ab_partitions_search_time); 5762 #endif 5763 // AB partitions search stage. 5764 ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 5765 &part_search_state, &best_rdc, rect_part_win_info, 5766 pb_source_variance, ab_partition_allowed, HORZ_A, 5767 VERT_B); 5768 #if CONFIG_COLLECT_COMPONENT_TIMING 5769 end_timing(cpi, ab_partitions_search_time); 5770 #endif 5771 5772 // 4-way partitions search stage. 5773 int part4_search_allowed[NUM_PART4_TYPES] = { 1, 1 }; 5774 // Prune 4-way partition search. 
5775 prune_4_way_partition_search(cpi, x, pc_tree, &part_search_state, &best_rdc, 5776 pb_source_variance, prune_ext_part_state, 5777 part4_search_allowed); 5778 5779 #if CONFIG_COLLECT_COMPONENT_TIMING 5780 start_timing(cpi, rd_pick_4partition_time); 5781 #endif 5782 // PARTITION_HORZ_4 5783 assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions, 5784 !part4_search_allowed[HORZ4])); 5785 if (!part_search_state.terminate_partition_search && 5786 part4_search_allowed[HORZ4]) { 5787 const int inc_step[NUM_PART4_TYPES] = { mi_size_high[blk_params.bsize] / 4, 5788 0 }; 5789 // Evaluation of Horz4 partition type. 5790 rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 5791 pc_tree->horizontal4, &part_search_state, &best_rdc, 5792 inc_step, PARTITION_HORZ_4); 5793 } 5794 5795 // PARTITION_VERT_4 5796 assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions, 5797 !part4_search_allowed[VERT4])); 5798 if (!part_search_state.terminate_partition_search && 5799 part4_search_allowed[VERT4] && blk_params.has_cols) { 5800 const int inc_step[NUM_PART4_TYPES] = { 0, mi_size_wide[blk_params.bsize] / 5801 4 }; 5802 // Evaluation of Vert4 partition type. 5803 rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, 5804 pc_tree->vertical4, &part_search_state, &best_rdc, 5805 inc_step, PARTITION_VERT_4); 5806 } 5807 #if CONFIG_COLLECT_COMPONENT_TIMING 5808 end_timing(cpi, rd_pick_4partition_time); 5809 #endif 5810 5811 if (bsize == cm->seq_params->sb_size && 5812 !part_search_state.found_best_partition) { 5813 // Did not find a valid partition, go back and search again, with less 5814 // constraint on which partition types to search. 
5815 x->must_find_valid_partition = 1; 5816 #if CONFIG_COLLECT_PARTITION_STATS 5817 fr_part_timing_stats->partition_redo += 1; 5818 #endif // CONFIG_COLLECT_PARTITION_STATS 5819 goto BEGIN_PARTITION_SEARCH; 5820 } 5821 5822 // Store the final rd cost 5823 *rd_cost = best_rdc; 5824 5825 // Also record the best partition in simple motion data tree because it is 5826 // necessary for the related speed features. 5827 set_sms_tree_partitioning(sms_tree, pc_tree->partitioning); 5828 5829 #if CONFIG_COLLECT_PARTITION_STATS 5830 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) { 5831 part_timing_stats->partition_decisions[pc_tree->partitioning] += 1; 5832 } 5833 5834 // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each 5835 // prediction block. 5836 print_partition_timing_stats_with_rdcost( 5837 part_timing_stats, mi_row, mi_col, bsize, 5838 cpi->ppi->gf_group.update_type[cpi->gf_frame_index], 5839 cm->current_frame.frame_number, &best_rdc, "part_timing.csv"); 5840 const bool print_timing_stats = false; 5841 if (print_timing_stats) { 5842 print_partition_timing_stats(part_timing_stats, cm->show_frame, 5843 frame_is_intra_only(cm), bsize, 5844 "part_timing_data.csv"); 5845 } 5846 // If CONFIG_COLLECTION_PARTITION_STATS is 2, then we print out the stats for 5847 // the whole clip. So we need to pass the information upstream to the encoder. 5848 accumulate_partition_timing_stats(fr_part_timing_stats, part_timing_stats, 5849 bsize); 5850 #endif // CONFIG_COLLECT_PARTITION_STATS 5851 5852 // Reset the PC_TREE deallocation flag. 5853 int pc_tree_dealloc = 0; 5854 5855 #if CONFIG_COLLECT_COMPONENT_TIMING 5856 start_timing(cpi, encode_sb_time); 5857 #endif 5858 if (part_search_state.found_best_partition) { 5859 if (bsize == cm->seq_params->sb_size) { 5860 // Encode the superblock. 5861 const int emit_output = multi_pass_mode != SB_DRY_PASS; 5862 const RUN_TYPE run_type = emit_output ? 
OUTPUT_ENABLED : DRY_RUN_NORMAL; 5863 5864 // Write partition tree to file. Not used by default. 5865 if (COLLECT_MOTION_SEARCH_FEATURE_SB) { 5866 write_partition_tree(cpi, pc_tree, bsize, mi_row, mi_col); 5867 ++cpi->sb_counter; 5868 } 5869 5870 set_cb_offsets(x->cb_offset, 0, 0); 5871 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize, 5872 pc_tree, NULL); 5873 assert(pc_tree == td->pc_root); 5874 // Dealloc the whole PC_TREE after a superblock is done. 5875 av1_free_pc_tree_recursive(pc_tree, num_planes, 0, 0, 5876 cpi->sf.part_sf.partition_search_type); 5877 pc_tree = NULL; 5878 td->pc_root = NULL; 5879 pc_tree_dealloc = 1; 5880 } else if (should_do_dry_run_encode_for_current_block( 5881 cm->seq_params->sb_size, x->sb_enc.max_partition_size, 5882 pc_tree->index, bsize)) { 5883 // Encode the smaller blocks in DRY_RUN mode. 5884 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, 5885 pc_tree, NULL); 5886 } 5887 } 5888 #if CONFIG_COLLECT_COMPONENT_TIMING 5889 end_timing(cpi, encode_sb_time); 5890 #endif 5891 5892 // If the tree still exists (non-superblock), dealloc most nodes, only keep 5893 // nodes for the best partition and PARTITION_NONE. 5894 if (pc_tree_dealloc == 0) 5895 av1_free_pc_tree_recursive(pc_tree, num_planes, 1, 1, 5896 cpi->sf.part_sf.partition_search_type); 5897 5898 if (bsize == cm->seq_params->sb_size) { 5899 assert(best_rdc.rate < INT_MAX); 5900 assert(best_rdc.dist < INT64_MAX); 5901 } else { 5902 assert(tp_orig == *tp); 5903 } 5904 5905 // Restore the rd multiplier. 
5906 x->rdmult = orig_rdmult; 5907 return part_search_state.found_best_partition; 5908 } 5909 #endif // !CONFIG_REALTIME_ONLY 5910 5911 #undef COLLECT_MOTION_SEARCH_FEATURE_SB 5912 5913 #if CONFIG_RT_ML_PARTITIONING 5914 #define FEATURES 6 5915 #define LABELS 2 5916 static int ml_predict_var_partitioning(AV1_COMP *cpi, MACROBLOCK *x, 5917 BLOCK_SIZE bsize, int mi_row, 5918 int mi_col) { 5919 AV1_COMMON *const cm = &cpi->common; 5920 const NN_CONFIG *nn_config = NULL; 5921 const float *means = NULL; 5922 const float *vars = NULL; 5923 switch (bsize) { 5924 case BLOCK_64X64: 5925 nn_config = &av1_var_part_nnconfig_64; 5926 means = av1_var_part_means_64; 5927 vars = av1_var_part_vars_64; 5928 break; 5929 case BLOCK_32X32: 5930 nn_config = &av1_var_part_nnconfig_32; 5931 means = av1_var_part_means_32; 5932 vars = av1_var_part_vars_32; 5933 break; 5934 case BLOCK_16X16: 5935 nn_config = &av1_var_part_nnconfig_16; 5936 means = av1_var_part_means_16; 5937 vars = av1_var_part_vars_16; 5938 break; 5939 case BLOCK_8X8: 5940 default: assert(0 && "Unexpected block size."); return -1; 5941 } 5942 5943 if (!nn_config) return -1; 5944 5945 { 5946 const float thresh = cpi->oxcf.speed <= 5 ? 
1.25f : 0.0f; 5947 float features[FEATURES] = { 0.0f }; 5948 const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0, 5949 cm->seq_params->bit_depth); 5950 int feature_idx = 0; 5951 float score[LABELS]; 5952 5953 features[feature_idx] = 5954 (log1pf((float)(dc_q * dc_q) / 256.0f) - means[feature_idx]) / 5955 sqrtf(vars[feature_idx]); 5956 feature_idx++; 5957 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 1, bsize); 5958 { 5959 const int bs = block_size_wide[bsize]; 5960 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 5961 const int sb_offset_row = 4 * (mi_row & 15); 5962 const int sb_offset_col = 4 * (mi_col & 15); 5963 const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col; 5964 const uint8_t *src = x->plane[0].src.buf; 5965 const int src_stride = x->plane[0].src.stride; 5966 const int pred_stride = 64; 5967 unsigned int sse; 5968 int i; 5969 // Variance of whole block. 5970 const unsigned int var = 5971 cpi->ppi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); 5972 const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); 5973 5974 features[feature_idx] = 5975 (log1pf((float)var) - means[feature_idx]) / sqrtf(vars[feature_idx]); 5976 feature_idx++; 5977 for (i = 0; i < 4; ++i) { 5978 const int x_idx = (i & 1) * bs / 2; 5979 const int y_idx = (i >> 1) * bs / 2; 5980 const int src_offset = y_idx * src_stride + x_idx; 5981 const int pred_offset = y_idx * pred_stride + x_idx; 5982 // Variance of quarter block. 5983 const unsigned int sub_var = 5984 cpi->ppi->fn_ptr[subsize].vf(src + src_offset, src_stride, 5985 pred + pred_offset, pred_stride, &sse); 5986 const float var_ratio = (var == 0) ? 
1.0f : factor * (float)sub_var; 5987 features[feature_idx] = 5988 (var_ratio - means[feature_idx]) / sqrtf(vars[feature_idx]); 5989 feature_idx++; 5990 } 5991 } 5992 // for (int i = 0; i<FEATURES; i++) 5993 // printf("F_%d, %f; ", i, features[i]); 5994 assert(feature_idx == FEATURES); 5995 av1_nn_predict(features, nn_config, 1, score); 5996 // printf("Score %f, thr %f ", (float)score[0], thresh); 5997 if (score[0] > thresh) return PARTITION_SPLIT; 5998 if (score[0] < -thresh) return PARTITION_NONE; 5999 return -1; 6000 } 6001 } 6002 #undef FEATURES 6003 #undef LABELS 6004 6005 // Uncomment for collecting data for ML-based partitioning 6006 // #define _COLLECT_GROUND_TRUTH_ 6007 6008 #ifdef _COLLECT_GROUND_TRUTH_ 6009 static int store_partition_data(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 6010 int mi_row, int mi_col, PARTITION_TYPE part) { 6011 AV1_COMMON *const cm = &cpi->common; 6012 char fname[128]; 6013 switch (bsize) { 6014 case BLOCK_64X64: sprintf(fname, "data_64x64.txt"); break; 6015 case BLOCK_32X32: sprintf(fname, "data_32x32.txt"); break; 6016 case BLOCK_16X16: sprintf(fname, "data_16x16.txt"); break; 6017 case BLOCK_8X8: sprintf(fname, "data_8x8.txt"); break; 6018 default: assert(0 && "Unexpected block size."); return -1; 6019 } 6020 6021 float features[6]; // DC_Q, VAR, VAR_RATIO-0..3 6022 6023 FILE *f = fopen(fname, "a"); 6024 6025 { 6026 const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0, 6027 cm->seq_params->bit_depth); 6028 int feature_idx = 0; 6029 6030 features[feature_idx++] = log1pf((float)(dc_q * dc_q) / 256.0f); 6031 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 1, bsize); 6032 { 6033 const int bs = block_size_wide[bsize]; 6034 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 6035 const int sb_offset_row = 4 * (mi_row & 15); 6036 const int sb_offset_col = 4 * (mi_col & 15); 6037 const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col; 6038 const uint8_t *src = 
x->plane[0].src.buf; 6039 const int src_stride = x->plane[0].src.stride; 6040 const int pred_stride = 64; 6041 unsigned int sse; 6042 int i; 6043 // Variance of whole block. 6044 /* 6045 if (bs == 8) 6046 { 6047 int r, c; 6048 printf("%d %d\n", mi_row, mi_col); 6049 for (r = 0; r < bs; ++r) { 6050 for (c = 0; c < bs; ++c) { 6051 printf("%3d ", 6052 src[r * src_stride + c] - pred[64 * r + c]); 6053 } 6054 printf("\n"); 6055 } 6056 printf("\n"); 6057 } 6058 */ 6059 const unsigned int var = 6060 cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); 6061 const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); 6062 6063 features[feature_idx++] = log1pf((float)var); 6064 6065 fprintf(f, "%f,%f,", features[0], features[1]); 6066 for (i = 0; i < 4; ++i) { 6067 const int x_idx = (i & 1) * bs / 2; 6068 const int y_idx = (i >> 1) * bs / 2; 6069 const int src_offset = y_idx * src_stride + x_idx; 6070 const int pred_offset = y_idx * pred_stride + x_idx; 6071 // Variance of quarter block. 6072 const unsigned int sub_var = 6073 cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, 6074 pred + pred_offset, pred_stride, &sse); 6075 const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; 6076 features[feature_idx++] = var_ratio; 6077 fprintf(f, "%f,", var_ratio); 6078 } 6079 6080 fprintf(f, "%d\n", part == PARTITION_NONE ? 
0 : 1); 6081 } 6082 6083 fclose(f); 6084 return -1; 6085 } 6086 } 6087 #endif 6088 6089 static void duplicate_mode_info_in_sb(AV1_COMMON *cm, MACROBLOCKD *xd, 6090 int mi_row, int mi_col, 6091 BLOCK_SIZE bsize) { 6092 const int block_width = 6093 AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col); 6094 const int block_height = 6095 AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row); 6096 const int mi_stride = xd->mi_stride; 6097 MB_MODE_INFO *const src_mi = xd->mi[0]; 6098 int i, j; 6099 6100 for (j = 0; j < block_height; ++j) 6101 for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi; 6102 } 6103 6104 static inline void copy_mbmi_ext_frame_to_mbmi_ext( 6105 MB_MODE_INFO_EXT *const mbmi_ext, 6106 const MB_MODE_INFO_EXT_FRAME *mbmi_ext_best, uint8_t ref_frame_type) { 6107 memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack, 6108 sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); 6109 memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight, 6110 sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); 6111 mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context; 6112 mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count; 6113 memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs, 6114 sizeof(mbmi_ext->global_mvs)); 6115 } 6116 6117 static void fill_mode_info_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row, 6118 int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) { 6119 AV1_COMMON *const cm = &cpi->common; 6120 MACROBLOCKD *xd = &x->e_mbd; 6121 int hbs = mi_size_wide[bsize] >> 1; 6122 PARTITION_TYPE partition = pc_tree->partitioning; 6123 BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); 6124 6125 assert(bsize >= BLOCK_8X8); 6126 6127 if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) 6128 return; 6129 6130 switch (partition) { 6131 case PARTITION_NONE: 6132 set_mode_info_offsets(&cm->mi_params, &cpi->mbmi_ext_info, x, xd, mi_row, 6133 mi_col); 6134 
*(xd->mi[0]) = pc_tree->none->mic; 6135 copy_mbmi_ext_frame_to_mbmi_ext( 6136 &x->mbmi_ext, &pc_tree->none->mbmi_ext_best, LAST_FRAME); 6137 duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); 6138 break; 6139 case PARTITION_SPLIT: { 6140 fill_mode_info_sb(cpi, x, mi_row, mi_col, subsize, pc_tree->split[0]); 6141 fill_mode_info_sb(cpi, x, mi_row, mi_col + hbs, subsize, 6142 pc_tree->split[1]); 6143 fill_mode_info_sb(cpi, x, mi_row + hbs, mi_col, subsize, 6144 pc_tree->split[2]); 6145 fill_mode_info_sb(cpi, x, mi_row + hbs, mi_col + hbs, subsize, 6146 pc_tree->split[3]); 6147 break; 6148 } 6149 default: break; 6150 } 6151 } 6152 6153 void av1_nonrd_pick_partition(AV1_COMP *cpi, ThreadData *td, 6154 TileDataEnc *tile_data, TokenExtra **tp, 6155 int mi_row, int mi_col, BLOCK_SIZE bsize, 6156 RD_STATS *rd_cost, int do_recon, int64_t best_rd, 6157 PC_TREE *pc_tree) { 6158 AV1_COMMON *const cm = &cpi->common; 6159 TileInfo *const tile_info = &tile_data->tile_info; 6160 MACROBLOCK *const x = &td->mb; 6161 MACROBLOCKD *const xd = &x->e_mbd; 6162 const int hbs = mi_size_wide[bsize] >> 1; 6163 TokenExtra *tp_orig = *tp; 6164 const ModeCosts *mode_costs = &x->mode_costs; 6165 RD_STATS this_rdc, best_rdc; 6166 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; 6167 int do_split = bsize > BLOCK_8X8; 6168 // Override skipping rectangular partition operations for edge blocks 6169 const int force_horz_split = (mi_row + 2 * hbs > cm->mi_params.mi_rows); 6170 const int force_vert_split = (mi_col + 2 * hbs > cm->mi_params.mi_cols); 6171 6172 int partition_none_allowed = !force_horz_split && !force_vert_split; 6173 6174 assert(mi_size_wide[bsize] == mi_size_high[bsize]); // Square partition only 6175 assert(cm->seq_params->sb_size == BLOCK_64X64); // Small SB so far 6176 6177 (void)*tp_orig; 6178 6179 av1_invalid_rd_stats(&best_rdc); 6180 best_rdc.rdcost = best_rd; 6181 #ifndef _COLLECT_GROUND_TRUTH_ 6182 if (partition_none_allowed && do_split) { 6183 const int ml_predicted_partition = 6184 
ml_predict_var_partitioning(cpi, x, bsize, mi_row, mi_col); 6185 if (ml_predicted_partition == PARTITION_NONE) do_split = 0; 6186 if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; 6187 } 6188 #endif 6189 6190 xd->above_txfm_context = 6191 cm->above_contexts.txfm[tile_info->tile_row] + mi_col; 6192 xd->left_txfm_context = 6193 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); 6194 av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, 3); 6195 6196 // PARTITION_NONE 6197 if (partition_none_allowed) { 6198 pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf); 6199 if (!pc_tree->none) 6200 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 6201 "Failed to allocate PICK_MODE_CONTEXT"); 6202 PICK_MODE_CONTEXT *ctx = pc_tree->none; 6203 6204 // Flip for RDO based pick mode 6205 #if 0 6206 RD_STATS dummy; 6207 av1_invalid_rd_stats(&dummy); 6208 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, 6209 PARTITION_NONE, bsize, ctx, dummy); 6210 #else 6211 pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, 6212 ctx); 6213 #endif 6214 if (this_rdc.rate != INT_MAX) { 6215 const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 6216 6217 this_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE]; 6218 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist); 6219 if (this_rdc.rdcost < best_rdc.rdcost) { 6220 best_rdc = this_rdc; 6221 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; 6222 } 6223 } 6224 } 6225 6226 // PARTITION_SPLIT 6227 if (do_split) { 6228 RD_STATS sum_rdc; 6229 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 6230 6231 av1_init_rd_stats(&sum_rdc); 6232 6233 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 6234 pc_tree->split[i] = av1_alloc_pc_tree_node(subsize); 6235 if (!pc_tree->split[i]) 6236 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 6237 "Failed to allocate PC_TREE"); 6238 pc_tree->split[i]->index = i; 6239 
} 6240 6241 int pl = partition_plane_context(xd, mi_row, mi_col, bsize); 6242 sum_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT]; 6243 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); 6244 for (int i = 0; 6245 i < SUB_PARTITIONS_SPLIT && sum_rdc.rdcost < best_rdc.rdcost; ++i) { 6246 const int x_idx = (i & 1) * hbs; 6247 const int y_idx = (i >> 1) * hbs; 6248 6249 if (mi_row + y_idx >= cm->mi_params.mi_rows || 6250 mi_col + x_idx >= cm->mi_params.mi_cols) 6251 continue; 6252 av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, 6253 mi_col + x_idx, subsize, &this_rdc, i < 3, 6254 best_rdc.rdcost - sum_rdc.rdcost, 6255 pc_tree->split[i]); 6256 6257 if (this_rdc.rate == INT_MAX) { 6258 av1_invalid_rd_stats(&sum_rdc); 6259 } else { 6260 sum_rdc.rate += this_rdc.rate; 6261 sum_rdc.dist += this_rdc.dist; 6262 sum_rdc.rdcost += this_rdc.rdcost; 6263 } 6264 } 6265 if (sum_rdc.rdcost < best_rdc.rdcost) { 6266 best_rdc = sum_rdc; 6267 pc_tree->partitioning = PARTITION_SPLIT; 6268 } 6269 } 6270 6271 #ifdef _COLLECT_GROUND_TRUTH_ 6272 store_partition_data(cpi, x, bsize, mi_row, mi_col, pc_tree->partitioning); 6273 #endif 6274 6275 *rd_cost = best_rdc; 6276 6277 av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3); 6278 6279 if (best_rdc.rate == INT_MAX) { 6280 av1_invalid_rd_stats(rd_cost); 6281 return; 6282 } 6283 6284 // update mode info array 6285 fill_mode_info_sb(cpi, x, mi_row, mi_col, bsize, pc_tree); 6286 6287 if (do_recon) { 6288 if (bsize == cm->seq_params->sb_size) { 6289 // NOTE: To get estimate for rate due to the tokens, use: 6290 // int rate_coeffs = 0; 6291 // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS, 6292 // bsize, pc_tree, &rate_coeffs); 6293 set_cb_offsets(x->cb_offset, 0, 0); 6294 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, 6295 pc_tree, NULL); 6296 } else { 6297 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, 6298 pc_tree, NULL); 
6299 } 6300 } 6301 6302 if (bsize == BLOCK_64X64 && do_recon) { 6303 assert(best_rdc.rate < INT_MAX); 6304 assert(best_rdc.dist < INT64_MAX); 6305 } else { 6306 assert(tp_orig == *tp); 6307 } 6308 } 6309 #endif // CONFIG_RT_ML_PARTITIONING