speed_features.c (122983B)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>

#include "av1/common/reconintra.h"

#include "av1/encoder/encoder.h"
#include "av1/encoder/speed_features.h"
#include "av1/encoder/rdopt.h"

#include "aom_dsp/aom_dsp_common.h"

#define MAX_MESH_SPEED 5  // Max speed setting for mesh motion method
// Max speed setting for tx domain evaluation
#define MAX_TX_DOMAIN_EVAL_SPEED 5
// Mesh patterns for good quality encoding: one row per mesh speed
// (0 to MAX_MESH_SPEED), each row holding MAX_MESH_STEP patterns.
// NOTE(review): the meaning of the two values in each pattern is defined by
// MESH_PATTERN, which is declared elsewhere - confirm before editing values.
static const MESH_PATTERN
    good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
      { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
    };

// Mesh patterns for intrabc, with the same layout as the table above: one row
// per mesh speed (0 to MAX_MESH_SPEED), MAX_MESH_STEP patterns per row.
// TODO(huisu@google.com): These settings are pretty relaxed, tune them for
// each speed setting
static const MESH_PATTERN
    intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
      { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
      { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
      { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
      { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
      { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
      { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
    };

// Threshold values to be used for pruning the txfm_domain_distortion
// based on block MSE.
// The table has 4 rows; within each row the indices are as follows.
// Index 0: Default mode evaluation, Winner mode processing is not
// applicable (Eg : IntraBc).
// Index 1: Mode evaluation.
// Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when
// enable_winner_mode_for_use_tx_domain_dist speed feature is ON
// TODO(any): Experiment the threshold logic based on variance metric
static const unsigned int tx_domain_dist_thresholds[4][MODE_EVAL_TYPES] = {
  { UINT_MAX, UINT_MAX, UINT_MAX },
  { 22026, 22026, 22026 },
  { 1377, 1377, 1377 },
  { 0, 0, 0 }
};

// Number of different levels of aggressiveness in using transform domain
// distortion during the R-D evaluation based on the speed feature
// tx_domain_dist_level.
#define TX_DOMAIN_DIST_LEVELS 4

// Transform domain distortion type to be used for default, mode and winner
// mode evaluation.
// Index 0: Default mode evaluation, Winner mode processing is not
// applicable (Eg : IntraBc).
// Index 1: Mode evaluation.
// Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when
// enable_winner_mode_for_use_tx_domain_dist speed feature is ON
static const unsigned int
    tx_domain_dist_types[TX_DOMAIN_DIST_LEVELS][MODE_EVAL_TYPES] = {
      { 0, 2, 0 }, { 1, 2, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
    };

// Threshold values to be used for disabling coeff RD-optimization
// based on block MSE / qstep^2.
// TODO(any): Experiment the threshold logic based on variance metric.
// Each innermost pair holds a dist and a satd threshold value:
// index 0 : dist, index 1 : satd.
// For each row, the indices are as follows.
// Index 0: Default mode evaluation, Winner mode processing is not applicable
// (Eg : IntraBc)
// Index 1: Mode evaluation.
// Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when enable_winner_mode_for_coeff_opt speed
// feature is ON
// There are 9 levels with increasing speed, mapping to vertical indices.
static const unsigned int coeff_opt_thresholds[9][MODE_EVAL_TYPES][2] = {
  { { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 3200, UINT_MAX }, { 250, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 1728, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 864, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 432, UINT_MAX }, { 86, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 864, 97 }, { 142, 16 }, { UINT_MAX, UINT_MAX } },
  { { 432, 97 }, { 86, 16 }, { UINT_MAX, UINT_MAX } },
  { { 216, 25 }, { 86, 10 }, { UINT_MAX, UINT_MAX } },
  { { 216, 25 }, { 0, 10 }, { UINT_MAX, UINT_MAX } }
};

// Transform size to be used for default, mode and winner mode evaluation
// Index 0: Default mode evaluation, Winner mode processing is not applicable
// (Eg : IntraBc)
// Index 1: Mode evaluation.
// Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when enable_winner_mode_for_tx_size_srch speed
// feature is ON
static const TX_SIZE_SEARCH_METHOD
    tx_size_search_methods[4][MODE_EVAL_TYPES] = {
      { USE_FULL_RD, USE_LARGESTALL, USE_FULL_RD },
      { USE_FAST_RD, USE_LARGESTALL, USE_FULL_RD },
      { USE_LARGESTALL, USE_LARGESTALL, USE_FULL_RD },
      { USE_LARGESTALL, USE_LARGESTALL, USE_LARGESTALL }
    };

// Predict transform skip levels to be used for default, mode and winner mode
// evaluation.
// Index 0: Default mode evaluation, Winner mode processing is not applicable.
// Index 1: Mode evaluation.
// Index 2: Winner mode evaluation.
// Values indicate the aggressiveness of skip flag prediction.
// 0 : no early skip prediction
// 1 : conservative early skip prediction using DCT_DCT
// 2 : early skip prediction based on SSE
static const unsigned int predict_skip_levels[3][MODE_EVAL_TYPES] = {
  { 0, 0, 0 }, { 1, 1, 1 }, { 1, 2, 1 }
};

// Predict skip or DC block level used during transform type search. It is
// indexed using the following:
// First index  : Speed feature 'dc_blk_pred_level' (0 to 3)
// Second index : Mode evaluation type (DEFAULT_EVAL, MODE_EVAL and
// WINNER_MODE_EVAL).
//
// The values of predict_dc_levels[][] indicate the aggressiveness of predicting
// a block as transform skip or DC only.
// Type 0 : No skip block or DC only block prediction
// Type 1 : Prediction of skip block based on residual mean and variance
// Type 2 : Prediction of skip block or DC only block based on residual mean and
// variance
static const unsigned int predict_dc_levels[4][MODE_EVAL_TYPES] = {
  { 0, 0, 0 }, { 1, 1, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
};

#if !CONFIG_FPMT_TEST
// This table holds the maximum number of reference frames for global motion.
// The table is indexed as per the speed feature 'gm_search_type'.
// 0 : All reference frames are allowed.
// 1 : All reference frames except L2 and L3 are allowed.
// 2 : All reference frames except L2, L3 and ARF2 are allowed.
// 3 : No reference frame is allowed.
static const int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
  INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
};
#endif

// Qindex threshold levels used for selecting full-pel motion search.
// ms_qindex_thresh[i][j][k] indicates the qindex boundary value for 'k'th
// qindex band for resolution index 'j' for aggressiveness level 'i'.
// Aggressiveness increases from i = 0 to 2.
// j = 0: lower than 720p resolution, j = 1: 720p or larger resolution.
// Currently invoked only for speed 0, 1 and 2.
static const int ms_qindex_thresh[3][2][2] = { { { 200, 70 }, { MAXQ, 200 } },
                                               { { 170, 50 }, { MAXQ, 200 } },
                                               { { 170, 40 }, { 200, 40 } } };

// Full-pel search methods for aggressive search based on qindex.
// Index 0 is for resolutions lower than 720p, index 1 for 720p or larger
// resolutions. Currently invoked only for speed 1 and 2.
static const SEARCH_METHODS motion_search_method[2] = { CLAMPED_DIAMOND,
                                                        DIAMOND };

// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality.
// Returns the value of frame_is_kf_gf_arf(cpi) unchanged.
static int frame_is_boosted(const AV1_COMP *cpi) {
  return frame_is_kf_gf_arf(cpi);
}

// Set transform rd gate level for all transform search cases.
//   txfm_rd_gate_level: array of TX_SEARCH_CASES entries; every entry is
//                       overwritten with 'level'.
//   level:              value to store, asserted to be no larger than
//                       MAX_TX_RD_GATE_LEVEL.
static inline void set_txfm_rd_gate_level(
    int txfm_rd_gate_level[TX_SEARCH_CASES], int level) {
  assert(level <= MAX_TX_RD_GATE_LEVEL);
  for (int idx = 0; idx < TX_SEARCH_CASES; idx++)
    txfm_rd_gate_level[idx] = level;
}

// Sets the frame size dependent speed features for all-intra encoding.
//   cpi:   encoder instance; the frame width/height and the high bit-depth
//          flag are read from it.
//   sf:    speed features, updated in place.
//   speed: speed level. The 'speed >= N' sections below run in ascending
//          order, so a higher level overwrites values stored by lower ones.
// The resolution classes are derived from the smaller of the frame width and
// the frame height.
static void set_allintra_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
  const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
  const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
  const int is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160;
  const bool use_hbd = cpi->oxcf.use_highbitdepth;

  // Settings applied at every speed level (including speed 0).
  if (is_480p_or_larger) {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    if (is_720p_or_larger)
      sf->part_sf.auto_max_partition_based_on_simple_motion = ADAPT_PRED;
    else
      sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED;
  } else {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    if (use_hbd) sf->tx_sf.prune_tx_size_level = 1;
  }

  if (is_4k_or_larger) {
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
  }

  // TODO(huisu@google.com): train models for 720P and above.
  if (!is_720p_or_larger) {
    sf->part_sf.ml_partition_search_breakout_thresh[0] = -1.0f;
    sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.993307f;
    sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.952574f;
    sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.924142f;
    sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.880797f;
    sf->part_sf.ml_early_term_after_part_split_level = 1;
  }

  sf->part_sf.ml_partition_search_breakout_model_index = 0;

  if (is_720p_or_larger) {
    // TODO(chiyotsai@google.com): make this speed feature adaptive based on
    // current block's vertical texture instead of hardcoded with resolution
    sf->mv_sf.use_downsampled_sad = 2;
  }

  if (speed >= 1) {
    sf->part_sf.ml_4_partition_search_level_index = 1;
    if (is_720p_or_larger) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    } else if (is_480p_or_larger) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    } else {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
    }

    if (is_720p_or_larger) {
      sf->part_sf.ml_partition_search_breakout_thresh[0] = 0.9999999f;
      sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.9999999f;
      sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.9618367258814811f;
      sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.9990705139233304f;
      sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.9648891196441841f;

      // 720p and larger switch from model index 0 (set above) to index 1.
      sf->part_sf.ml_partition_search_breakout_model_index = 1;
    } else {
      sf->part_sf.ml_partition_search_breakout_thresh[0] = -1.0f;
      sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.952574f;
      sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.952574f;
      sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.924142f;
      sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.880797f;
    }
    sf->part_sf.ml_early_term_after_part_split_level = 2;
  }

  if (speed >= 2) {
    sf->part_sf.ml_4_partition_search_level_index = 2;
    if (is_720p_or_larger) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    } else {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
    }

    // Below 720p the thresholds stored by the speed 1 section are kept.
    if (is_720p_or_larger) {
      sf->part_sf.ml_partition_search_breakout_thresh[0] = 0.9583713938680828f;
      sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.9999999f;
      sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.9634239069901543f;
      sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.9000000000000001f;
      sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.9196596355880025f;
      sf->part_sf.ml_partition_search_breakout_model_index = 1;
    }

    if (is_720p_or_larger) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
      sf->part_sf.partition_search_breakout_rate_thr = 120;
    } else {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 22);
      sf->part_sf.partition_search_breakout_rate_thr = 100;
    }

    if (is_480p_or_larger) {
      sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
      if (use_hbd) sf->tx_sf.prune_tx_size_level = 2;
    } else {
      if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
    }
  }

  if (speed >= 3) {
    sf->part_sf.ml_early_term_after_part_split_level = 0;
    sf->part_sf.ml_4_partition_search_level_index = 3;

    // At 720p and larger, turn the ML breakout thresholds off again and go
    // back to model index 0.
    if (is_720p_or_larger) {
      for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
        sf->part_sf.ml_partition_search_breakout_thresh[i] =
            -1;  // -1 means not enabled.
      }
      sf->part_sf.ml_partition_search_breakout_model_index = 0;
    }

    if (is_720p_or_larger) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
      sf->part_sf.partition_search_breakout_rate_thr = 200;
    } else {
      sf->part_sf.max_intra_bsize = BLOCK_32X32;
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
      sf->part_sf.partition_search_breakout_rate_thr = 120;
    }
    if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
  }

  if (speed >= 4) {
    if (is_720p_or_larger) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
    } else {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
    }

    if (is_480p_or_larger) {
      sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
    }
  }

  // Note: there is no frame size dependent change at speed 5.
  if (speed >= 6) {
    // Below 480p the value stored at the top of the function (DIRECT_PRED) is
    // left unchanged.
    if (is_720p_or_larger) {
      sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
    } else if (is_480p_or_larger) {
      sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    }

    if (is_1080p_or_larger) {
      sf->part_sf.default_min_partition_size = BLOCK_8X8;
    }

    sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
  }

  if (speed >= 7) {
    // TODO(kyslov): add more speed features to control speed/quality
  }

  if (speed >= 8) {
    if (!is_480p_or_larger) {
      sf->rt_sf.nonrd_check_partition_merge_mode = 2;
    }
    if (is_720p_or_larger) {
      sf->rt_sf.force_large_partition_blocks_intra = 1;
    }
  }

  if (speed >= 9) {
    // TODO(kyslov): add more speed features to control speed/quality
    if (!is_4k_or_larger) {
      // In av1_select_sb_size(), superblock size is set to 64x64 only for
      // resolutions less than 4k in speed>=9, to improve the multithread
      // performance. If cost update levels are set to INTERNAL_COST_UPD_OFF
      // for resolutions >= 4k, the SB size setting can be modified for these
      // resolutions as well.
      sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_OFF;
      sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_OFF;
    }
  }
}

// Sets the speed features for all-intra encoding that do not depend on the
// frame size.
//   cpi:   encoder instance; the screen content flags, the frame content type
//          and the high bit-depth flag are read from it.
//   sf:    speed features, updated in place.
//   speed: speed level. The 'speed >= N' sections below run in ascending
//          order, so a higher level overwrites values stored by lower ones.
static void set_allintra_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  const int use_hbd = cpi->oxcf.use_highbitdepth;

  // Settings applied at every speed level (including speed 0).
  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;
  sf->intra_sf.prune_luma_palette_size_search_level = 1;
  sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
  sf->intra_sf.early_term_chroma_palette_size_search = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.discount_color_cost = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation or screen content uses a smaller threshold than other
  // content.
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  if (speed >= 1) {
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL1;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;
    sf->part_sf.reuse_best_prediction_for_part_ab = 1;

    // Doubles the threshold selected above.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;
    sf->mv_sf.prune_intrabc_candidate_block_hash_search = 1;

    sf->intra_sf.prune_palette_search_level = 1;
    sf->intra_sf.prune_luma_palette_size_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 3;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = 2;
    sf->rd_sf.tx_domain_dist_level = 1;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;
  }

  if (speed >= 2) {
    sf->mv_sf.auto_mv_step_size = 1;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL2;
    sf->intra_sf.disable_smooth_intra = 1;
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.prune_filter_intra_level = 1;

    sf->rd_sf.perform_coeff_opt = 3;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
  }

  if (speed >= 3) {
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
    sf->hl_sf.screen_detection_mode2_fast_detection = 1;

    sf->part_sf.less_rectangular_check_level = 2;
    // From this level on the pruning level no longer depends on
    // allow_screen_content_tools.
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL3;
    sf->part_sf.prune_ext_part_using_split_info = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.search_method = DIAMOND;

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;

    // Same value as already stored by the speed 1 section.
    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.use_rd_based_breakout_for_intra_tx_search = true;

    // TODO(any): evaluate if these lpf features can be moved to speed 2.
    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.disable_loop_restoration_chroma = 0;
    sf->lpf_sf.reduce_wiener_window_size = 1;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
  }

  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL4;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.early_term_after_none_split = 1;
    sf->part_sf.ml_predict_breakout_level = 3;

    // Enabling this flag causes chroma_intra_pruning_with_hog to be reset to
    // 0 at the end of this function.
    sf->intra_sf.prune_chroma_modes_using_luma_winner = 1;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;

    // Note: level 4 is skipped; the value goes from 3 (speed 2) to 5.
    sf->rd_sf.perform_coeff_opt = 5;
    sf->rd_sf.tx_domain_dist_thres_level = 3;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;

    sf->mv_sf.reduce_search_range = 1;
    sf->mv_sf.hash_max_8x8_intrabc_blocks = 1;

    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_DEFAULT;
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
  }

  if (speed >= 5) {
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL5;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 1 : 2;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    sf->lpf_sf.use_coarse_filter_level_search = 0;
    // Disable Wiener and Self-guided Loop restoration filters.
    sf->lpf_sf.disable_wiener_filter = true;
    sf->lpf_sf.disable_sgr_filter = true;

    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_FAST;
  }

  if (speed >= 6) {
    sf->intra_sf.prune_smooth_intra_mode_for_chroma = 1;
    sf->intra_sf.prune_filter_intra_level = 2;
    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    sf->intra_sf.cfl_search_range = 1;
    sf->intra_sf.top_intra_model_count_allowed = 2;
    sf->intra_sf.adapt_top_model_rd_count_using_neighbors = 1;
    sf->intra_sf.prune_luma_odd_delta_angles_in_intra = 1;

    sf->part_sf.prune_rectangular_split_based_on_qidx =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.prune_rect_part_using_4x4_var_deviation = true;
    sf->part_sf.prune_rect_part_using_none_pred_mode = true;
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 0 : 1;
    sf->part_sf.prune_part4_search = 3;
    // TODO(jingning): This might not be a good trade off if the
    // target image quality is very low.
    sf->part_sf.default_max_partition_size = BLOCK_32X32;

    sf->mv_sf.use_bsize_dependent_search_method = 1;
    sf->mv_sf.intrabc_search_level = 1;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
    // Turns off again what the speed 4 section enabled.
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
    sf->tx_sf.prune_intra_tx_depths_using_nn = true;

    sf->rd_sf.perform_coeff_opt = 6;
    sf->rd_sf.tx_domain_dist_level = 3;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.prune_winner_mode_eval_level = 1;
    sf->winner_mode_sf.dc_blk_pred_level = 1;
  }
  // The following should make all-intra mode speed 7 approximately equal
  // to real-time speed 6, all-intra speed 8 close to real-time speed 7, and
  // all-intra speed 9 close to real-time speed 8.
  if (speed >= 7) {
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
    // OR-ed in, so flags that are already set are preserved.
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.var_part_split_threshold_shift = 7;
  }

  if (speed >= 8) {
    sf->rt_sf.hybrid_intra_pickmode = 2;
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 1;
    sf->rt_sf.var_part_split_threshold_shift = 8;
    sf->rt_sf.prune_palette_search_nonrd = 1;
    // Set mask for intra modes.
    for (int i = 0; i < BLOCK_SIZES; ++i)
      if (i >= BLOCK_32X32)
        // Only the DC mask for indices at or above BLOCK_32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
      else
        // Use DC, H, V intra mode for block sizes < 32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
  }

  if (speed >= 9) {
    sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;

    // Both of these override the values stored by the speed 8 section.
    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.hybrid_intra_pickmode = 0;
    // Note that the threshold value below is intentionally lower than speed
    // 8's. This is due to the lack of hybrid intra pick mode, which causes
    // partitions to be bigger on average, causing noticeable ringing artifacts.
    sf->rt_sf.var_part_split_threshold_shift = 7;
    sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
    sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
    sf->rt_sf.enable_intra_mode_pruning_using_neighbors = true;
    sf->rt_sf.prune_intra_mode_using_best_sad_so_far = true;
  }

  // As the speed feature prune_chroma_modes_using_luma_winner already
  // constrains the number of chroma directional mode evaluations to a maximum
  // of 1, the HOG computation and the associated pruning logic does not seem to
  // help speed-up the chroma mode evaluations. Hence disable the speed feature
  // chroma_intra_pruning_with_hog when prune_chroma_modes_using_luma_winner is
  // enabled.
  // This check runs last, so it also undoes the chroma_intra_pruning_with_hog
  // values stored by the speed 5 and speed 6 sections above.
  if (sf->intra_sf.prune_chroma_modes_using_luma_winner)
    sf->intra_sf.chroma_intra_pruning_with_hog = 0;
}

// Configures framesize dependent speed features for low complexity decoding.
635 static void set_good_speed_features_lc_dec_framesize_dependent( 636 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { 637 if (speed < 1 || speed > 3) return; 638 639 const AV1_COMMON *const cm = &cpi->common; 640 const bool is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 641 const bool is_between_608p_and_720p = AOMMIN(cm->width, cm->height) >= 608 && 642 AOMMIN(cm->width, cm->height) <= 720; 643 const bool is_vertical_video = cm->width < cm->height; 644 645 const FRAME_UPDATE_TYPE update_type = 646 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index); 647 const int boosted = frame_is_boosted(cpi); 648 const int is_key_frame = frame_is_intra_only(cm); 649 650 // Speed features for vertical videos 651 if (is_vertical_video && is_between_608p_and_720p) { 652 const int leaf_and_overlay_frames = 653 (update_type == LF_UPDATE || update_type == OVERLAY_UPDATE || 654 update_type == INTNL_OVERLAY_UPDATE); 655 if (leaf_and_overlay_frames) sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH; 656 657 sf->hl_sf.ref_frame_mvs_lvl = 2; 658 659 sf->lpf_sf.dual_sgr_penalty_level = boosted ? 1 : 3; 660 sf->lpf_sf.switchable_lr_with_bias_level = 1; 661 662 sf->inter_sf.bias_warp_mode_rd_scale_pct = 4; 663 664 sf->part_sf.split_partition_penalty_level = is_key_frame ? 0 : 2; 665 666 if (speed >= 2) { 667 sf->part_sf.split_partition_penalty_level = is_key_frame ? 0 : 1; 668 } 669 } 670 671 // Speed features for regular videos 672 if (!is_vertical_video && is_720p_or_larger) { 673 sf->gm_sf.gm_erroradv_tr_level = 1; 674 675 sf->hl_sf.ref_frame_mvs_lvl = 1; 676 677 sf->lpf_sf.dual_sgr_penalty_level = boosted ? 1 : 2; 678 sf->lpf_sf.switchable_lr_with_bias_level = 1; 679 sf->lpf_sf.skip_loop_filter_using_filt_error = 680 (update_type != OVERLAY_UPDATE && update_type != INTNL_OVERLAY_UPDATE && 681 cpi->common.current_frame.pyramid_level > 1) 682 ? 
1 683 : 0; 684 685 sf->inter_sf.bias_warp_mode_rd_scale_pct = 4; 686 687 sf->part_sf.split_partition_penalty_level = is_key_frame ? 0 : 2; 688 689 if (speed >= 2) { 690 sf->part_sf.split_partition_penalty_level = is_key_frame ? 0 : 1; 691 } 692 } 693 } 694 695 // Configures framesize independent speed features for low complexity decoding. 696 static void set_good_speed_features_lc_dec_framesize_independent( 697 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { 698 if (speed < 1 || speed > 3) return; 699 700 const FRAME_UPDATE_TYPE update_type = 701 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index); 702 703 sf->lpf_sf.adaptive_luma_loop_filter_skip = 704 (update_type != OVERLAY_UPDATE && update_type != INTNL_OVERLAY_UPDATE) 705 ? 1 706 : 0; 707 } 708 709 static void set_good_speed_feature_framesize_dependent( 710 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { 711 const AV1_COMMON *const cm = &cpi->common; 712 const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480; 713 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 714 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 715 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080; 716 const int is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160; 717 const bool use_hbd = cpi->oxcf.use_highbitdepth; 718 // Speed features applicable for temporal filtering and tpl modules may be 719 // changed based on frame type at places where the sf is applied (Example : 720 // use_downsampled_sad). This is because temporal filtering and tpl modules 721 // are called before this function (except for the first key frame). 722 // TODO(deepa.kg@ittiam.com): For the speed features applicable to temporal 723 // filtering and tpl modules, modify the sf initialization appropriately 724 // before calling the modules. 
725 const int boosted = frame_is_boosted(cpi); 726 const int is_boosted_arf2_bwd_type = 727 boosted || 728 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE; 729 const int is_lf_frame = 730 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == LF_UPDATE; 731 const int allow_screen_content_tools = 732 cm->features.allow_screen_content_tools; 733 734 if (is_480p_or_larger) { 735 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; 736 if (is_720p_or_larger) 737 sf->part_sf.auto_max_partition_based_on_simple_motion = ADAPT_PRED; 738 else 739 sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED; 740 } else { 741 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64; 742 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED; 743 if (use_hbd) sf->tx_sf.prune_tx_size_level = 1; 744 } 745 746 if (is_4k_or_larger) { 747 sf->part_sf.default_min_partition_size = BLOCK_8X8; 748 } 749 750 // TODO(huisu@google.com): train models for 720P and above. 
751 if (!is_720p_or_larger) { 752 sf->part_sf.ml_partition_search_breakout_thresh[0] = -1.0f; 753 sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.993307f; 754 sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.952574f; 755 sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.924142f; 756 sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.880797f; 757 sf->part_sf.ml_early_term_after_part_split_level = 1; 758 } 759 760 sf->part_sf.ml_partition_search_breakout_model_index = 0; 761 762 if (is_720p_or_larger) { 763 // TODO(chiyotsai@google.com): make this speed feature adaptive based on 764 // current block's vertical texture instead of hardcoded with resolution 765 sf->mv_sf.use_downsampled_sad = 2; 766 } 767 768 if (!is_720p_or_larger) { 769 const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg; 770 const int rate_tolerance = 771 AOMMIN(rc_cfg->under_shoot_pct, rc_cfg->over_shoot_pct); 772 sf->hl_sf.recode_tolerance = 25 + (rate_tolerance >> 2); 773 } 774 775 if (speed >= 1) { 776 sf->part_sf.ml_4_partition_search_level_index = 1; 777 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 1; 778 779 if (is_720p_or_larger) { 780 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; 781 } else if (is_480p_or_larger) { 782 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64; 783 } else { 784 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32; 785 } 786 787 if (is_720p_or_larger) { 788 sf->part_sf.ml_partition_search_breakout_thresh[0] = 0.9999999f; 789 sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.9999999f; 790 sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.9618367258814811f; 791 sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.9990705139233304f; 792 sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.9648891196441841f; 793 sf->part_sf.ml_partition_search_breakout_model_index = 1; 794 } else { 795 sf->part_sf.ml_partition_search_breakout_thresh[0] = -1.0f; 796 
sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.952574f; 797 sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.952574f; 798 sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.924142f; 799 sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.880797f; 800 } 801 sf->part_sf.ml_early_term_after_part_split_level = 2; 802 803 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1; 804 } 805 806 if (speed >= 2) { 807 sf->part_sf.ml_4_partition_search_level_index = 2; 808 if (is_720p_or_larger) { 809 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64; 810 } else { 811 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32; 812 } 813 814 if (is_720p_or_larger) { 815 sf->part_sf.ml_partition_search_breakout_thresh[0] = 0.9583713938680828f; 816 sf->part_sf.ml_partition_search_breakout_thresh[1] = 0.9999999f; 817 sf->part_sf.ml_partition_search_breakout_thresh[2] = 0.9634239069901543f; 818 sf->part_sf.ml_partition_search_breakout_thresh[3] = 0.9000000000000001f; 819 sf->part_sf.ml_partition_search_breakout_thresh[4] = 0.9196596355880025f; 820 sf->part_sf.ml_partition_search_breakout_model_index = 1; 821 } 822 823 if (is_720p_or_larger) { 824 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24); 825 sf->part_sf.partition_search_breakout_rate_thr = 120; 826 } else { 827 sf->part_sf.partition_search_breakout_dist_thr = (1 << 22); 828 sf->part_sf.partition_search_breakout_rate_thr = 100; 829 } 830 831 if (is_720p_or_larger) { 832 sf->inter_sf.prune_obmc_prob_thresh = 16; 833 } else { 834 sf->inter_sf.prune_obmc_prob_thresh = 8; 835 } 836 837 if (is_480p_or_larger) { 838 sf->inter_sf.disable_interintra_wedge_var_thresh = 100; 839 } else { 840 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX; 841 } 842 843 if (is_480p_or_lesser) sf->inter_sf.skip_ext_comp_nearmv_mode = 1; 844 845 if (is_720p_or_larger) { 846 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 1 : 0; 847 } else { 848 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 
2 : 0; 849 } 850 851 if (is_480p_or_larger) { 852 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1; 853 if (use_hbd) sf->tx_sf.prune_tx_size_level = 2; 854 } else { 855 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3; 856 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = boosted ? 0 : 1; 857 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = boosted ? 0 : 1; 858 } 859 860 if (!is_720p_or_larger) { 861 sf->mv_sf.disable_second_mv = 1; 862 sf->mv_sf.auto_mv_step_size = 2; 863 } else { 864 sf->mv_sf.disable_second_mv = boosted ? 0 : 2; 865 sf->mv_sf.auto_mv_step_size = 1; 866 } 867 868 if (!is_720p_or_larger) { 869 sf->hl_sf.recode_tolerance = 50; 870 sf->inter_sf.disable_interinter_wedge_newmv_search = 871 is_boosted_arf2_bwd_type ? 0 : 1; 872 sf->inter_sf.enable_fast_wedge_mask_search = 1; 873 } 874 } 875 876 if (speed >= 3) { 877 sf->inter_sf.enable_fast_wedge_mask_search = 1; 878 sf->inter_sf.skip_newmv_in_drl = 2; 879 sf->inter_sf.skip_ext_comp_nearmv_mode = 1; 880 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 3 : 0; 881 sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1; 882 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1; 883 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 884 frame_is_intra_only(&cpi->common) ? 0 : 1; 885 886 sf->part_sf.ml_early_term_after_part_split_level = 0; 887 888 if (is_720p_or_larger) { 889 for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) { 890 sf->part_sf.ml_partition_search_breakout_thresh[i] = 891 -1; // -1 means not enabled. 892 } 893 sf->part_sf.ml_partition_search_breakout_model_index = 0; 894 } 895 896 sf->part_sf.ml_4_partition_search_level_index = 3; 897 898 if (is_720p_or_larger) { 899 sf->part_sf.partition_search_breakout_dist_thr = (1 << 25); 900 sf->part_sf.partition_search_breakout_rate_thr = 200; 901 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 
2 : 0; 902 } else { 903 sf->part_sf.max_intra_bsize = BLOCK_32X32; 904 sf->part_sf.partition_search_breakout_dist_thr = (1 << 23); 905 sf->part_sf.partition_search_breakout_rate_thr = 120; 906 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 1 : 0; 907 } 908 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3; 909 910 if (is_480p_or_larger) { 911 sf->part_sf.early_term_after_none_split = 1; 912 } else { 913 sf->part_sf.early_term_after_none_split = 0; 914 } 915 if (is_720p_or_larger) { 916 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 2; 917 } else { 918 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 3; 919 } 920 921 if (is_720p_or_larger) { 922 sf->inter_sf.disable_interinter_wedge_var_thresh = 100; 923 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 1; 924 } else { 925 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX; 926 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2; 927 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL2; 928 } 929 930 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX; 931 } 932 933 if (speed >= 4) { 934 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2; 935 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1; 936 if (is_720p_or_larger) { 937 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26); 938 } else { 939 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24); 940 } 941 sf->part_sf.early_term_after_none_split = 1; 942 943 if (is_480p_or_larger) { 944 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2; 945 } else { 946 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1; 947 } 948 949 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX; 950 sf->inter_sf.prune_obmc_prob_thresh = INT_MAX; 951 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 
0 : 2; 952 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 3; 953 954 if (is_720p_or_larger) { 955 sf->inter_sf.prune_comp_ref_frames = 1; 956 } else if (is_480p_or_larger) { 957 sf->inter_sf.prune_comp_ref_frames = is_boosted_arf2_bwd_type ? 0 : 1; 958 } 959 960 if (is_720p_or_larger) 961 sf->hl_sf.recode_tolerance = 32; 962 else 963 sf->hl_sf.recode_tolerance = 55; 964 965 sf->intra_sf.skip_intra_in_interframe = 4; 966 967 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3; 968 } 969 970 if (speed >= 5) { 971 if (is_720p_or_larger) { 972 sf->inter_sf.prune_warped_prob_thresh = 16; 973 } else if (is_480p_or_larger) { 974 sf->inter_sf.prune_warped_prob_thresh = 8; 975 } 976 if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 40; 977 978 sf->inter_sf.skip_newmv_in_drl = 4; 979 sf->inter_sf.prune_comp_ref_frames = 1; 980 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1; 981 982 if (!is_720p_or_larger) { 983 sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET; 984 sf->inter_sf.prune_nearest_near_mv_using_refmv_weight = 985 (boosted || allow_screen_content_tools) ? 0 : 1; 986 sf->mv_sf.use_downsampled_sad = 1; 987 } 988 989 if (!is_480p_or_larger) { 990 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26); 991 } 992 993 if (is_480p_or_lesser) { 994 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL1; 995 } else { 996 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL2; 997 } 998 999 if (is_720p_or_larger) 1000 sf->part_sf.ext_part_eval_based_on_cur_best = 1001 (allow_screen_content_tools || frame_is_intra_only(cm)) ? 
0 : 1; 1002 1003 if (is_480p_or_larger) { 1004 sf->tpl_sf.reduce_num_frames = 1; 1005 } 1006 } 1007 1008 if (speed >= 6) { 1009 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4; 1010 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3; 1011 sf->inter_sf.prune_comp_ref_frames = 2; 1012 sf->inter_sf.prune_nearest_near_mv_using_refmv_weight = 1013 (boosted || allow_screen_content_tools) ? 0 : 1; 1014 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 2; 1015 1016 if (is_720p_or_larger) { 1017 sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE; 1018 } else if (is_480p_or_larger) { 1019 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED; 1020 } 1021 1022 if (is_480p_or_larger) { 1023 sf->hl_sf.allow_sub_blk_me_in_tf = 1; 1024 } 1025 1026 if (is_1080p_or_larger) { 1027 sf->part_sf.default_min_partition_size = BLOCK_8X8; 1028 } 1029 1030 if (is_720p_or_larger) { 1031 sf->inter_sf.disable_masked_comp = 1; 1032 } 1033 1034 if (!is_720p_or_larger) { 1035 sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW; 1036 sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW; 1037 } 1038 1039 if (is_720p_or_larger) { 1040 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32; 1041 sf->part_sf.partition_search_breakout_dist_thr = (1 << 28); 1042 } else { 1043 sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16; 1044 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26); 1045 } 1046 1047 if (is_720p_or_larger) { 1048 sf->inter_sf.prune_ref_mv_idx_search = 2; 1049 } else { 1050 sf->inter_sf.prune_ref_mv_idx_search = 1; 1051 } 1052 1053 if (!is_720p_or_larger) { 1054 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 1055 is_boosted_arf2_bwd_type ? 
450 : 150; 1056 } 1057 1058 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4; 1059 1060 sf->hl_sf.recode_tolerance = 55; 1061 } 1062 1063 if (cpi->oxcf.enable_low_complexity_decode) 1064 set_good_speed_features_lc_dec_framesize_dependent(cpi, sf, speed); 1065 } 1066

// Frame-size-independent part of the "good" speed-feature setup (this
// function never reads the frame width or height).
//
// cpi   - encoder instance; only read here (config, GF group, frame flags).
// sf    - speed feature set that is written.
// speed - speed preset; compared against thresholds 1..6 below.
//
// The settings are cumulative: the baseline assignments run first, then every
// `if (speed >= N)` block for N = 1..6 in ascending order. A given speed thus
// executes all lower tiers before its own, and a later assignment to the same
// field overrides the earlier one. Statement order must therefore be kept.
// Two overrides are applied after the tiers: the low-complexity-decode hook
// (when enabled) and the sharpness == 3 adjustment.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boosted = frame_is_boosted(cpi);
  // True for boosted frames and for internal ARF updates.
  const int is_boosted_arf2_bwd_type =
      boosted || gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
  const int is_inter_frame =
      gf_group->frame_type[cpi->gf_frame_index] == INTER_FRAME;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  const int use_hbd = cpi->oxcf.use_highbitdepth;
  if (!cpi->oxcf.tile_cfg.enable_large_scale_tile) {
    sf->hl_sf.high_precision_mv_usage = LAST_MV_DATA;
  }

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->gm_sf.gm_search_type = boosted ? GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2
                                     : GM_SEARCH_CLOSEST_REFS_ONLY;
  sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1;
  sf->gm_sf.disable_gm_search_based_on_stats = 1;

  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;
  sf->part_sf.simple_motion_search_prune_agg =
      allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL0;

  sf->inter_sf.inter_mode_rd_model_estimation =
      cpi->oxcf.algo_cfg.sharpness ? 0 : 1;
  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  sf->inter_sf.prune_compound_using_single_ref = 1;
  sf->inter_sf.prune_mode_search_simple_translation = 1;
  sf->inter_sf.prune_ref_frame_for_rect_partitions =
      (boosted || allow_screen_content_tools)
          ? 0
          : (is_boosted_arf2_bwd_type ? 1 : 2);
  sf->inter_sf.reduce_inter_modes = boosted ? 1 : 2;
  sf->inter_sf.selective_ref_frame = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->tpl_sf.search_method = NSTEP_8PT;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.discount_color_cost = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Lower exhaustive-search threshold for graphics/animation or screen
  // content. Note that speed >= 1 doubles whichever value is chosen here.
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  if (speed >= 1) {
    sf->hl_sf.adjust_num_frames_for_arf_filtering =
        allow_screen_content_tools ? 0 : 1;

    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL1;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;

    // Doubles the baseline threshold selected above.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;
    sf->mv_sf.obmc_full_pixel_search_level = 1;
    sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
    sf->mv_sf.disable_extensive_joint_motion_search = 1;

    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 2 : 1;
    sf->inter_sf.prune_comp_type_by_comp_avg = 1;
    sf->inter_sf.prune_comp_type_by_model_rd = boosted ? 0 : 1;
    sf->inter_sf.prune_ref_frame_for_rect_partitions =
        (frame_is_intra_only(cm) || allow_screen_content_tools)
            ? 0
            : (boosted ? 1 : 2);
    sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
    sf->inter_sf.reuse_inter_intra_mode = 1;
    sf->inter_sf.selective_ref_frame = 2;
    sf->inter_sf.skip_arf_compound = 1;

    sf->interp_sf.use_interp_filter = 1;

    sf->intra_sf.prune_palette_search_level = 1;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = boosted ? 2 : 3;
    sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;

    // TODO(any, yunqing): move this feature to speed 0.
    sf->tpl_sf.skip_alike_starting_mv = 1;
  }

  if (speed >= 2) {
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL2;
    sf->fp_sf.skip_motion_search_threshold = 25;

    sf->gm_sf.num_refinement_steps = 2;

    sf->part_sf.reuse_best_prediction_for_part_ab = !frame_is_intra_only(cm);

    sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL;
    sf->mv_sf.subpel_iters_per_step = 1;
    sf->mv_sf.reduce_search_range = 1;

    // TODO(chiyotsai@google.com): We can get 10% speed up if we move
    // adaptive_rd_thresh to speed 1. But currently it performs poorly on some
    // clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a
    // bit more closely to figure out why.
    sf->inter_sf.adaptive_rd_thresh = 1;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
    sf->inter_sf.fast_interintra_wedge_search = 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1;
    sf->inter_sf.prune_ext_comp_using_neighbors = 1;
    sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
    sf->inter_sf.prune_comp_type_by_comp_avg = 2;
    sf->inter_sf.selective_ref_frame = 3;
    sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    sf->inter_sf.enable_fast_compound_mode_search = 1;
    sf->inter_sf.reuse_mask_search_results = 1;
    set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 1);
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1;
    sf->inter_sf.alt_ref_search_fp = 1;

    sf->interp_sf.adaptive_interp_filter_search = 1;
    sf->interp_sf.disable_dual_filter = 1;

    sf->intra_sf.disable_smooth_intra =
        !frame_is_intra_only(cm) || (cpi->rc.frames_to_key > 1);
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.skip_intra_in_interframe = is_inter_frame ? 2 : 1;
    sf->intra_sf.skip_filter_intra_in_inter_frames = 1;

    sf->tpl_sf.prune_starting_mv = 1;
    sf->tpl_sf.search_method = DIAMOND;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 3 : 4;
    sf->rd_sf.use_mb_rd_hash = 1;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
    sf->lpf_sf.disable_loop_restoration_chroma = boosted ? 0 : 1;
    sf->lpf_sf.reduce_wiener_window_size = boosted ? 0 : 1;

    // TODO(any): Re-evaluate this feature set to 1 in speed 2.
    sf->tpl_sf.allow_compound_pred = 0;
    sf->tpl_sf.prune_ref_frames_in_tpl = 1;
  }

  if (speed >= 3) {
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;

    sf->gm_sf.prune_ref_frame_for_gm_search = 1;
    sf->gm_sf.prune_zero_mv_with_sse = 1;
    sf->gm_sf.num_refinement_steps = 0;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools
            ? SIMPLE_AGG_LVL0
            : (boosted ? SIMPLE_AGG_LVL3 : QIDX_BASED_AGG_LVL1);
    sf->part_sf.prune_ext_part_using_split_info = 1;
    sf->part_sf.simple_motion_search_rect_split = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
    sf->mv_sf.search_method = DIAMOND;
    sf->mv_sf.disable_second_mv = 2;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_1;
    sf->mv_sf.use_intrabc = 0;

    sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
    sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.disable_onesided_comp = 1;
    sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
    // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
    // and clean-up the speed feature
    sf->inter_sf.perform_best_rd_based_gating_for_chroma = 1;
    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 2;
    sf->inter_sf.selective_ref_frame = 5;
    sf->inter_sf.reuse_compound_type_decision = 1;
    set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level,
                           boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2));
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 2;

    sf->interp_sf.adaptive_interp_filter_search = 2;

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 2;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.skip_alike_starting_mv = 2;
    sf->tpl_sf.prune_intra_modes = 1;
    sf->tpl_sf.reduce_first_step_size = 6;
    sf->tpl_sf.subpel_force_stop = QUARTER_PEL;
    sf->tpl_sf.gop_length_decision_method = 1;

    sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;

    // TODO(any): Refactor the code related to following winner mode speed
    // features
    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    // In the !boosted branch, is_boosted_arf2_bwd_type reduces to the
    // INTNL_ARF_UPDATE test, so this selects 0 / 1 / 2 for boosted /
    // internal-ARF / other frames.
    sf->winner_mode_sf.motion_mode_for_winner_cand =
        boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2);
    sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 4;

    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
    sf->lpf_sf.use_coarse_filter_level_search =
        frame_is_intra_only(cm) ? 0 : 1;
    sf->lpf_sf.use_downsampled_wiener_stats = 1;
  }

  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->gm_sf.prune_zero_mv_with_sse = 2;
    sf->gm_sf.downsample_level = 1;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL4;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.ml_predict_breakout_level = 3;
    sf->part_sf.prune_rectangular_split_based_on_qidx =
        (allow_screen_content_tools || frame_is_intra_only(cm)) ? 0 : 1;

    sf->inter_sf.alt_ref_search_fp = 2;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_DEFAULT] = boosted ? 0 : 3;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_MOTION_MODE] = boosted ? 0 : 5;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_COMP_TYPE_MODE] = boosted ? 0 : 3;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 2;
    sf->inter_sf.prune_ext_comp_using_neighbors = 2;
    sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
    sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;

    sf->interp_sf.cb_pred_filter_search = 1;
    sf->interp_sf.skip_sharp_interp_filter_search = 1;
    sf->interp_sf.use_interp_filter = 2;

    sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    // TODO(any): "intra_y_mode_mask" doesn't help much at speed 4.
    // sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    sf->intra_sf.skip_intra_in_interframe = 4;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;
    sf->tpl_sf.use_sad_for_mode_decision = 1;

    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 5 : 7;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        frame_is_intra_only(cm) ? MULTI_WINNER_MODE_DEFAULT
                                : MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.dc_blk_pred_level = boosted ? 0 : 2;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
  }

  if (speed >= 5) {
    sf->hl_sf.weight_calc_level_in_tf = 1;
    sf->hl_sf.adjust_num_frames_for_arf_filtering =
        allow_screen_content_tools ? 0 : 2;

    sf->fp_sf.reduce_mv_step_param = 4;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL5;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 1 : 2;

    sf->mv_sf.warp_search_method = WARP_SEARCH_DIAMOND;

    sf->inter_sf.prune_inter_modes_if_skippable = 1;
    sf->inter_sf.prune_single_ref = is_boosted_arf2_bwd_type ? 0 : 1;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_DEFAULT] = boosted ? 0 : 4;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_COMP_TYPE_MODE] = boosted ? 0 : 5;
    sf->inter_sf.enable_fast_compound_mode_search = 2;

    sf->interp_sf.skip_interp_filter_search = boosted ? 0 : 1;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        frame_is_intra_only(cm) ? MULTI_WINNER_MODE_FAST
                                : MULTI_WINNER_MODE_OFF;

    // Disable Self-guided Loop restoration filter.
    sf->lpf_sf.disable_sgr_filter = true;
    sf->lpf_sf.disable_wiener_coeff_refine_search = true;

    sf->tpl_sf.prune_starting_mv = 3;
    sf->tpl_sf.use_y_only_rate_distortion = 1;
    sf->tpl_sf.subpel_force_stop = FULL_PEL;
    sf->tpl_sf.gop_length_decision_method = 2;
    sf->tpl_sf.use_sad_for_mode_decision = 2;

    // Overrides the boosted-dependent value set at speed 4.
    sf->winner_mode_sf.dc_blk_pred_level = 2;

    sf->fp_sf.disable_recon = 1;
  }

  if (speed >= 6) {
    sf->hl_sf.disable_extra_sc_testing = 1;
    sf->hl_sf.second_alt_ref_filtering = 0;

    sf->gm_sf.downsample_level = 2;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 3;
    sf->inter_sf.selective_ref_frame = 6;
    sf->inter_sf.prune_single_ref = is_boosted_arf2_bwd_type ? 0 : 2;
    sf->inter_sf.prune_ext_comp_using_neighbors = 3;

    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC;
    sf->intra_sf.early_term_chroma_palette_size_search = 1;

    // Parenthesized explicitly: `||` binds tighter than `?:`.
    sf->part_sf.prune_rectangular_split_based_on_qidx =
        (boosted || allow_screen_content_tools) ? 0 : 2;

    sf->part_sf.prune_part4_search = 3;

    sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->tpl_sf.gop_length_decision_method = 3;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 6 : 8;

    sf->winner_mode_sf.dc_blk_pred_level = 3;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;

    sf->fp_sf.skip_zeromv_motion_search = 1;
  }

  // Applied after all speed tiers so it can adjust whatever they selected.
  if (cpi->oxcf.enable_low_complexity_decode)
    set_good_speed_features_lc_dec_framesize_independent(cpi, sf, speed);

  // Sharpness level 3 turns these two transform-search features off
  // regardless of speed.
  if (cpi->oxcf.algo_cfg.sharpness == 3) {
    sf->tx_sf.adaptive_txb_search_level = 0;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 0;
  }
}

1482 static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi, 1483 SPEED_FEATURES *const sf, 1484 int speed) { 1485 const AV1_COMMON *const cm = &cpi->common; 1486 const int boosted = frame_is_boosted(cpi); 1487 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080; 1488 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 1489 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 1490 const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360; 1491 1492 if (!is_360p_or_larger) { 1493 sf->rt_sf.prune_intra_mode_based_on_mv_range = 1; 1494 
sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1; 1495 if (speed >= 6) 1496 sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2; 1497 if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 2; 1498 if (speed >= 7) { 1499 sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q; 1500 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true; 1501 sf->rt_sf.use_rtc_tf = 2; 1502 } 1503 if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 1; 1504 if (speed >= 8) { 1505 sf->rt_sf.use_nonrd_filter_search = 1; 1506 sf->rt_sf.tx_size_level_based_on_qstep = 1; 1507 } 1508 if (speed >= 9) { 1509 sf->rt_sf.use_comp_ref_nonrd = 0; 1510 sf->rt_sf.nonrd_aggressive_skip = 1; 1511 sf->rt_sf.skip_intra_pred = 1; 1512 // Only turn on enable_ref_short_signaling for low resolution when only 1513 // LAST and GOLDEN ref frames are used. 1514 sf->rt_sf.enable_ref_short_signaling = 1515 (!sf->rt_sf.use_nonrd_altref_frame && 1516 (!sf->rt_sf.use_comp_ref_nonrd || 1517 (!sf->rt_sf.ref_frame_comp_nonrd[1] && 1518 !sf->rt_sf.ref_frame_comp_nonrd[2]))); 1519 1520 // TODO(kyslov) Re-enable when AV1 models are trained 1521 #if 0 1522 #if CONFIG_RT_ML_PARTITIONING 1523 if (!frame_is_intra_only(cm)) { 1524 sf->part_sf.partition_search_type = ML_BASED_PARTITION; 1525 sf->rt_sf.reuse_inter_pred_nonrd = 0; 1526 } 1527 #endif 1528 #endif 1529 sf->rt_sf.use_adaptive_subpel_search = false; 1530 } 1531 if (speed >= 10) { 1532 // TODO(yunqingwang@google.com): To be conservative, disable 1533 // sf->rt_sf.estimate_motion_for_var_based_partition = 3 for speed 10/qvga 1534 // for now. May enable it in the future. 
1535 sf->rt_sf.estimate_motion_for_var_based_partition = 0; 1536 sf->rt_sf.skip_intra_pred = 2; 1537 sf->rt_sf.hybrid_intra_pickmode = 3; 1538 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1; 1539 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2; 1540 sf->rt_sf.use_nonrd_filter_search = 0; 1541 } 1542 } else { 1543 sf->rt_sf.prune_intra_mode_based_on_mv_range = 2; 1544 sf->intra_sf.skip_filter_intra_in_inter_frames = 1; 1545 if (speed <= 5) { 1546 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 1547 boosted ? INT_MAX : 350; 1548 sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2; 1549 } 1550 if (speed == 6) sf->part_sf.disable_8x8_part_based_on_qidx = 1; 1551 if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 2; 1552 if (speed == 7) { 1553 sf->rt_sf.prefer_large_partition_blocks = 1; 1554 // Enable this feature for [360p, 720p] resolution range initially. 1555 // Only enable for low bitdepth to mitigate issue: b/303023614. 1556 if (!cpi->rc.rtc_external_ratectrl && 1557 AOMMIN(cm->width, cm->height) <= 720 && !cpi->oxcf.use_highbitdepth) 1558 sf->hl_sf.accurate_bit_estimate = cpi->oxcf.q_cfg.aq_mode == NO_AQ; 1559 } 1560 if (speed >= 7) { 1561 sf->rt_sf.use_rtc_tf = 1; 1562 } 1563 if (speed == 8 && !cpi->ppi->use_svc) { 1564 sf->rt_sf.short_circuit_low_temp_var = 0; 1565 sf->rt_sf.use_nonrd_altref_frame = 1; 1566 } 1567 if (speed >= 8) sf->rt_sf.tx_size_level_based_on_qstep = 2; 1568 if (speed >= 9) { 1569 sf->rt_sf.gf_length_lvl = 1; 1570 sf->rt_sf.skip_cdef_sb = 1; 1571 sf->rt_sf.sad_based_adp_altref_lag = 2; 1572 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2; 1573 sf->rt_sf.use_adaptive_subpel_search = true; 1574 sf->interp_sf.cb_pred_filter_search = 1; 1575 } 1576 if (speed >= 10) { 1577 sf->rt_sf.hybrid_intra_pickmode = 2; 1578 sf->rt_sf.sad_based_adp_altref_lag = 4; 1579 sf->rt_sf.tx_size_level_based_on_qstep = 0; 1580 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3; 1581 sf->rt_sf.use_adaptive_subpel_search = false; 1582 
sf->interp_sf.cb_pred_filter_search = 2; 1583 } 1584 } 1585 if (!is_480p_or_larger) { 1586 if (speed == 7) { 1587 sf->rt_sf.nonrd_check_partition_merge_mode = 2; 1588 } 1589 } 1590 if (!is_720p_or_larger) { 1591 if (speed >= 9) { 1592 sf->rt_sf.force_large_partition_blocks_intra = 1; 1593 } 1594 } else { 1595 if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3; 1596 if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 0; 1597 if (speed >= 7) { 1598 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2; 1599 sf->rt_sf.reduce_mv_pel_precision_highmotion = 1; 1600 } 1601 if (speed >= 9) { 1602 sf->rt_sf.sad_based_adp_altref_lag = 1; 1603 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 0; 1604 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2; 1605 } 1606 if (speed >= 10) { 1607 sf->rt_sf.sad_based_adp_altref_lag = 3; 1608 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3; 1609 } 1610 } 1611 // TODO(Any): Check/Tune settings of other sfs for 1080p. 1612 if (is_1080p_or_larger) { 1613 if (speed >= 7) { 1614 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0; 1615 sf->rt_sf.use_adaptive_subpel_search = 0; 1616 } 1617 if (speed >= 9) sf->interp_sf.cb_pred_filter_search = 0; 1618 } else { 1619 if (speed >= 9) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q; 1620 if (speed >= 10) sf->rt_sf.nonrd_aggressive_skip = 1; 1621 } 1622 // TODO(marpan): Tune settings for speed 11 video mode, 1623 if (speed >= 11 && cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) { 1624 sf->rt_sf.skip_cdef_sb = 1; 1625 sf->rt_sf.force_only_last_ref = 1; 1626 sf->rt_sf.selective_cdf_update = 1; 1627 sf->rt_sf.use_nonrd_filter_search = 0; 1628 if (is_360p_or_larger) { 1629 sf->part_sf.fixed_partition_size = BLOCK_32X32; 1630 sf->rt_sf.use_fast_fixed_part = 1; 1631 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2; 1632 } 1633 sf->rt_sf.increase_source_sad_thresh = 1; 1634 sf->rt_sf.part_early_exit_zeromv = 2; 1635 sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2; 1636 for (int i = 0; i < 
BLOCK_SIZES; ++i) { 1637 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC; 1638 } 1639 sf->rt_sf.hybrid_intra_pickmode = 0; 1640 } 1641 // Setting for SVC, or when the ref_frame_config control is 1642 // used to set the reference structure. 1643 if (cpi->ppi->use_svc || cpi->ppi->rtc_ref.set_ref_frame_config) { 1644 const RTC_REF *const rtc_ref = &cpi->ppi->rtc_ref; 1645 // For SVC: for greater than 2 temporal layers, use better mv search on 1646 // base temporal layers, and only on base spatial layer if highest 1647 // resolution is above 640x360. 1648 if (cpi->svc.number_temporal_layers >= 2 && 1649 cpi->svc.temporal_layer_id == 0 && 1650 (cpi->svc.spatial_layer_id == 0 || 1651 cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <= 1652 640 * 360)) { 1653 sf->mv_sf.search_method = NSTEP; 1654 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED; 1655 sf->rt_sf.fullpel_search_step_param = 10; 1656 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0; 1657 if (cm->width * cm->height <= 352 * 288) 1658 sf->rt_sf.nonrd_prune_ref_frame_search = 2; 1659 sf->rt_sf.force_large_partition_blocks_intra = 0; 1660 } 1661 if (speed >= 8) { 1662 if (cpi->svc.number_temporal_layers > 2) 1663 sf->rt_sf.disable_cdf_update_non_reference_frame = true; 1664 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3; 1665 if (rtc_ref->non_reference_frame) { 1666 sf->rt_sf.nonrd_aggressive_skip = 1; 1667 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; 1668 } 1669 } 1670 if (speed <= 9 && cpi->svc.number_temporal_layers > 2 && 1671 cpi->svc.temporal_layer_id == 0) 1672 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = false; 1673 else 1674 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true; 1675 sf->rt_sf.frame_level_mode_cost_update = false; 1676 1677 // Compound mode enabling. 
1678 if (rtc_ref->ref_frame_comp[0] || rtc_ref->ref_frame_comp[1] || 1679 rtc_ref->ref_frame_comp[2]) { 1680 sf->rt_sf.use_comp_ref_nonrd = 1; 1681 sf->rt_sf.ref_frame_comp_nonrd[0] = 1682 rtc_ref->ref_frame_comp[0] && rtc_ref->reference[GOLDEN_FRAME - 1]; 1683 sf->rt_sf.ref_frame_comp_nonrd[1] = 1684 rtc_ref->ref_frame_comp[1] && rtc_ref->reference[LAST2_FRAME - 1]; 1685 sf->rt_sf.ref_frame_comp_nonrd[2] = 1686 rtc_ref->ref_frame_comp[2] && rtc_ref->reference[ALTREF_FRAME - 1]; 1687 } else { 1688 sf->rt_sf.use_comp_ref_nonrd = 0; 1689 } 1690 1691 if (cpi->svc.number_spatial_layers > 1 || 1692 cpi->svc.number_temporal_layers > 1) 1693 sf->hl_sf.accurate_bit_estimate = 0; 1694 1695 sf->rt_sf.estimate_motion_for_var_based_partition = 1; 1696 1697 // For single layers RPS: bias/adjustment for recovery frame. 1698 if (cpi->ppi->rtc_ref.bias_recovery_frame) { 1699 sf->mv_sf.search_method = NSTEP; 1700 sf->mv_sf.subpel_search_method = SUBPEL_TREE; 1701 sf->rt_sf.fullpel_search_step_param = 8; 1702 sf->rt_sf.nonrd_aggressive_skip = 0; 1703 } 1704 } 1705 // Screen settings. 1706 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) { 1707 // TODO(marpan): Check settings for speed 7 and 8. 
1708 if (speed >= 7) { 1709 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0; 1710 sf->mv_sf.use_bsize_dependent_search_method = 0; 1711 sf->rt_sf.skip_cdef_sb = 1; 1712 sf->rt_sf.increase_color_thresh_palette = 1; 1713 if (!frame_is_intra_only(cm)) sf->rt_sf.dct_only_palette_nonrd = 1; 1714 } 1715 if (speed >= 8) { 1716 sf->rt_sf.nonrd_check_partition_merge_mode = 3; 1717 sf->rt_sf.nonrd_prune_ref_frame_search = 1; 1718 sf->rt_sf.use_nonrd_filter_search = 0; 1719 sf->rt_sf.prune_hv_pred_modes_using_src_sad = false; 1720 } 1721 if (speed >= 9) { 1722 sf->rt_sf.prune_idtx_nonrd = 1; 1723 sf->rt_sf.part_early_exit_zeromv = 2; 1724 sf->rt_sf.skip_lf_screen = 1; 1725 sf->rt_sf.nonrd_prune_ref_frame_search = 3; 1726 sf->rt_sf.var_part_split_threshold_shift = 10; 1727 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; 1728 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1; 1729 sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q; 1730 sf->rt_sf.nonrd_check_partition_merge_mode = 0; 1731 sf->interp_sf.cb_pred_filter_search = 0; 1732 } 1733 if (speed >= 10) { 1734 if (cm->width * cm->height > 1920 * 1080) 1735 sf->part_sf.disable_8x8_part_based_on_qidx = 1; 1736 sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80; 1737 sf->rt_sf.part_early_exit_zeromv = 1; 1738 sf->rt_sf.nonrd_aggressive_skip = 1; 1739 sf->rt_sf.thresh_active_maps_skip_lf_cdef = 90; 1740 sf->rt_sf.hybrid_intra_pickmode = 0; 1741 sf->rt_sf.dct_only_palette_nonrd = 1; 1742 sf->rt_sf.prune_palette_search_nonrd = 1; 1743 sf->rt_sf.prune_intra_mode_using_best_sad_so_far = true; 1744 sf->rt_sf.rc_faster_convergence_static = 1; 1745 sf->rt_sf.rc_compute_spatial_var_sc = 1; 1746 } 1747 if (speed >= 11) { 1748 sf->rt_sf.skip_lf_screen = 2; 1749 sf->rt_sf.skip_cdef_sb = 2; 1750 sf->rt_sf.prune_palette_search_nonrd = 2; 1751 sf->rt_sf.increase_color_thresh_palette = 0; 1752 sf->rt_sf.prune_h_pred_using_best_mode_so_far = true; 1753 sf->rt_sf.enable_intra_mode_pruning_using_neighbors = true; 1754 } 1755 if (speed 
>= 12) { 1756 if (cpi->rc.high_source_sad && cpi->rc.frame_source_sad > 40000 && 1757 cpi->rc.prev_avg_source_sad < 1000 && 1758 cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height >= 1759 1280 * 720) { 1760 sf->rt_sf.prune_palette_search_nonrd = 3; 1761 sf->rt_sf.skip_newmv_mode_sad_screen = 1; 1762 } 1763 } 1764 sf->rt_sf.skip_encoding_non_reference_slide_change = 1765 cpi->oxcf.rc_cfg.drop_frames_water_mark > 0 ? 1 : 0; 1766 sf->rt_sf.skip_newmv_flat_blocks_screen = 1; 1767 sf->rt_sf.use_idtx_nonrd = 1; 1768 sf->rt_sf.higher_thresh_scene_detection = 0; 1769 sf->rt_sf.use_nonrd_altref_frame = 0; 1770 sf->rt_sf.use_rtc_tf = 0; 1771 sf->rt_sf.use_comp_ref_nonrd = 0; 1772 sf->rt_sf.source_metrics_sb_nonrd = 1; 1773 if (cpi->rc.high_source_sad == 1) { 1774 sf->rt_sf.prefer_large_partition_blocks = 0; 1775 sf->part_sf.max_intra_bsize = BLOCK_128X128; 1776 for (int i = 0; i < BLOCK_SIZES; ++i) { 1777 if (i > BLOCK_32X32) 1778 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC; 1779 else 1780 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V; 1781 } 1782 } 1783 if (speed >= 11 && cpi->rc.high_motion_content_screen_rtc) { 1784 sf->rt_sf.higher_thresh_scene_detection = 1; 1785 sf->rt_sf.force_only_last_ref = 1; 1786 sf->rt_sf.use_nonrd_filter_search = 0; 1787 sf->part_sf.fixed_partition_size = BLOCK_32X32; 1788 sf->rt_sf.use_fast_fixed_part = 1; 1789 sf->rt_sf.increase_source_sad_thresh = 1; 1790 sf->rt_sf.selective_cdf_update = 1; 1791 sf->mv_sf.search_method = FAST_DIAMOND; 1792 } else if (cpi->rc.max_block_source_sad > 20000 && 1793 cpi->rc.frame_source_sad > 100 && speed >= 6 && 1794 (cpi->rc.percent_blocks_with_motion > 1 || 1795 cpi->svc.last_layer_dropped[0])) { 1796 sf->mv_sf.search_method = NSTEP; 1797 sf->rt_sf.fullpel_search_step_param = 2; 1798 } 1799 if (cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) { 1800 sf->rt_sf.use_idtx_nonrd = 0; 1801 sf->rt_sf.prefer_large_partition_blocks = 1; 1802 sf->mv_sf.subpel_search_method = 
// Applies the real-time (rt) speed-feature settings for the given `speed`.
//
// Structure: an unconditional baseline is written first (per the comment
// below, rt speeds 0-5 share it), followed by cumulative `speed >= N` tiers
// for N = 6..10 and a screen-content adjustment at speed >= 11. The tiers are
// evaluated in order, so a higher tier overrides values written by a lower
// one; statement order is therefore significant.
//
// cpi:   encoder instance; read for frame type, rate-control mode, SVC layer
//        count and a few oxcf configuration fields.
// sf:    speed-feature set that is updated in place.
// speed: speed preset; only compared against the thresholds 6..11 here.
//
// Despite the name, use_temporal_noise_estimate below does look at
// cm->width * cm->height.
static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
                                                        SPEED_FEATURES *sf,
                                                        int speed) {
  AV1_COMMON *const cm = &cpi->common;
  const int boosted = frame_is_boosted(cpi);

  // Currently, rt speed 0, 1, 2, 3, 4, 5 are the same.
  // Following set of speed features are not impacting encoder's decisions as
  // the relevant tools are disabled by default.
  sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
  sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
  sf->inter_sf.reuse_inter_intra_mode = 1;
  sf->inter_sf.prune_compound_using_single_ref = 0;
  sf->inter_sf.prune_comp_search_by_single_result = 2;
  sf->inter_sf.prune_comp_type_by_comp_avg = 2;
  sf->inter_sf.fast_wedge_sign_estimate = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
  sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
  sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
  sf->interp_sf.cb_pred_filter_search = 0;
  sf->interp_sf.skip_interp_filter_search = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 2;
  sf->part_sf.less_rectangular_check_level = 2;
  sf->mv_sf.obmc_full_pixel_search_level = 1;
  sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
  sf->tx_sf.model_based_prune_tx_search_level = 0;
  sf->lpf_sf.dual_sgr_penalty_level = 1;
  // Disable Wiener and Self-guided Loop restoration filters.
  sf->lpf_sf.disable_wiener_filter = true;
  sf->lpf_sf.disable_sgr_filter = true;
  sf->intra_sf.prune_palette_search_level = 2;
  sf->intra_sf.prune_luma_palette_size_search_level = 2;
  sf->intra_sf.early_term_chroma_palette_size_search = 1;

  // End of set

  // TODO(any, yunqing): tune these features for real-time use cases.
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_SOLO;
  sf->hl_sf.frame_parameter_update = 0;

  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  // TODO(any): As per the experiments, this speed feature is doing redundant
  // computation since the model rd based pruning logic is similar to model rd
  // based gating when inter_mode_rd_model_estimation = 2. Enable this SF if
  // either of the condition becomes true.
  //    (1) inter_mode_rd_model_estimation != 2
  //    (2) skip_interp_filter_search == 0
  //    (3) Motion mode or compound mode is enabled
  sf->inter_sf.prune_mode_search_simple_translation = 0;
  sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
  sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
  sf->inter_sf.selective_ref_frame = 4;
  sf->inter_sf.alt_ref_search_fp = 2;
  // Boosted frames get gate level 0, all other frames level 4.
  set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 4);
  sf->inter_sf.limit_txfm_eval_per_mode = 3;

  sf->inter_sf.adaptive_rd_thresh = 4;
  sf->inter_sf.inter_mode_rd_model_estimation = 2;
  sf->inter_sf.prune_inter_modes_if_skippable = 1;
  sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
  sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
  sf->inter_sf.skip_newmv_in_drl = 4;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;
  sf->interp_sf.use_interp_filter = 1;
  sf->interp_sf.adaptive_interp_filter_search = 1;
  sf->interp_sf.disable_dual_filter = 1;

  sf->part_sf.default_max_partition_size = BLOCK_128X128;
  sf->part_sf.default_min_partition_size = BLOCK_8X8;
  sf->part_sf.use_best_rd_for_pruning = 1;
  sf->part_sf.early_term_after_none_split = 1;
  sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
  sf->part_sf.max_intra_bsize = BLOCK_16X16;
  sf->part_sf.partition_search_breakout_rate_thr = 500;
  sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
  sf->part_sf.adjust_var_based_rd_partitioning = 2;

  sf->mv_sf.full_pixel_search_level = 1;
  sf->mv_sf.exhaustive_searches_thresh = INT_MAX;
  sf->mv_sf.auto_mv_step_size = 1;
  sf->mv_sf.subpel_iters_per_step = 1;
  sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
  sf->mv_sf.search_method = FAST_DIAMOND;
  sf->mv_sf.subpel_force_stop = EIGHTH_PEL;
  sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;

  // Baseline intra masks: the same luma and chroma mask for every TX size.
  for (int i = 0; i < TX_SIZES; ++i) {
    sf->intra_sf.intra_y_mode_mask[i] = INTRA_DC;
    sf->intra_sf.intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
  }
  sf->intra_sf.skip_intra_in_interframe = 5;
  sf->intra_sf.disable_smooth_intra = 1;
  sf->intra_sf.skip_filter_intra_in_inter_frames = 1;

  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;
  sf->tx_sf.adaptive_txb_search_level = 2;
  sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
  sf->tx_sf.tx_size_search_lgr_block = 1;
  sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
  sf->tx_sf.tx_type_search.skip_tx_search = 1;
  sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
  sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
  sf->tx_sf.refine_fast_tx_search_results = 0;
  sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
  sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
  sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;

  sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
  sf->rd_sf.simple_model_rd_from_var = 1;
  sf->rd_sf.tx_domain_dist_level = 2;
  sf->rd_sf.tx_domain_dist_thres_level = 2;

  sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
  sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

  sf->winner_mode_sf.dc_blk_pred_level = frame_is_intra_only(cm) ? 0 : 3;
  sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
  sf->winner_mode_sf.tx_size_search_level = 1;
  sf->winner_mode_sf.winner_mode_ifs = 1;

  sf->rt_sf.check_intra_pred_nonrd = 1;
  sf->rt_sf.estimate_motion_for_var_based_partition = 2;
  sf->rt_sf.hybrid_intra_pickmode = 1;
  sf->rt_sf.use_comp_ref_nonrd = 0;
  sf->rt_sf.ref_frame_comp_nonrd[0] = 0;
  sf->rt_sf.ref_frame_comp_nonrd[1] = 0;
  sf->rt_sf.ref_frame_comp_nonrd[2] = 0;
  sf->rt_sf.use_nonrd_filter_search = 1;
  // NOTE(review): mode_search_skip_flags is assigned outright further down in
  // this baseline section, so this |= has no lasting effect in this function.
  sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
  sf->rt_sf.num_inter_modes_for_tx_search = 5;
  sf->rt_sf.prune_inter_modes_using_temp_var = 1;
  sf->rt_sf.use_real_time_ref_set = 1;
  sf->rt_sf.use_simple_rd_model = 1;
  sf->rt_sf.prune_inter_modes_with_golden_ref = boosted ? 0 : 1;
  // TODO(any): This sf could be removed.
  sf->rt_sf.short_circuit_low_temp_var = 1;
  // Scene detection is on by default and switched off when rate control is
  // handled externally (rtc_external_ratectrl).
  sf->rt_sf.check_scene_detection = 1;
  if (cpi->rc.rtc_external_ratectrl) sf->rt_sf.check_scene_detection = 0;
  // Only written for non-key frames in CBR mode; otherwise the field keeps
  // whatever value it already had.
  if (cm->current_frame.frame_type != KEY_FRAME &&
      cpi->oxcf.rc_cfg.mode == AOM_CBR)
    sf->rt_sf.overshoot_detection_cbr = FAST_DETECTION_MAXQ;
  // Enable noise estimation only for high resolutions for now.
  //
  // Since use_temporal_noise_estimate has no effect for all-intra frame
  // encoding, it is disabled for this case.
  if (cpi->oxcf.kf_cfg.key_freq_max != 0 && cm->width * cm->height > 640 * 480)
    sf->rt_sf.use_temporal_noise_estimate = 1;
  sf->rt_sf.skip_tx_no_split_var_based_partition = 1;
  sf->rt_sf.skip_newmv_mode_based_on_sse = 1;
  // Key frames start with no skip flags; every other frame type gets the
  // full set listed here.
  sf->rt_sf.mode_search_skip_flags =
      (cm->current_frame.frame_type == KEY_FRAME)
          ? 0
          : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
                FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
                FLAG_EARLY_TERMINATE;
  sf->rt_sf.var_part_split_threshold_shift = 5;
  if (!frame_is_intra_only(&cpi->common)) sf->rt_sf.var_part_based_on_qidx = 1;
  sf->rt_sf.use_fast_fixed_part = 0;
  sf->rt_sf.increase_source_sad_thresh = 0;

  // ---- speed >= 6: overrides on top of the baseline ----
  if (speed >= 6) {
    sf->mv_sf.use_fullpel_costlist = 1;

    sf->rd_sf.tx_domain_dist_thres_level = 3;

    sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 0;
    sf->inter_sf.limit_inter_mode_cands = 4;
    sf->inter_sf.prune_warped_prob_thresh = 8;
    sf->inter_sf.extra_prune_warped = 1;

    sf->rt_sf.gf_refresh_based_on_qp = 1;
    sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
    sf->rt_sf.var_part_split_threshold_shift = 7;
    if (!frame_is_intra_only(&cpi->common))
      sf->rt_sf.var_part_based_on_qidx = 2;

    sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 3;
  }

  // ---- speed >= 7: this tier sets use_nonrd_pick_mode ----
  if (speed >= 7) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_1;
    sf->rt_sf.use_comp_ref_nonrd = 1;
    sf->rt_sf.ref_frame_comp_nonrd[2] = 1;  // LAST_ALTREF
    sf->tx_sf.intra_tx_size_search_init_depth_sqr = 2;
    // Same value as the baseline above.
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->part_sf.max_intra_bsize = BLOCK_32X32;

    // search_method repeats the baseline value; subpel_force_stop changes
    // from EIGHTH_PEL to QUARTER_PEL.
    sf->mv_sf.search_method = FAST_DIAMOND;
    sf->mv_sf.subpel_force_stop = QUARTER_PEL;

    sf->inter_sf.inter_mode_rd_model_estimation = 2;
    // This sf is not applicable in non-rd path.
    sf->inter_sf.skip_newmv_in_drl = 0;

    sf->interp_sf.skip_interp_filter_search = 0;

    // Disable intra_y_mode_mask pruning since the performance at speed 7 isn't
    // good. May need more study.
    for (int i = 0; i < TX_SIZES; ++i) {
      sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL;
    }

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL5;

    // On key frames the flags were set to 0 above, so this leaves exactly
    // FLAG_SKIP_INTRA_DIRMISMATCH set; on other frames the bit is already set.
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.nonrd_prune_ref_frame_search = 1;
    // This is for rd path only.
    sf->rt_sf.prune_inter_modes_using_temp_var = 0;
    sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
    sf->rt_sf.prune_intra_mode_based_on_mv_range = 0;
#if !CONFIG_REALTIME_ONLY
    sf->rt_sf.reuse_inter_pred_nonrd =
        (cpi->oxcf.motion_mode_cfg.enable_warped_motion == 0);
#else
    sf->rt_sf.reuse_inter_pred_nonrd = 1;
#endif
#if CONFIG_AV1_TEMPORAL_DENOISING
    // When this build option is enabled, the assignment below replaces the
    // value chosen just above.
    sf->rt_sf.reuse_inter_pred_nonrd = (cpi->oxcf.noise_sensitivity == 0);
#endif
    sf->rt_sf.short_circuit_low_temp_var = 0;
    // For spatial layers, only LAST and GOLDEN are currently used in the SVC
    // for nonrd. The flag use_nonrd_altref_frame can disable GOLDEN in the
    // get_ref_frame_flags() for some patterns, so disable it here for
    // spatial layers.
    sf->rt_sf.use_nonrd_altref_frame =
        (cpi->svc.number_spatial_layers > 1) ? 0 : 1;
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.discount_color_cost = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 3;
    sf->rt_sf.skip_intra_pred = 1;
    sf->rt_sf.source_metrics_sb_nonrd = 1;
    // Set mask for intra modes.
    for (int i = 0; i < BLOCK_SIZES; ++i)
      if (i >= BLOCK_32X32)
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
      else
        // Use DC, H, V intra mode for block sizes < 32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;

    sf->winner_mode_sf.dc_blk_pred_level = 0;
    sf->rt_sf.var_part_based_on_qidx = 3;
    sf->rt_sf.prune_compoundmode_with_singlecompound_var = true;
    sf->rt_sf.prune_compoundmode_with_singlemode_var = true;
    sf->rt_sf.skip_compound_based_on_var = true;
    sf->rt_sf.use_adaptive_subpel_search = true;
  }

  // ---- speed >= 8: reverts some of the speed-7 choices ----
  // (short_circuit_low_temp_var back to 1, use_nonrd_altref_frame and
  // nonrd_check_partition_merge_mode back to 0).
  if (speed >= 8) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_2;
    sf->intra_sf.intra_pruning_with_hog = 1;
    sf->rt_sf.short_circuit_low_temp_var = 1;
    sf->rt_sf.use_nonrd_altref_frame = 0;
    sf->rt_sf.nonrd_prune_ref_frame_search = 2;
    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.var_part_split_threshold_shift = 8;
    sf->rt_sf.var_part_based_on_qidx = 4;
    sf->rt_sf.partition_direct_merging = 1;
    sf->rt_sf.prune_compoundmode_with_singlemode_var = false;
    sf->mv_sf.use_bsize_dependent_search_method = 2;
    sf->rt_sf.prune_hv_pred_modes_using_src_sad = true;
  }
  // ---- speed >= 9 ----
  if (speed >= 9) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
    sf->rt_sf.estimate_motion_for_var_based_partition = 3;
    sf->rt_sf.prefer_large_partition_blocks = 3;
    sf->rt_sf.skip_intra_pred = 2;
    sf->rt_sf.var_part_split_threshold_shift = 9;
    // Replaces the size-dependent mask from the speed-7 tier with INTRA_DC
    // for every block size.
    for (int i = 0; i < BLOCK_SIZES; ++i)
      sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
    sf->rt_sf.var_part_based_on_qidx = 0;
    sf->rt_sf.frame_level_mode_cost_update = true;
    sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
    sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
    sf->rt_sf.use_adaptive_subpel_search = true;
    sf->mv_sf.use_bsize_dependent_search_method = 0;
  }
  // ---- speed >= 10 ----
  if (speed >= 10) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_4;
    sf->rt_sf.nonrd_prune_ref_frame_search = 3;
    sf->rt_sf.var_part_split_threshold_shift = 10;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
  }
  // ---- speed >= 11, screen content, non-intra-only frames ----
  // Restores dc_blk_pred_level to 3 after the speed-7 tier cleared it.
  if (speed >= 11 && !frame_is_intra_only(cm) &&
      cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
    sf->winner_mode_sf.dc_blk_pred_level = 3;
  }
}
gm_sf->num_refinement_steps = GM_MAX_REFINEMENT_STEPS; 2178 gm_sf->gm_erroradv_tr_level = 0; 2179 } 2180 2181 static inline void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) { 2182 part_sf->partition_search_type = SEARCH_PARTITION; 2183 part_sf->less_rectangular_check_level = 0; 2184 part_sf->use_square_partition_only_threshold = BLOCK_128X128; 2185 part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE; 2186 part_sf->default_max_partition_size = BLOCK_LARGEST; 2187 part_sf->default_min_partition_size = BLOCK_4X4; 2188 part_sf->adjust_var_based_rd_partitioning = 0; 2189 part_sf->max_intra_bsize = BLOCK_LARGEST; 2190 // This setting only takes effect when partition_search_type is set 2191 // to FIXED_PARTITION. 2192 part_sf->fixed_partition_size = BLOCK_16X16; 2193 // Recode loop tolerance %. 2194 part_sf->partition_search_breakout_dist_thr = 0; 2195 part_sf->partition_search_breakout_rate_thr = 0; 2196 part_sf->prune_ext_partition_types_search_level = 0; 2197 part_sf->prune_part4_search = 0; 2198 part_sf->ml_prune_partition = 0; 2199 part_sf->ml_early_term_after_part_split_level = 0; 2200 for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) { 2201 part_sf->ml_partition_search_breakout_thresh[i] = 2202 -1; // -1 means not enabled. 
2203 } 2204 part_sf->ml_partition_search_breakout_model_index = 0; 2205 part_sf->ml_4_partition_search_level_index = 0; 2206 part_sf->simple_motion_search_prune_agg = SIMPLE_AGG_LVL0; 2207 part_sf->simple_motion_search_split = 0; 2208 part_sf->simple_motion_search_prune_rect = 0; 2209 part_sf->simple_motion_search_early_term_none = 0; 2210 part_sf->simple_motion_search_reduce_search_steps = 0; 2211 part_sf->intra_cnn_based_part_prune_level = 0; 2212 part_sf->ext_partition_eval_thresh = BLOCK_8X8; 2213 part_sf->rect_partition_eval_thresh = BLOCK_128X128; 2214 part_sf->ext_part_eval_based_on_cur_best = 0; 2215 part_sf->prune_ext_part_using_split_info = 0; 2216 part_sf->prune_rectangular_split_based_on_qidx = 0; 2217 part_sf->prune_rect_part_using_4x4_var_deviation = false; 2218 part_sf->prune_rect_part_using_none_pred_mode = false; 2219 part_sf->early_term_after_none_split = 0; 2220 part_sf->ml_predict_breakout_level = 0; 2221 part_sf->prune_sub_8x8_partition_level = 0; 2222 part_sf->simple_motion_search_rect_split = 0; 2223 part_sf->reuse_prev_rd_results_for_part_ab = 0; 2224 part_sf->reuse_best_prediction_for_part_ab = 0; 2225 part_sf->use_best_rd_for_pruning = 0; 2226 part_sf->skip_non_sq_part_based_on_none = 0; 2227 part_sf->disable_8x8_part_based_on_qidx = 0; 2228 part_sf->split_partition_penalty_level = 0; 2229 } 2230 2231 static inline void init_mv_sf(MV_SPEED_FEATURES *mv_sf) { 2232 mv_sf->full_pixel_search_level = 0; 2233 mv_sf->auto_mv_step_size = 0; 2234 mv_sf->exhaustive_searches_thresh = 0; 2235 mv_sf->obmc_full_pixel_search_level = 0; 2236 mv_sf->prune_mesh_search = PRUNE_MESH_SEARCH_DISABLED; 2237 mv_sf->reduce_search_range = 0; 2238 mv_sf->search_method = NSTEP; 2239 mv_sf->simple_motion_subpel_force_stop = EIGHTH_PEL; 2240 mv_sf->subpel_force_stop = EIGHTH_PEL; 2241 mv_sf->subpel_iters_per_step = 2; 2242 mv_sf->subpel_search_method = SUBPEL_TREE; 2243 mv_sf->use_accurate_subpel_search = USE_8_TAPS; 2244 mv_sf->use_bsize_dependent_search_method = 0; 
2245 mv_sf->use_fullpel_costlist = 0; 2246 mv_sf->use_downsampled_sad = 0; 2247 mv_sf->disable_extensive_joint_motion_search = 0; 2248 mv_sf->disable_second_mv = 0; 2249 mv_sf->skip_fullpel_search_using_startmv = 0; 2250 mv_sf->warp_search_method = WARP_SEARCH_SQUARE; 2251 mv_sf->warp_search_iters = 8; 2252 mv_sf->use_intrabc = 1; 2253 mv_sf->prune_intrabc_candidate_block_hash_search = 0; 2254 mv_sf->intrabc_search_level = 0; 2255 mv_sf->hash_max_8x8_intrabc_blocks = 0; 2256 } 2257 2258 static inline void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) { 2259 inter_sf->adaptive_rd_thresh = 0; 2260 inter_sf->model_based_post_interp_filter_breakout = 0; 2261 inter_sf->reduce_inter_modes = 0; 2262 inter_sf->alt_ref_search_fp = 0; 2263 inter_sf->prune_single_ref = 0; 2264 inter_sf->prune_comp_ref_frames = 0; 2265 inter_sf->selective_ref_frame = 0; 2266 inter_sf->prune_ref_frame_for_rect_partitions = 0; 2267 inter_sf->fast_wedge_sign_estimate = 0; 2268 inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED; 2269 inter_sf->reuse_inter_intra_mode = 0; 2270 inter_sf->mv_cost_upd_level = INTERNAL_COST_UPD_SB; 2271 inter_sf->coeff_cost_upd_level = INTERNAL_COST_UPD_SB; 2272 inter_sf->mode_cost_upd_level = INTERNAL_COST_UPD_SB; 2273 inter_sf->prune_inter_modes_based_on_tpl = 0; 2274 inter_sf->prune_nearmv_using_neighbors = PRUNE_NEARMV_OFF; 2275 inter_sf->prune_comp_search_by_single_result = 0; 2276 inter_sf->skip_repeated_ref_mv = 0; 2277 inter_sf->skip_newmv_in_drl = 0; 2278 inter_sf->inter_mode_rd_model_estimation = 0; 2279 inter_sf->prune_compound_using_single_ref = 0; 2280 inter_sf->prune_ext_comp_using_neighbors = 0; 2281 inter_sf->skip_ext_comp_nearmv_mode = 0; 2282 inter_sf->prune_comp_using_best_single_mode_ref = 0; 2283 inter_sf->prune_nearest_near_mv_using_refmv_weight = 0; 2284 inter_sf->disable_onesided_comp = 0; 2285 inter_sf->prune_mode_search_simple_translation = 0; 2286 inter_sf->prune_comp_type_by_comp_avg = 0; 2287 
inter_sf->disable_interinter_wedge_newmv_search = 0; 2288 inter_sf->fast_interintra_wedge_search = 0; 2289 inter_sf->prune_comp_type_by_model_rd = 0; 2290 inter_sf->perform_best_rd_based_gating_for_chroma = 0; 2291 inter_sf->prune_obmc_prob_thresh = 0; 2292 inter_sf->disable_interinter_wedge_var_thresh = 0; 2293 inter_sf->disable_interintra_wedge_var_thresh = 0; 2294 inter_sf->prune_ref_mv_idx_search = 0; 2295 inter_sf->prune_warped_prob_thresh = 0; 2296 inter_sf->reuse_compound_type_decision = 0; 2297 inter_sf->prune_inter_modes_if_skippable = 0; 2298 inter_sf->disable_masked_comp = 0; 2299 inter_sf->enable_fast_compound_mode_search = 0; 2300 inter_sf->reuse_mask_search_results = 0; 2301 inter_sf->enable_fast_wedge_mask_search = 0; 2302 inter_sf->inter_mode_txfm_breakout = 0; 2303 inter_sf->limit_inter_mode_cands = 0; 2304 inter_sf->limit_txfm_eval_per_mode = 0; 2305 inter_sf->skip_arf_compound = 0; 2306 inter_sf->bias_warp_mode_rd_scale_pct = 0; 2307 set_txfm_rd_gate_level(inter_sf->txfm_rd_gate_level, 0); 2308 } 2309 2310 static inline void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) { 2311 interp_sf->adaptive_interp_filter_search = 0; 2312 interp_sf->cb_pred_filter_search = 0; 2313 interp_sf->disable_dual_filter = 0; 2314 interp_sf->skip_sharp_interp_filter_search = 0; 2315 interp_sf->use_fast_interpolation_filter_search = 0; 2316 interp_sf->use_interp_filter = 0; 2317 interp_sf->skip_interp_filter_search = 0; 2318 } 2319 2320 static inline void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) { 2321 intra_sf->dv_cost_upd_level = INTERNAL_COST_UPD_SB; 2322 intra_sf->skip_intra_in_interframe = 1; 2323 intra_sf->intra_pruning_with_hog = 0; 2324 intra_sf->chroma_intra_pruning_with_hog = 0; 2325 intra_sf->prune_palette_search_level = 0; 2326 intra_sf->prune_luma_palette_size_search_level = 0; 2327 2328 for (int i = 0; i < TX_SIZES; i++) { 2329 intra_sf->intra_y_mode_mask[i] = INTRA_ALL; 2330 intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL; 2331 } 2332 
intra_sf->disable_smooth_intra = 0; 2333 intra_sf->prune_smooth_intra_mode_for_chroma = 0; 2334 intra_sf->prune_filter_intra_level = 0; 2335 intra_sf->prune_chroma_modes_using_luma_winner = 0; 2336 intra_sf->cfl_search_range = 3; 2337 intra_sf->top_intra_model_count_allowed = TOP_INTRA_MODEL_COUNT; 2338 intra_sf->adapt_top_model_rd_count_using_neighbors = 0; 2339 intra_sf->early_term_chroma_palette_size_search = 0; 2340 intra_sf->skip_filter_intra_in_inter_frames = 0; 2341 intra_sf->prune_luma_odd_delta_angles_in_intra = 0; 2342 } 2343 2344 static inline void init_tx_sf(TX_SPEED_FEATURES *tx_sf) { 2345 tx_sf->inter_tx_size_search_init_depth_sqr = 0; 2346 tx_sf->inter_tx_size_search_init_depth_rect = 0; 2347 tx_sf->intra_tx_size_search_init_depth_rect = 0; 2348 tx_sf->intra_tx_size_search_init_depth_sqr = 0; 2349 tx_sf->tx_size_search_lgr_block = 0; 2350 tx_sf->model_based_prune_tx_search_level = 0; 2351 tx_sf->tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_1; 2352 tx_sf->tx_type_search.ml_tx_split_thresh = 8500; 2353 tx_sf->tx_type_search.use_skip_flag_prediction = 1; 2354 tx_sf->tx_type_search.use_reduced_intra_txset = 0; 2355 tx_sf->tx_type_search.fast_intra_tx_type_search = 0; 2356 tx_sf->tx_type_search.fast_inter_tx_type_prob_thresh = INT_MAX; 2357 tx_sf->tx_type_search.skip_tx_search = 0; 2358 tx_sf->tx_type_search.prune_tx_type_using_stats = 0; 2359 tx_sf->tx_type_search.prune_tx_type_est_rd = 0; 2360 tx_sf->tx_type_search.winner_mode_tx_type_pruning = 0; 2361 tx_sf->txb_split_cap = 1; 2362 tx_sf->adaptive_txb_search_level = 0; 2363 tx_sf->refine_fast_tx_search_results = 1; 2364 tx_sf->prune_tx_size_level = 0; 2365 tx_sf->prune_intra_tx_depths_using_nn = false; 2366 tx_sf->use_rd_based_breakout_for_intra_tx_search = false; 2367 } 2368 2369 static inline void init_rd_sf(RD_CALC_SPEED_FEATURES *rd_sf, 2370 const AV1EncoderConfig *oxcf) { 2371 const int disable_trellis_quant = oxcf->algo_cfg.disable_trellis_quant; 2372 if (disable_trellis_quant == 3) { 2373 
rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg) 2374 ? NO_ESTIMATE_YRD_TRELLIS_OPT 2375 : NO_TRELLIS_OPT; 2376 } else if (disable_trellis_quant == 2) { 2377 rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg) 2378 ? FINAL_PASS_TRELLIS_OPT 2379 : NO_TRELLIS_OPT; 2380 } else if (disable_trellis_quant == 0) { 2381 if (is_lossless_requested(&oxcf->rc_cfg)) { 2382 rd_sf->optimize_coefficients = NO_TRELLIS_OPT; 2383 } else { 2384 rd_sf->optimize_coefficients = FULL_TRELLIS_OPT; 2385 } 2386 } else if (disable_trellis_quant == 1) { 2387 rd_sf->optimize_coefficients = NO_TRELLIS_OPT; 2388 } else { 2389 assert(0 && "Invalid disable_trellis_quant value"); 2390 } 2391 rd_sf->use_mb_rd_hash = 0; 2392 rd_sf->simple_model_rd_from_var = 0; 2393 rd_sf->tx_domain_dist_level = 0; 2394 rd_sf->tx_domain_dist_thres_level = 0; 2395 rd_sf->perform_coeff_opt = 0; 2396 } 2397 2398 static inline void init_winner_mode_sf( 2399 WINNER_MODE_SPEED_FEATURES *winner_mode_sf) { 2400 winner_mode_sf->motion_mode_for_winner_cand = 0; 2401 // Set this at the appropriate speed levels 2402 winner_mode_sf->tx_size_search_level = 0; 2403 winner_mode_sf->enable_winner_mode_for_coeff_opt = 0; 2404 winner_mode_sf->enable_winner_mode_for_tx_size_srch = 0; 2405 winner_mode_sf->enable_winner_mode_for_use_tx_domain_dist = 0; 2406 winner_mode_sf->multi_winner_mode_type = 0; 2407 winner_mode_sf->dc_blk_pred_level = 0; 2408 winner_mode_sf->winner_mode_ifs = 0; 2409 winner_mode_sf->prune_winner_mode_eval_level = 0; 2410 } 2411 2412 static inline void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) { 2413 lpf_sf->disable_loop_restoration_chroma = 0; 2414 lpf_sf->disable_loop_restoration_luma = 0; 2415 lpf_sf->min_lr_unit_size = RESTORATION_PROC_UNIT_SIZE; 2416 lpf_sf->max_lr_unit_size = RESTORATION_UNITSIZE_MAX; 2417 lpf_sf->prune_wiener_based_on_src_var = 0; 2418 lpf_sf->prune_sgr_based_on_wiener = 0; 2419 lpf_sf->enable_sgr_ep_pruning = 0; 2420 lpf_sf->reduce_wiener_window_size 
= 0; 2421 lpf_sf->adaptive_luma_loop_filter_skip = 0; 2422 lpf_sf->skip_loop_filter_using_filt_error = 0; 2423 lpf_sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; 2424 lpf_sf->use_coarse_filter_level_search = 0; 2425 lpf_sf->cdef_pick_method = CDEF_FULL_SEARCH; 2426 // Set decoder side speed feature to use less dual sgr modes 2427 lpf_sf->dual_sgr_penalty_level = 0; 2428 // Enable Wiener and Self-guided Loop restoration filters by default. 2429 lpf_sf->disable_wiener_filter = false; 2430 lpf_sf->disable_sgr_filter = false; 2431 lpf_sf->disable_wiener_coeff_refine_search = false; 2432 lpf_sf->use_downsampled_wiener_stats = 0; 2433 lpf_sf->switchable_lr_with_bias_level = 0; 2434 } 2435 2436 static inline void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) { 2437 rt_sf->check_intra_pred_nonrd = 0; 2438 rt_sf->skip_intra_pred = 0; 2439 rt_sf->estimate_motion_for_var_based_partition = 0; 2440 rt_sf->nonrd_check_partition_merge_mode = 0; 2441 rt_sf->nonrd_check_partition_split = 0; 2442 rt_sf->mode_search_skip_flags = 0; 2443 rt_sf->nonrd_prune_ref_frame_search = 0; 2444 rt_sf->use_nonrd_pick_mode = 0; 2445 rt_sf->discount_color_cost = 0; 2446 rt_sf->use_nonrd_altref_frame = 0; 2447 rt_sf->use_comp_ref_nonrd = 0; 2448 rt_sf->use_real_time_ref_set = 0; 2449 rt_sf->short_circuit_low_temp_var = 0; 2450 rt_sf->reuse_inter_pred_nonrd = 0; 2451 rt_sf->num_inter_modes_for_tx_search = INT_MAX; 2452 rt_sf->use_nonrd_filter_search = 0; 2453 rt_sf->use_simple_rd_model = 0; 2454 rt_sf->hybrid_intra_pickmode = 0; 2455 rt_sf->prune_palette_search_nonrd = 0; 2456 rt_sf->source_metrics_sb_nonrd = 0; 2457 rt_sf->overshoot_detection_cbr = NO_DETECTION; 2458 rt_sf->check_scene_detection = 0; 2459 rt_sf->rc_adjust_keyframe = 0; 2460 rt_sf->rc_compute_spatial_var_sc = 0; 2461 rt_sf->prefer_large_partition_blocks = 0; 2462 rt_sf->use_temporal_noise_estimate = 0; 2463 rt_sf->fullpel_search_step_param = 0; 2464 for (int i = 0; i < BLOCK_SIZES; ++i) 2465 rt_sf->intra_y_mode_bsize_mask_nrd[i] = INTRA_ALL; 
2466 rt_sf->prune_hv_pred_modes_using_src_sad = false; 2467 rt_sf->nonrd_aggressive_skip = 0; 2468 rt_sf->skip_cdef_sb = 0; 2469 rt_sf->force_large_partition_blocks_intra = 0; 2470 rt_sf->skip_tx_no_split_var_based_partition = 0; 2471 rt_sf->skip_newmv_mode_based_on_sse = 0; 2472 rt_sf->gf_length_lvl = 0; 2473 rt_sf->prune_inter_modes_with_golden_ref = 0; 2474 rt_sf->prune_inter_modes_wrt_gf_arf_based_on_sad = 0; 2475 rt_sf->prune_inter_modes_using_temp_var = 0; 2476 rt_sf->reduce_mv_pel_precision_highmotion = 0; 2477 rt_sf->reduce_mv_pel_precision_lowcomplex = 0; 2478 rt_sf->prune_intra_mode_based_on_mv_range = 0; 2479 rt_sf->var_part_split_threshold_shift = 7; 2480 rt_sf->gf_refresh_based_on_qp = 0; 2481 rt_sf->use_rtc_tf = 0; 2482 rt_sf->use_idtx_nonrd = 0; 2483 rt_sf->prune_idtx_nonrd = 0; 2484 rt_sf->dct_only_palette_nonrd = 0; 2485 rt_sf->part_early_exit_zeromv = 0; 2486 rt_sf->sse_early_term_inter_search = EARLY_TERM_DISABLED; 2487 rt_sf->skip_lf_screen = 0; 2488 rt_sf->thresh_active_maps_skip_lf_cdef = 100; 2489 rt_sf->sad_based_adp_altref_lag = 0; 2490 rt_sf->partition_direct_merging = 0; 2491 rt_sf->var_part_based_on_qidx = 0; 2492 rt_sf->tx_size_level_based_on_qstep = 0; 2493 rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false; 2494 rt_sf->prune_compoundmode_with_singlecompound_var = false; 2495 rt_sf->frame_level_mode_cost_update = false; 2496 rt_sf->prune_h_pred_using_best_mode_so_far = false; 2497 rt_sf->enable_intra_mode_pruning_using_neighbors = false; 2498 rt_sf->prune_intra_mode_using_best_sad_so_far = false; 2499 rt_sf->check_only_zero_zeromv_on_large_blocks = false; 2500 rt_sf->disable_cdf_update_non_reference_frame = false; 2501 rt_sf->prune_compoundmode_with_singlemode_var = false; 2502 rt_sf->skip_compound_based_on_var = false; 2503 rt_sf->set_zeromv_skip_based_on_source_sad = 1; 2504 rt_sf->use_adaptive_subpel_search = false; 2505 rt_sf->screen_content_cdef_filter_qindex_thresh = 0; 2506 rt_sf->enable_ref_short_signaling = false; 
2507 rt_sf->check_globalmv_on_single_ref = true; 2508 rt_sf->increase_color_thresh_palette = false; 2509 rt_sf->selective_cdf_update = 0; 2510 rt_sf->force_only_last_ref = 0; 2511 rt_sf->higher_thresh_scene_detection = 1; 2512 rt_sf->skip_newmv_flat_blocks_screen = 0; 2513 rt_sf->skip_encoding_non_reference_slide_change = 0; 2514 rt_sf->rc_faster_convergence_static = 0; 2515 rt_sf->skip_newmv_mode_sad_screen = 0; 2516 } 2517 2518 static fractional_mv_step_fp 2519 *const fractional_mv_search[SUBPEL_SEARCH_METHODS] = { 2520 av1_find_best_sub_pixel_tree, // SUBPEL_TREE = 0 2521 av1_find_best_sub_pixel_tree_pruned, // SUBPEL_TREE_PRUNED = 1 2522 av1_find_best_sub_pixel_tree_pruned_more // SUBPEL_TREE_PRUNED_MORE = 2 2523 }; 2524 2525 // Populate appropriate sub-pel search method based on speed feature and user 2526 // specified settings 2527 static void set_subpel_search_method( 2528 MotionVectorSearchParams *mv_search_params, 2529 unsigned int motion_vector_unit_test, 2530 SUBPEL_SEARCH_METHOD subpel_search_method) { 2531 assert(subpel_search_method <= SUBPEL_TREE_PRUNED_MORE); 2532 mv_search_params->find_fractional_mv_step = 2533 fractional_mv_search[subpel_search_method]; 2534 2535 // This is only used in motion vector unit test. 
2536 if (motion_vector_unit_test == 1) 2537 mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv; 2538 else if (motion_vector_unit_test == 2) 2539 mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv; 2540 } 2541 2542 void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) { 2543 SPEED_FEATURES *const sf = &cpi->sf; 2544 const AV1EncoderConfig *const oxcf = &cpi->oxcf; 2545 2546 switch (oxcf->mode) { 2547 case GOOD: 2548 set_good_speed_feature_framesize_dependent(cpi, sf, speed); 2549 break; 2550 case ALLINTRA: 2551 set_allintra_speed_feature_framesize_dependent(cpi, sf, speed); 2552 break; 2553 case REALTIME: 2554 set_rt_speed_feature_framesize_dependent(cpi, sf, speed); 2555 break; 2556 } 2557 2558 if (!cpi->ppi->seq_params_locked) { 2559 cpi->common.seq_params->enable_masked_compound &= 2560 !sf->inter_sf.disable_masked_comp; 2561 cpi->common.seq_params->enable_interintra_compound &= 2562 (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX); 2563 } 2564 2565 set_subpel_search_method(&cpi->mv_search_params, 2566 cpi->oxcf.unit_test_cfg.motion_vector_unit_test, 2567 sf->mv_sf.subpel_search_method); 2568 2569 // For multi-thread use case with row_mt enabled, cost update for a set of 2570 // SB rows is not desirable. Hence, the sf mv_cost_upd_level is set to 2571 // INTERNAL_COST_UPD_SBROW in such cases. 2572 if ((cpi->oxcf.row_mt == 1) && (cpi->mt_info.num_workers > 1)) { 2573 if (sf->inter_sf.mv_cost_upd_level == INTERNAL_COST_UPD_SBROW_SET) { 2574 // Set mv_cost_upd_level to use row level update. 
2575 sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW; 2576 } 2577 } 2578 } 2579 2580 void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) { 2581 SPEED_FEATURES *const sf = &cpi->sf; 2582 WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params; 2583 const AV1EncoderConfig *const oxcf = &cpi->oxcf; 2584 int i; 2585 2586 init_hl_sf(&sf->hl_sf); 2587 init_fp_sf(&sf->fp_sf); 2588 init_tpl_sf(&sf->tpl_sf); 2589 init_gm_sf(&sf->gm_sf); 2590 init_part_sf(&sf->part_sf); 2591 init_mv_sf(&sf->mv_sf); 2592 init_inter_sf(&sf->inter_sf); 2593 init_interp_sf(&sf->interp_sf); 2594 init_intra_sf(&sf->intra_sf); 2595 init_tx_sf(&sf->tx_sf); 2596 init_rd_sf(&sf->rd_sf, oxcf); 2597 init_winner_mode_sf(&sf->winner_mode_sf); 2598 init_lpf_sf(&sf->lpf_sf); 2599 init_rt_sf(&sf->rt_sf); 2600 2601 switch (oxcf->mode) { 2602 case GOOD: 2603 set_good_speed_features_framesize_independent(cpi, sf, speed); 2604 break; 2605 case ALLINTRA: 2606 set_allintra_speed_features_framesize_independent(cpi, sf, speed); 2607 break; 2608 case REALTIME: 2609 set_rt_speed_features_framesize_independent(cpi, sf, speed); 2610 break; 2611 } 2612 2613 // Note: when use_nonrd_pick_mode is true, the transform size is the 2614 // minimum of 16x16 and the largest possible size of the current block, 2615 // which conflicts with the speed feature "enable_tx_size_search". 2616 if (!oxcf->txfm_cfg.enable_tx_size_search && 2617 sf->rt_sf.use_nonrd_pick_mode == 0) { 2618 sf->winner_mode_sf.tx_size_search_level = 3; 2619 } 2620 2621 if (cpi->mt_info.num_workers > 1) { 2622 // Loop restoration stage is conditionally disabled for speed 5, 6 when 2623 // num_workers > 1. Since av1_pick_filter_restoration() is not 2624 // multi-threaded, enabling the Loop restoration stage will cause an 2625 // increase in encode time (3% to 7% increase depends on frame 2626 // resolution). 
2627 // TODO(aomedia:3446): Implement multi-threading of 2628 // av1_pick_filter_restoration() and enable Wiener filter for speed 5, 6 2629 // similar to single thread encoding path. 2630 if (speed >= 5) { 2631 sf->lpf_sf.disable_sgr_filter = true; 2632 sf->lpf_sf.disable_wiener_filter = true; 2633 } 2634 } 2635 2636 if (!cpi->ppi->seq_params_locked) { 2637 cpi->common.seq_params->order_hint_info.enable_dist_wtd_comp &= 2638 (sf->inter_sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED); 2639 cpi->common.seq_params->enable_dual_filter &= 2640 !sf->interp_sf.disable_dual_filter; 2641 // Set the flag 'enable_restoration', if one the Loop restoration filters 2642 // (i.e., Wiener or Self-guided) is enabled. 2643 cpi->common.seq_params->enable_restoration &= 2644 (!sf->lpf_sf.disable_wiener_filter || !sf->lpf_sf.disable_sgr_filter); 2645 2646 cpi->common.seq_params->enable_interintra_compound &= 2647 (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX); 2648 } 2649 2650 const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED); 2651 for (i = 0; i < MAX_MESH_STEP; ++i) { 2652 sf->mv_sf.mesh_patterns[i].range = 2653 good_quality_mesh_patterns[mesh_speed][i].range; 2654 sf->mv_sf.mesh_patterns[i].interval = 2655 good_quality_mesh_patterns[mesh_speed][i].interval; 2656 } 2657 2658 // Update the mesh pattern of exhaustive motion search for intraBC 2659 // Though intraBC mesh pattern is populated for all frame types, it is used 2660 // only for intra frames of screen contents 2661 for (i = 0; i < MAX_MESH_STEP; ++i) { 2662 sf->mv_sf.intrabc_mesh_patterns[i].range = 2663 intrabc_mesh_patterns[mesh_speed][i].range; 2664 sf->mv_sf.intrabc_mesh_patterns[i].interval = 2665 intrabc_mesh_patterns[mesh_speed][i].interval; 2666 } 2667 2668 // Slow quant, dct and trellis not worthwhile for first pass 2669 // so make sure they are always turned off. 2670 if (is_stat_generation_stage(cpi)) 2671 sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT; 2672 2673 // No recode for 1 pass. 
2674 if (oxcf->pass == AOM_RC_ONE_PASS && has_no_stats_stage(cpi)) 2675 sf->hl_sf.recode_loop = DISALLOW_RECODE; 2676 2677 set_subpel_search_method(&cpi->mv_search_params, 2678 cpi->oxcf.unit_test_cfg.motion_vector_unit_test, 2679 sf->mv_sf.subpel_search_method); 2680 2681 // assert ensures that tx_domain_dist_level is accessed correctly 2682 assert(cpi->sf.rd_sf.tx_domain_dist_thres_level >= 0 && 2683 cpi->sf.rd_sf.tx_domain_dist_thres_level < 4); 2684 memcpy(winner_mode_params->tx_domain_dist_threshold, 2685 tx_domain_dist_thresholds[cpi->sf.rd_sf.tx_domain_dist_thres_level], 2686 sizeof(winner_mode_params->tx_domain_dist_threshold)); 2687 2688 assert(cpi->sf.rd_sf.tx_domain_dist_level >= 0 && 2689 cpi->sf.rd_sf.tx_domain_dist_level < TX_DOMAIN_DIST_LEVELS); 2690 memcpy(winner_mode_params->use_transform_domain_distortion, 2691 tx_domain_dist_types[cpi->sf.rd_sf.tx_domain_dist_level], 2692 sizeof(winner_mode_params->use_transform_domain_distortion)); 2693 2694 // assert ensures that coeff_opt_thresholds is accessed correctly 2695 assert(cpi->sf.rd_sf.perform_coeff_opt >= 0 && 2696 cpi->sf.rd_sf.perform_coeff_opt < 9); 2697 memcpy(winner_mode_params->coeff_opt_thresholds, 2698 &coeff_opt_thresholds[cpi->sf.rd_sf.perform_coeff_opt], 2699 sizeof(winner_mode_params->coeff_opt_thresholds)); 2700 2701 // assert ensures that predict_skip_levels is accessed correctly 2702 assert(cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction >= 0 && 2703 cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction < 3); 2704 memcpy(winner_mode_params->skip_txfm_level, 2705 predict_skip_levels[cpi->sf.tx_sf.tx_type_search 2706 .use_skip_flag_prediction], 2707 sizeof(winner_mode_params->skip_txfm_level)); 2708 2709 // assert ensures that tx_size_search_level is accessed correctly 2710 assert(cpi->sf.winner_mode_sf.tx_size_search_level >= 0 && 2711 cpi->sf.winner_mode_sf.tx_size_search_level <= 3); 2712 memcpy(winner_mode_params->tx_size_search_methods, 2713 
tx_size_search_methods[cpi->sf.winner_mode_sf.tx_size_search_level], 2714 sizeof(winner_mode_params->tx_size_search_methods)); 2715 memcpy(winner_mode_params->predict_dc_level, 2716 predict_dc_levels[cpi->sf.winner_mode_sf.dc_blk_pred_level], 2717 sizeof(winner_mode_params->predict_dc_level)); 2718 2719 if (cpi->oxcf.row_mt == 1 && (cpi->mt_info.num_workers > 1)) { 2720 if (sf->inter_sf.inter_mode_rd_model_estimation == 1) { 2721 // Revert to type 2 2722 sf->inter_sf.inter_mode_rd_model_estimation = 2; 2723 } 2724 2725 #if !CONFIG_FPMT_TEST 2726 // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve 2727 // better parallelism when number of threads available are greater than or 2728 // equal to maximum number of reference frames allowed for global motion. 2729 if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH && 2730 (cpi->mt_info.num_workers >= 2731 gm_available_reference_frames[sf->gm_sf.gm_search_type])) 2732 sf->gm_sf.prune_ref_frame_for_gm_search = 0; 2733 #endif 2734 } 2735 2736 // This only applies to the real time mode. Adaptive gf refresh is disabled if 2737 // gf_cbr_boost_pct that is set by the user is larger than 0. 2738 if (cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 0) 2739 sf->rt_sf.gf_refresh_based_on_qp = 0; 2740 } 2741 2742 // Override some speed features for low complexity decode based on qindex. 
2743 static void set_good_speed_features_lc_dec_qindex_dependent( 2744 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { 2745 if (speed < 1 || speed > 3) return; 2746 2747 const AV1_COMMON *const cm = &cpi->common; 2748 const bool is_between_608p_and_720p = AOMMIN(cm->width, cm->height) >= 608 && 2749 AOMMIN(cm->width, cm->height) <= 720; 2750 const bool is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 2751 const bool is_vertical_video = cm->width < cm->height; 2752 const FRAME_UPDATE_TYPE update_type = 2753 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index); 2754 const bool leaf_and_overlay_frames = 2755 (update_type == LF_UPDATE || update_type == OVERLAY_UPDATE || 2756 update_type == INTNL_OVERLAY_UPDATE); 2757 2758 // Speed features for vertical videos 2759 if (is_vertical_video && is_between_608p_and_720p) { 2760 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1; 2761 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1; 2762 } 2763 2764 // Speed features for regular videos 2765 if (!is_vertical_video && is_720p_or_larger) { 2766 if (speed <= 2 && leaf_and_overlay_frames) { 2767 // For 720p and above, only enable this feature for leaf and overlay 2768 // frames to avoid quality degradation on ARF frames. 
2769 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1; 2770 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1; 2771 } 2772 } 2773 } 2774 2775 // Override some speed features based on qindex 2776 void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) { 2777 AV1_COMMON *const cm = &cpi->common; 2778 SPEED_FEATURES *const sf = &cpi->sf; 2779 WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params; 2780 const int boosted = frame_is_boosted(cpi); 2781 const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480; 2782 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 2783 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 2784 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080; 2785 const int is_1440p_or_larger = AOMMIN(cm->width, cm->height) >= 1440; 2786 const int is_arf2_bwd_type = 2787 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE; 2788 2789 if (cpi->oxcf.mode == REALTIME) { 2790 if (speed >= 6) { 2791 const int qindex_thresh = boosted ? 190 : (is_720p_or_larger ? 120 : 150); 2792 sf->part_sf.adjust_var_based_rd_partitioning = 2793 frame_is_intra_only(cm) 2794 ? 0 2795 : cm->quant_params.base_qindex > qindex_thresh; 2796 } 2797 return; 2798 } 2799 2800 if (speed == 0) { 2801 // qindex_thresh for resolution < 720p 2802 const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140); 2803 if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) { 2804 sf->part_sf.simple_motion_search_split = 2805 cm->features.allow_screen_content_tools ? 
1 : 2; 2806 sf->part_sf.simple_motion_search_early_term_none = 1; 2807 sf->tx_sf.model_based_prune_tx_search_level = 0; 2808 } 2809 2810 if (is_720p_or_larger && cm->quant_params.base_qindex <= 128) { 2811 sf->rd_sf.perform_coeff_opt = 2 + is_1080p_or_larger; 2812 memcpy(winner_mode_params->coeff_opt_thresholds, 2813 &coeff_opt_thresholds[sf->rd_sf.perform_coeff_opt], 2814 sizeof(winner_mode_params->coeff_opt_thresholds)); 2815 sf->part_sf.simple_motion_search_split = 2816 cm->features.allow_screen_content_tools ? 1 : 2; 2817 sf->tx_sf.inter_tx_size_search_init_depth_rect = 1; 2818 sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1; 2819 sf->tx_sf.intra_tx_size_search_init_depth_rect = 1; 2820 sf->tx_sf.model_based_prune_tx_search_level = 0; 2821 2822 if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) { 2823 sf->inter_sf.selective_ref_frame = 2; 2824 sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2; 2825 sf->rd_sf.tx_domain_dist_thres_level = 1; 2826 sf->part_sf.simple_motion_search_early_term_none = 1; 2827 sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000; 2828 sf->interp_sf.cb_pred_filter_search = 0; 2829 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2; 2830 sf->tx_sf.tx_type_search.skip_tx_search = 1; 2831 } 2832 } 2833 } 2834 2835 if (speed >= 2) { 2836 // Disable extended partitions for lower quantizers 2837 const int aggr = AOMMIN(4, speed - 2); 2838 const int qindex_thresh1[4] = { 50, 50, 80, 100 }; 2839 const int qindex_thresh2[4] = { 80, 100, 120, 160 }; 2840 int qindex_thresh; 2841 if (aggr <= 1) { 2842 const int qthresh2 = 2843 (!aggr && !is_480p_or_larger) ? 70 : qindex_thresh2[aggr]; 2844 qindex_thresh = cm->features.allow_screen_content_tools 2845 ? qindex_thresh1[aggr] 2846 : qthresh2; 2847 if (cm->quant_params.base_qindex <= qindex_thresh && !boosted) 2848 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; 2849 } else if (aggr <= 2) { 2850 qindex_thresh = boosted ? 
qindex_thresh1[aggr] : qindex_thresh2[aggr]; 2851 if (cm->quant_params.base_qindex <= qindex_thresh && 2852 !frame_is_intra_only(cm)) 2853 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; 2854 } else if (aggr <= 3) { 2855 if (!is_480p_or_larger) { 2856 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; 2857 } else if (!is_720p_or_larger && !frame_is_intra_only(cm) && 2858 !cm->features.allow_screen_content_tools) { 2859 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; 2860 } else { 2861 qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr]; 2862 if (cm->quant_params.base_qindex <= qindex_thresh && 2863 !frame_is_intra_only(cm)) 2864 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; 2865 } 2866 } else { 2867 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; 2868 } 2869 } 2870 2871 if (speed >= 4) { 2872 // Disable rectangular partitions for lower quantizers 2873 const int aggr = AOMMIN(1, speed - 4); 2874 const int qindex_thresh[2] = { 65, 80 }; 2875 int disable_rect_part; 2876 disable_rect_part = !boosted; 2877 if (cm->quant_params.base_qindex <= qindex_thresh[aggr] && 2878 disable_rect_part && is_480p_or_larger) { 2879 sf->part_sf.rect_partition_eval_thresh = BLOCK_8X8; 2880 } 2881 } 2882 2883 if (speed <= 2) { 2884 if (!is_stat_generation_stage(cpi)) { 2885 // Use faster full-pel motion search for high quantizers. 2886 // Also use reduced total search range for low resolutions at high 2887 // quantizers. 
2888 const int aggr = speed; 2889 const int qindex_thresh1 = ms_qindex_thresh[aggr][is_720p_or_larger][0]; 2890 const int qindex_thresh2 = ms_qindex_thresh[aggr][is_720p_or_larger][1]; 2891 const SEARCH_METHODS search_method = 2892 motion_search_method[is_720p_or_larger]; 2893 if (cm->quant_params.base_qindex > qindex_thresh1) { 2894 sf->mv_sf.search_method = search_method; 2895 sf->tpl_sf.search_method = search_method; 2896 } else if (cm->quant_params.base_qindex > qindex_thresh2) { 2897 sf->mv_sf.search_method = NSTEP_8PT; 2898 } 2899 } 2900 } 2901 2902 if (speed >= 4) { 2903 // Disable LR search at low and high quantizers and enable only for 2904 // mid-quantizer range. 2905 if (!boosted && !is_arf2_bwd_type) { 2906 const int qindex_low[2] = { 100, 60 }; 2907 const int qindex_high[2] = { 180, 160 }; 2908 if (cm->quant_params.base_qindex <= qindex_low[is_720p_or_larger] || 2909 cm->quant_params.base_qindex > qindex_high[is_720p_or_larger]) { 2910 sf->lpf_sf.disable_loop_restoration_luma = 1; 2911 } 2912 } 2913 } 2914 2915 if (speed == 1) { 2916 // Reuse interinter wedge mask search from first search for non-boosted 2917 // non-internal-arf frames, except at very high quantizers. 2918 if (cm->quant_params.base_qindex <= 200) { 2919 if (!boosted && !is_arf2_bwd_type) 2920 sf->inter_sf.reuse_mask_search_results = 1; 2921 } 2922 } 2923 2924 if (speed == 5) { 2925 if (!(frame_is_intra_only(&cpi->common) || 2926 cm->features.allow_screen_content_tools)) { 2927 const int qindex[2] = { 256, 128 }; 2928 // Set the sf value as 3 for low resolution and 2929 // for higher resolutions with low quantizers. 2930 if (cm->quant_params.base_qindex < qindex[is_480p_or_larger]) 2931 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3; 2932 } 2933 } 2934 2935 if (speed >= 5) { 2936 // Disable the sf for low quantizers in case of low resolution screen 2937 // contents. 
2938 if (cm->features.allow_screen_content_tools && 2939 cm->quant_params.base_qindex < 128 && is_480p_or_lesser) { 2940 sf->part_sf.prune_sub_8x8_partition_level = 0; 2941 } 2942 } 2943 2944 // Loop restoration size search 2945 // At speed 0, always search all available sizes for the maximum possible gain 2946 sf->lpf_sf.min_lr_unit_size = RESTORATION_PROC_UNIT_SIZE; 2947 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX; 2948 2949 if (speed >= 1) { 2950 // For large frames, small restoration units are almost never useful, 2951 // so prune them away 2952 if (is_1440p_or_larger) { 2953 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX; 2954 } else if (is_720p_or_larger) { 2955 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1; 2956 } 2957 } 2958 2959 if (speed >= 3 || (cpi->oxcf.mode == ALLINTRA && speed >= 1)) { 2960 // At this speed, a full search is too expensive. Instead, pick a single 2961 // size based on size and qindex. Note that, in general, higher quantizers 2962 // (== lower quality) and larger frames generally want to use larger 2963 // restoration units. 2964 int qindex_thresh = 96; 2965 if (cm->quant_params.base_qindex <= qindex_thresh && !is_1440p_or_larger) { 2966 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1; 2967 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1; 2968 } else { 2969 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX; 2970 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX; 2971 } 2972 } 2973 2974 set_subpel_search_method(&cpi->mv_search_params, 2975 cpi->oxcf.unit_test_cfg.motion_vector_unit_test, 2976 sf->mv_sf.subpel_search_method); 2977 2978 if (cpi->oxcf.enable_low_complexity_decode && cpi->oxcf.mode == GOOD) 2979 set_good_speed_features_lc_dec_qindex_dependent(cpi, sf, speed); 2980 }