speed_features.h (83878B)
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_ENCODER_SPEED_FEATURES_H_
#define AOM_AV1_ENCODER_SPEED_FEATURES_H_

#include "av1/common/enums.h"
#include "av1/encoder/enc_enums.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/encodemb.h"

#ifdef __cplusplus
extern "C" {
#endif

/*! @file */

/*!\cond */
// Number of MESH_PATTERN entries in each mesh pattern array (see
// mesh_patterns and intrabc_mesh_patterns in MV_SPEED_FEATURES).
#define MAX_MESH_STEP 4

// One entry of an exhaustive mesh search pattern.
// NOTE(review): presumably 'range' is the search range and 'interval' the
// sampling step within that range -- confirm against the mesh search code.
typedef struct MESH_PATTERN {
  int range;
  int interval;
} MESH_PATTERN;

// Values for GLOBAL_MOTION_SPEED_FEATURES::gm_search_type.
enum {
  GM_FULL_SEARCH,
  GM_REDUCED_REF_SEARCH_SKIP_L2_L3,
  GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2,

  // Same as GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2 but with extra filtering
  // to keep at most two ref frames
  GM_SEARCH_CLOSEST_REFS_ONLY,

  GM_DISABLE_SEARCH
} UENUM1BYTE(GM_SEARCH_TYPE);

enum {
  DIST_WTD_COMP_ENABLED,
  DIST_WTD_COMP_SKIP_MV_SEARCH,
  DIST_WTD_COMP_DISABLED,
} UENUM1BYTE(DIST_WTD_COMP_FLAG);

// Bit masks of intra prediction modes. Each mask is built by OR-ing
// (1 << mode) for every mode it contains; the INTRA_* masks use the luma mode
// enumerators (DC_PRED, V_PRED, ...) and the UV_INTRA_* masks use the UV_*
// mode enumerators, which additionally include UV_CFL_PRED.
enum {
  INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
              (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) |
              (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) |
              (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | (1 << PAETH_PRED),
  UV_INTRA_ALL =
      (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
      (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | (1 << UV_D113_PRED) |
      (1 << UV_D157_PRED) | (1 << UV_D203_PRED) | (1 << UV_D67_PRED) |
      (1 << UV_SMOOTH_PRED) | (1 << UV_SMOOTH_V_PRED) |
      (1 << UV_SMOOTH_H_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
  UV_INTRA_DC = (1 << UV_DC_PRED),
  UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED),
  UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED),
  UV_INTRA_DC_PAETH_CFL =
      (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
  UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED),
  UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) |
                        (1 << UV_H_PRED) | (1 << UV_CFL_PRED),
  UV_INTRA_DC_PAETH_H_V = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
                          (1 << UV_V_PRED) | (1 << UV_H_PRED),
  UV_INTRA_DC_PAETH_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
                              (1 << UV_V_PRED) | (1 << UV_H_PRED) |
                              (1 << UV_CFL_PRED),
  INTRA_DC = (1 << DC_PRED),
  INTRA_DC_TM = (1 << DC_PRED) | (1 << PAETH_PRED),
  INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
  INTRA_DC_H_V_SMOOTH =
      (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << SMOOTH_PRED),
  INTRA_DC_PAETH_H_V =
      (1 << DC_PRED) | (1 << PAETH_PRED) | (1 << V_PRED) | (1 << H_PRED)
};

// Bit masks of inter prediction modes, built by OR-ing (1 << mode).
// INTER_NEAREST_NEAR_ZERO contains every bit of INTER_ALL except NEWMV and
// NEW_NEWMV; INTER_SINGLE_ALL contains only the four single-reference modes
// listed in its definition.
enum {
  INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
              (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) |
              (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) |
              (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << GLOBAL_GLOBALMV),
  INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
                            (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) |
                            (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
                            (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
                            (1 << NEAR_NEARMV),
  INTER_SINGLE_ALL =
      (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) | (1 << NEWMV),
};

// Bit masks built by OR-ing (1 << THR_*) values. DISABLE_ALL_SPLIT is
// DISABLE_ALL_INTER_SPLIT plus the THR_INTRA bit, and
// LAST_AND_INTRA_SPLIT_ONLY contains every bit of DISABLE_ALL_SPLIT except
// THR_LAST and THR_INTRA.
enum {
  DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
                            (1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST),

  DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,

  DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),

  LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
                              (1 << THR_ALTR) | (1 << THR_GOLD)
};

// Speed feature groups. Each enumerator is a distinct single-bit value
// (1, 2, 4, ..., 128), so all eight bits of the one-byte type are assigned.
enum {
  TXFM_CODING_SF = 1,
  INTER_PRED_SF = 2,
  INTRA_PRED_SF = 4,
  PARTITION_SF = 8,
  LOOP_FILTER_SF = 16,
  RD_SKIP_SF = 32,
  RESERVE_2_SF = 64,
  RESERVE_3_SF = 128,
} UENUM1BYTE(DEV_SPEED_FEATURES);

/* This enumeration defines when the rate control recode loop will be
 * enabled.
 */
enum {
  /*
   * No recodes allowed
   */
  DISALLOW_RECODE = 0,
  /*
   * Allow recode only for KF/ARF/GF frames
   */
  ALLOW_RECODE_KFARFGF = 1,
  /*
   * Allow recode for all frame types based on bitrate constraints.
   */
  ALLOW_RECODE = 2,
} UENUM1BYTE(RECODE_LOOP_TYPE);

// Values for MV_SPEED_FEATURES::subpel_search_method.
enum {
  SUBPEL_TREE = 0,
  SUBPEL_TREE_PRUNED = 1,       // Prunes 1/2-pel searches
  SUBPEL_TREE_PRUNED_MORE = 2,  // Prunes 1/2-pel searches more aggressively
  SUBPEL_SEARCH_METHODS         // Number of methods above (evaluates to 3)
} UENUM1BYTE(SUBPEL_SEARCH_METHOD);

enum {
  // Try the full image with different values.
  LPF_PICK_FROM_FULL_IMAGE,
  // Try the full image filter search with non-dual filter only.
  LPF_PICK_FROM_FULL_IMAGE_NON_DUAL,
  // Try a small portion of the image with different values.
  LPF_PICK_FROM_SUBIMAGE,
  // Estimate the level based on quantizer and frame type
  LPF_PICK_FROM_Q,
  // Pick 0 to disable LPF if LPF was enabled last frame
  LPF_PICK_MINIMAL_LPF
} UENUM1BYTE(LPF_PICK_METHOD);
/*!\endcond */

/*!\enum CDEF_PICK_METHOD
 * \brief This enumeration defines a variety of CDEF pick methods
 */
typedef enum {
  CDEF_FULL_SEARCH,      /**< Full search */
  CDEF_FAST_SEARCH_LVL1, /**< Search among a subset of all possible filters. */
  CDEF_FAST_SEARCH_LVL2, /**< Search reduced subset of filters than Level 1. */
  CDEF_FAST_SEARCH_LVL3, /**< Search reduced subset of secondary filters than
                              Level 2. */
  CDEF_FAST_SEARCH_LVL4, /**< Search reduced subset of filters than Level 3. */
  CDEF_FAST_SEARCH_LVL5, /**< Search reduced subset of filters than Level 4. */
  CDEF_PICK_FROM_Q,      /**< Estimate filter strength based on quantizer. */
  CDEF_PICK_METHODS
} CDEF_PICK_METHOD;

/*!\cond */
// Single-bit flags for mode search skip logic. Note that bit 2 (1 << 2) is
// not assigned to any flag in this enum: the values jump from 1 << 1 to
// 1 << 3.
enum {
  // Terminate search early based on distortion so far compared to
  // qp step, distortion in the neighborhood of the frame, etc.
  FLAG_EARLY_TERMINATE = 1 << 0,

  // Skips comp inter modes if the best so far is an intra mode.
  FLAG_SKIP_COMP_BESTINTRA = 1 << 1,

  // Skips oblique intra modes if the best so far is an inter mode.
  FLAG_SKIP_INTRA_BESTINTER = 1 << 3,

  // Skips oblique intra modes at angles 27, 63, 117, 153 if the best
  // intra so far is not one of the neighboring directions.
  FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4,

  // Skips intra modes other than DC_PRED if the source variance is small
  FLAG_SKIP_INTRA_LOWVAR = 1 << 5,
} UENUM1BYTE(MODE_SEARCH_SKIP_LOGIC);

// Values for TX_TYPE_SEARCH::prune_2d_txfm_mode.
enum {
  // No tx type pruning
  TX_TYPE_PRUNE_0 = 0,
  // adaptively prunes the least promising tx types out of all 16
  // (tuned to provide negligible quality loss)
  TX_TYPE_PRUNE_1 = 1,
  // similar, but applies much more aggressive pruning to get better speed-up
  TX_TYPE_PRUNE_2 = 2,
  TX_TYPE_PRUNE_3 = 3,
  // More aggressive pruning based on tx type score and allowed tx count
  TX_TYPE_PRUNE_4 = 4,
  TX_TYPE_PRUNE_5 = 5,
} UENUM1BYTE(TX_TYPE_PRUNE_MODE);

enum {
  // No reaction to rate control on a detected slide/scene change.
  NO_DETECTION = 0,

  // Set to larger Q based only on the detected slide/scene change and
  // current/past Q.
  FAST_DETECTION_MAXQ = 1,
} UENUM1BYTE(OVERSHOOT_DETECTION_CBR);

enum {
  // Turns off multi-winner mode. So we will do txfm search on either all modes
  // if winner mode is off, or we will only do txfm search on a single winner
  // mode.
  MULTI_WINNER_MODE_OFF = 0,

  // Limits the number of winner modes to at most 2
  MULTI_WINNER_MODE_FAST = 1,

  // Uses the default number of winner modes, which is 3 for intra mode, and 1
  // for inter mode.
  MULTI_WINNER_MODE_DEFAULT = 2,

  // Maximum number of winner modes allowed.
  MULTI_WINNER_MODE_LEVELS,
} UENUM1BYTE(MULTI_WINNER_MODE_TYPE);

enum {
  PRUNE_NEARMV_OFF = 0,     // Turn off nearmv pruning
  PRUNE_NEARMV_LEVEL1 = 1,  // Prune nearmv for qindex (0-85)
  PRUNE_NEARMV_LEVEL2 = 2,  // Prune nearmv for qindex (0-170)
  PRUNE_NEARMV_LEVEL3 = 3,  // Prune nearmv more aggressively for qindex (0-170)
  PRUNE_NEARMV_MAX = PRUNE_NEARMV_LEVEL3,
} UENUM1BYTE(PRUNE_NEARMV_LEVEL);

enum {
  // Default transform search used in evaluation of best inter candidates
  // (MODE_EVAL stage) and motion mode winner processing (WINNER_MODE_EVAL
  // stage).
  TX_SEARCH_DEFAULT = 0,
  // Transform search in motion mode rd during MODE_EVAL stage.
  TX_SEARCH_MOTION_MODE,
  // Transform search in compound type mode rd during MODE_EVAL stage.
  TX_SEARCH_COMP_TYPE_MODE,
  // All transform search cases
  TX_SEARCH_CASES
} UENUM1BYTE(TX_SEARCH_CASE);

// Speed features controlling the transform type search.
typedef struct {
  // Tx type pruning mode; see TX_TYPE_PRUNE_MODE for the available levels.
  TX_TYPE_PRUNE_MODE prune_2d_txfm_mode;
  int fast_intra_tx_type_search;

  // INT_MAX: Disable fast search.
  // 1 - 1024: Probability threshold used for conditionally forcing tx type,
  //           during mode search.
  // 0: Force tx type to be DCT_DCT unconditionally, during
  //    mode search.
  int fast_inter_tx_type_prob_thresh;

  // Prune less likely chosen transforms for each intra mode. The speed
  // feature ranges from 0 to 2, for different speed / compression trade offs.
  int use_reduced_intra_txset;

  // Use a skip flag prediction model to detect blocks with skip = 1 early
  // and avoid doing full TX type search for such blocks.
  int use_skip_flag_prediction;

  // Threshold used by the ML based method to predict TX block split decisions.
  int ml_tx_split_thresh;

  // skip remaining transform type search when we found the rdcost of skip is
  // better than applying transform
  int skip_tx_search;

  // Prune tx type search using previous frame stats.
  int prune_tx_type_using_stats;
  // Prune tx type search using estimated RDcost
  int prune_tx_type_est_rd;

  // Flag used to control the winner mode processing for tx type pruning for
  // inter blocks. It enables further tx type mode pruning based on ML model for
  // mode evaluation and disables tx type mode pruning for winner mode
  // processing.
  int winner_mode_tx_type_pruning;
} TX_TYPE_SEARCH;

// Values for PARTITION_SPEED_FEATURES::partition_search_type.
// ML_BASED_PARTITION only exists when CONFIG_RT_ML_PARTITIONING is nonzero.
enum {
  // Search partitions using RD criterion
  SEARCH_PARTITION,

  // Always use a fixed size partition
  FIXED_PARTITION,

  // Partition using source variance
  VAR_BASED_PARTITION,

#if CONFIG_RT_ML_PARTITIONING
  // Partition using ML model
  ML_BASED_PARTITION
#endif
} UENUM1BYTE(PARTITION_SEARCH_TYPE);

// Values for
// PARTITION_SPEED_FEATURES::auto_max_partition_based_on_simple_motion.
enum {
  NOT_IN_USE,
  DIRECT_PRED,
  RELAXED_PRED,
  ADAPT_PRED
} UENUM1BYTE(MAX_PART_PRED_MODE);

// Values for HIGH_LEVEL_SPEED_FEATURES::high_precision_mv_usage; the meaning
// of each value is described on that field.
enum {
  LAST_MV_DATA,
  CURRENT_Q,
  QTR_ONLY,
} UENUM1BYTE(MV_PREC_LOGIC);

enum {
  SUPERRES_AUTO_ALL,   // Tries all possible superres ratios
  SUPERRES_AUTO_DUAL,  // Tries no superres and q-based superres ratios
  SUPERRES_AUTO_SOLO,  // Only apply the q-based superres ratio
} UENUM1BYTE(SUPERRES_AUTO_SEARCH_TYPE);
/*!\endcond */

/*!\enum INTERNAL_COST_UPDATE_TYPE
 * \brief This enum decides internally how often to update the entropy costs
 *
 * INTERNAL_COST_UPDATE_TYPE is similar to \ref COST_UPDATE_TYPE but has
 * slightly more flexibility in update frequency. This enum is separate from
 * \ref COST_UPDATE_TYPE because although \ref COST_UPDATE_TYPE is not exposed,
 * its values are public so it cannot be modified without breaking public API.
 * Due to the use of AOMMIN() in populate_unified_cost_update_freq() to
 * compute the unified cost update frequencies (out of COST_UPDATE_TYPE and
 * INTERNAL_COST_UPDATE_TYPE), the values of this enum type must be listed in
 * the order of increasing frequencies.
 *
 * \warning In case of any updates/modifications to the enum COST_UPDATE_TYPE,
 * update the enum INTERNAL_COST_UPDATE_TYPE as well.
 */
typedef enum {
  INTERNAL_COST_UPD_OFF,       /*!< Turn off cost updates. */
  INTERNAL_COST_UPD_TILE,      /*!< Update every tile. */
  INTERNAL_COST_UPD_SBROW_SET, /*!< Update every row_set of height 256 pixs. */
  INTERNAL_COST_UPD_SBROW,     /*!< Update every sb rows inside a tile. */
  INTERNAL_COST_UPD_SB,        /*!< Update every sb. */
} INTERNAL_COST_UPDATE_TYPE;

/*!\enum SIMPLE_MOTION_SEARCH_PRUNE_LEVEL
 * \brief This enumeration defines a variety of simple motion search based
 * partition prune levels
 *
 * NO_PRUNING is explicitly -1, so SIMPLE_AGG_LVL0 .. SIMPLE_AGG_LVL5 take the
 * values 0 .. 5 and QIDX_BASED_AGG_LVL1 is 6. The TOTAL_* enumerators are
 * derived from those values (6, 1 and 7 respectively).
 */
typedef enum {
  NO_PRUNING = -1,
  SIMPLE_AGG_LVL0,     /*!< Simple prune aggressiveness level 0. speed = 0 */
  SIMPLE_AGG_LVL1,     /*!< Simple prune aggressiveness level 1. speed = 1 */
  SIMPLE_AGG_LVL2,     /*!< Simple prune aggressiveness level 2. speed = 2 */
  SIMPLE_AGG_LVL3,     /*!< Simple prune aggressiveness level 3. speed >= 3 */
  SIMPLE_AGG_LVL4,     /*!< Simple prune aggressiveness level 4. speed >= 4 */
  SIMPLE_AGG_LVL5,     /*!< Simple prune aggressiveness level 5. speed >= 5 */
  QIDX_BASED_AGG_LVL1, /*!< Qindex based prune aggressiveness level, aggressive
                          level maps to simple agg level 1 or 2 based on qindex.
                        */
  TOTAL_SIMPLE_AGG_LVLS = QIDX_BASED_AGG_LVL1, /*!< Total number of simple prune
                                                  aggressiveness levels. */
  TOTAL_QINDEX_BASED_AGG_LVLS =
      QIDX_BASED_AGG_LVL1 -
      SIMPLE_AGG_LVL5, /*!< Total number of qindex based simple prune
                          aggressiveness levels. */
  TOTAL_AGG_LVLS = TOTAL_SIMPLE_AGG_LVLS +
                   TOTAL_QINDEX_BASED_AGG_LVLS, /*!< Total number of levels. */
} SIMPLE_MOTION_SEARCH_PRUNE_LEVEL;

/*!\enum PRUNE_MESH_SEARCH_LEVEL
 * \brief This enumeration defines a variety of mesh search prune levels.
 */
typedef enum {
  PRUNE_MESH_SEARCH_DISABLED = 0, /*!< Prune mesh search level 0. */
  PRUNE_MESH_SEARCH_LVL_1 = 1,    /*!< Prune mesh search level 1. */
  PRUNE_MESH_SEARCH_LVL_2 = 2,    /*!< Prune mesh search level 2. */
} PRUNE_MESH_SEARCH_LEVEL;

/*!\enum INTER_SEARCH_EARLY_TERM_IDX
 * \brief This enumeration defines inter search early termination index in
 * non-rd path based on sse value.
 */
typedef enum {
  EARLY_TERM_DISABLED =
      0, /*!< Early terminate inter mode search based on sse disabled. */
  EARLY_TERM_IDX_1 =
      1, /*!< Early terminate inter mode search based on sse, index 1. */
  EARLY_TERM_IDX_2 =
      2, /*!< Early terminate inter mode search based on sse, index 2. */
  EARLY_TERM_IDX_3 =
      3, /*!< Early terminate inter mode search based on sse, index 3. */
  EARLY_TERM_IDX_4 =
      4, /*!< Early terminate inter mode search based on sse, index 4. */
  EARLY_TERM_INDICES, /*!< Total number of early terminate indices */
} INTER_SEARCH_EARLY_TERM_IDX;

/*!
 * \brief Sequence/frame level speed vs quality features
 */
typedef struct HIGH_LEVEL_SPEED_FEATURES {
  /*! Frame level coding parameter update. */
  int frame_parameter_update;

  /*!
   * Cases and frame types for which the recode loop is enabled.
   */
  RECODE_LOOP_TYPE recode_loop;

  /*!
   * Controls the tolerance vs target rate used in deciding whether to
   * recode a frame. It has no meaning if recode is disabled.
   */
  int recode_tolerance;

  /*!
   * Determine how motion vector precision is chosen. The possibilities are:
   * LAST_MV_DATA: use the mv data from the last coded frame
   * CURRENT_Q: use the current q as a threshold
   * QTR_ONLY: use quarter pel precision only.
   */
  MV_PREC_LOGIC high_precision_mv_usage;

  /*!
   * Always set to 0. If on it enables 0 cost background transmission
   * (except for the initial transmission of the segmentation). The feature is
   * disabled because the addition of very large block sizes makes the
   * backgrounds very cheap to encode, and the segmentation we have
   * adds overhead.
   */
  int static_segmentation;

  /*!
   * Superres-auto mode search type:
   */
  SUPERRES_AUTO_SEARCH_TYPE superres_auto_search_type;

  /*!
   * Enable/disable extra screen content test by encoding key frame twice.
   */
  int disable_extra_sc_testing;

  /*!
   * Enable/disable second_alt_ref temporal filtering.
   */
  int second_alt_ref_filtering;

  /*!
   * The number of frames to be used during temporal filtering of an ARF frame
   * is adjusted based on noise level of the current frame. The sf has three
   * levels to decide number of frames to be considered for filtering:
   * 0       : Use default number of frames
   * 1 and 2 : Reduce the number of frames based on noise level with varied
   * aggressiveness
   */
  int adjust_num_frames_for_arf_filtering;

  /*!
   * Decide the bit estimation approach used in qindex decision.
   * 0: estimate bits based on a constant value;
   * 1: estimate bits more accurately based on the frame complexity.
   */
  int accurate_bit_estimate;

  /*!
   * Decide the approach for weight calculation during temporal filtering.
   * 0: Calculate weight using exp()
   * 1: Calculate weight using a lookup table that approximates exp().
   */
  int weight_calc_level_in_tf;

  /*!
   * Decide whether to perform motion estimation at split block (i.e. 16x16)
   * level or not.
   * 0: Always allow motion estimation.
   * 1: Conditionally allow motion estimation based on 4x4 sub-blocks variance.
   */
  int allow_sub_blk_me_in_tf;

  /*!
   * Decide whether to disable temporal mv prediction.
   * 0: Do not disable
   * 1: Conditionally disable
   * 2: Always disable
   */
  int ref_frame_mvs_lvl;

  /*!
   * Decide whether to enable screen detection mode 2 fast detection.
   * 0: Regular detection
   * 1: Fast detection
   */
  int screen_detection_mode2_fast_detection;
} HIGH_LEVEL_SPEED_FEATURES;

/*!
 * Speed features for the first pass.
 */
typedef struct FIRST_PASS_SPEED_FEATURES {
  /*!
   * \brief Reduces the mv search window.
   * By default, the initial search window is around
   * MIN(MIN(dims), MAX_FULL_PEL_VAL) = MIN(MIN(dims), 1023).
   * Each step reduction decreases the window size by about a factor of 2.
   */
  int reduce_mv_step_param;

  /*!
   * \brief Skips the motion search when the zero mv has small sse.
   */
  int skip_motion_search_threshold;

  /*!
   * \brief Skips reconstruction by using source buffers for prediction
   */
  int disable_recon;

  /*!
   * \brief Skips the motion search centered on 0,0 mv.
   */
  int skip_zeromv_motion_search;
} FIRST_PASS_SPEED_FEATURES;

/*!\cond */
// Speed features for the tpl model; see the per-field comments below.
typedef struct TPL_SPEED_FEATURES {
  // GOP length adaptive decision.
  // If set to 0, tpl model decides whether a shorter gf interval is better.
  // If set to 1, tpl stats of ARFs from base layer, (base+1) layer and
  // (base+2) layer decide whether a shorter gf interval is better.
  // If set to 2, tpl stats of ARFs from base layer, (base+1) layer and GF boost
  // decide whether a shorter gf interval is better.
  // If set to 3, gop length adaptive decision is disabled.
  int gop_length_decision_method;
  // Prune the intra modes search by tpl.
  // If set to 0, we will search all intra modes from DC_PRED to PAETH_PRED.
  // If set to 1, we only search DC_PRED, V_PRED, and H_PRED.
  int prune_intra_modes;
  // This parameter controls which step in the n-step process we start at.
  int reduce_first_step_size;
  // Skip motion estimation based on the precision of center MVs and the
  // difference between center MVs.
  // If set to 0, motion estimation is skipped for duplicate center MVs
  // (default). If set to 1, motion estimation is skipped for duplicate
  // full-pixel center MVs. If set to 2, motion estimation is skipped if the
  // difference between center MVs is less than the threshold.
  int skip_alike_starting_mv;

  // When to stop subpel search.
  SUBPEL_FORCE_STOP subpel_force_stop;

  // Which search method to use.
  SEARCH_METHODS search_method;

  // Prune starting mvs in TPL based on sad scores.
  int prune_starting_mv;

  // Prune reference frames in TPL.
  int prune_ref_frames_in_tpl;

  // Support compound predictions.
  int allow_compound_pred;

  // Calculate rate and distortion based on Y plane only.
  int use_y_only_rate_distortion;

  // Use SAD instead of SATD during intra/inter mode search.
  // If set to 0, use SATD always.
  // If set to 1, use SAD during intra/inter mode search for frames in the
  // higher temporal layers of the hierarchical prediction structure.
  // If set to 2, use SAD during intra/inter mode search for all frames.
  // This sf is disabled for the first GF group of the key-frame interval,
  // i.e., SATD is used during intra/inter mode search of the first GF group.
  int use_sad_for_mode_decision;

  // Skip tpl processing for frames of type LF_UPDATE.
  // This sf is disabled for the first GF group of the key-frame interval.
  int reduce_num_frames;
} TPL_SPEED_FEATURES;

// Speed features for global motion estimation.
typedef struct GLOBAL_MOTION_SPEED_FEATURES {
  // Which reference frames are searched; see GM_SEARCH_TYPE.
  GM_SEARCH_TYPE gm_search_type;

  // During global motion estimation, prune remaining reference frames in a
  // given direction(past/future), if the evaluated ref_frame in that direction
  // yields gm_type as INVALID/TRANSLATION/IDENTITY
  int prune_ref_frame_for_gm_search;

  // When the current GM type is set to ZEROMV, prune ZEROMV if its performance
  // is worse than NEWMV under SSE metric.
  // 0 : no pruning
  // 1 : conservative pruning
  // 2 : aggressive pruning
  int prune_zero_mv_with_sse;

  // Disable global motion estimation based on stats of previous frames in the
  // GF group
  int disable_gm_search_based_on_stats;

  // Downsampling pyramid level to use for global motion estimation
  int downsample_level;

  // Number of refinement steps to apply after initial model generation
  int num_refinement_steps;

  // Error advantage threshold level used to determine whether global motion
  // compensation should be enabled
  int gm_erroradv_tr_level;
} GLOBAL_MOTION_SPEED_FEATURES;

// Speed features for the partition search.
typedef struct PARTITION_SPEED_FEATURES {
  // How partitions are chosen; see PARTITION_SEARCH_TYPE.
  PARTITION_SEARCH_TYPE partition_search_type;

  // Used if partition_search_type = FIXED_PARTITION
  BLOCK_SIZE fixed_partition_size;

  // Prune extended partition types search based on the current best partition
  // and the combined rdcost of the subblocks estimated from previous
  // partitions. Can take values 0 - 2, 0 referring to no pruning, and 1 - 2
  // increasing aggressiveness of pruning in order.
  int prune_ext_partition_types_search_level;

  // Prune part4 based on block size
  int prune_part4_search;

  // Use a ML model to prune rectangular, ab and 4-way horz
  // and vert partitions
  int ml_prune_partition;

  // Use a ML model to adaptively terminate partition search after trying
  // PARTITION_SPLIT. Can take values 0 - 2, 0 meaning not being enabled, and
  // 1 - 2 increasing aggressiveness in order.
  int ml_early_term_after_part_split_level;

  // Skip rectangular partition test when partition type none gives better
  // rd than partition type split. Can take values 0 - 2, 0 referring to no
  // skipping, and 1 - 2 increasing aggressiveness of skipping in order.
  int less_rectangular_check_level;

  // Use square partition only beyond this block size.
  BLOCK_SIZE use_square_partition_only_threshold;

  // Sets max square partition levels for this superblock based on
  // motion vector and prediction error distribution produced from 16x16
  // simple motion search
  MAX_PART_PRED_MODE auto_max_partition_based_on_simple_motion;

  // Min and max square partition size we enable (block_size) as per auto
  // min max, but also used by adjust partitioning, and pick_partitioning.
  BLOCK_SIZE default_min_partition_size;
  BLOCK_SIZE default_max_partition_size;

  // Sets level of adjustment of variance-based partitioning during
  // rd_use_partition 0 - no partition adjustment, 1 - try to merge partitions
  // for small blocks and high QP, 2 - try to merge partitions, 3 - try to merge
  // and split leaf partitions and 0 - 3 decreasing aggressiveness in order.
  int adjust_var_based_rd_partitioning;

  // Partition search early breakout thresholds.
  int64_t partition_search_breakout_dist_thr;
  int partition_search_breakout_rate_thr;

  // Thresholds for ML based partition search breakout.
  float ml_partition_search_breakout_thresh[PARTITION_BLOCK_SIZES];

  // ML based partition search breakout model index
  int ml_partition_search_breakout_model_index;

  // ML based partition search breakout model index
  // NOTE(review): this comment is a copy of the one on the previous field;
  // judging by the name this is presumably a level index for ML-based 4-way
  // partition pruning -- confirm against the code that reads it.
  int ml_4_partition_search_level_index;

  // Aggressiveness levels for pruning split and rectangular partitions based on
  // simple_motion_search. SIMPLE_AGG_LVL0 to SIMPLE_AGG_LVL5 correspond to
  // simple motion search based pruning. QIDX_BASED_AGG_LVL1 corresponds to
  // qindex based and simple motion search based pruning.
  int simple_motion_search_prune_agg;

  // Perform simple_motion_search on each possible subblock and use it to prune
  // PARTITION_HORZ and PARTITION_VERT.
  int simple_motion_search_prune_rect;

  // Perform simple motion search before none_partition to decide if we
  // want to remove all partitions other than PARTITION_SPLIT. If set to 0, this
  // model is disabled. If set to 1, the model attempts to perform
  // PARTITION_SPLIT only. If set to 2, the model also attempts to prune
  // PARTITION_SPLIT.
  int simple_motion_search_split;

  // Use features from simple_motion_search to terminate prediction block
  // partition after PARTITION_NONE
  int simple_motion_search_early_term_none;

  // Controls whether to reduce the number of motion search steps. If this is 0,
  // then simple_motion_search has the same number of steps as
  // single_motion_search (assuming no other speed features). Otherwise, reduce
  // the number of steps by the value contained in this variable.
  int simple_motion_search_reduce_search_steps;

  // This variable controls the maximum block size where intra blocks can be
  // used in inter frames.
  // TODO(aconverse): Fold this into one of the other many mode skips
  BLOCK_SIZE max_intra_bsize;

  // Use CNN with luma pixels on source frame on each of the 64x64 subblock to
  // perform partition pruning in intra frames.
  // 0: No Pruning
  // 1: Prune split and rectangular partitions only
  // 2: Prune none, split and rectangular partitions
  int intra_cnn_based_part_prune_level;

  // Disable extended partition search if the current bsize is greater than the
  // threshold. Must be a square block size BLOCK_8X8 or higher.
  BLOCK_SIZE ext_partition_eval_thresh;

  // Use best partition decision so far to tune 'ext_partition_eval_thresh'
  int ext_part_eval_based_on_cur_best;

  // Disable rectangular partitions for larger block sizes.
  int rect_partition_eval_thresh;

  // Prune extended partition search based on whether the split/rect partitions
  // provided an improvement in the previous search.
  // 0 : no pruning
  // 1 : prune 1:4 partition search using winner info from split partitions
  // 2 : prune 1:4 and AB partition search using split and HORZ/VERT info
  int prune_ext_part_using_split_info;

  // Prune rectangular, AB and 4-way partition based on q index and block size
  // 0 : no pruning
  // 1 : prune sub_8x8 at very low quantizers
  // 2 : prune all block size based on qindex
  int prune_rectangular_split_based_on_qidx;

  // Prune rectangular partitions based on 4x4 sub-block variance
  // false : no pruning
  // true : prune rectangular partitions based on 4x4 sub-block variance
  //        deviation
  //
  // For allintra encode, this speed feature reduces instruction count by 6.4%
  // for speed=6 with coding performance change less than 0.24%. For AVIF image
  // encode, this speed feature reduces encode time by 8.14% for speed 6 on a
  // typical image dataset with coding performance change less than 0.16%. This
  // speed feature is not applicable to speed >= 7.
  bool prune_rect_part_using_4x4_var_deviation;

  // Prune rectangular partitions based on prediction mode chosen by NONE
  // partition.
  // false : no pruning
  // true : prunes rectangular partition as described below
  // If prediction mode chosen by NONE partition is
  // DC_PRED or SMOOTH_PRED: Prunes both horizontal and vertical partitions if
  // at least one of the left and top neighbor blocks is larger than the
  // current block.
  // Directional Mode: Prunes either of the horizontal and vertical partition
  // based on center angle of the prediction mode chosen by NONE partition. For
  // example, vertical partition is pruned if center angle of the prediction
  // mode chosen by NONE partition is close to 180 degrees (i.e. horizontal
  // direction) and vice versa.
  // For allintra encode, this speed feature reduces instruction count by 5.1%
  // for speed=6 with coding performance change less than 0.22%. For AVIF image
  // encode, this speed feature reduces encode time by 4.44% for speed 6 on a
  // typical image dataset with coding performance change less than 0.15%.
  // For speed >= 7, variance-based logic is used to determine the partition
  // structure instead of recursive partition search. Therefore, this speed
  // feature is not applicable in such cases.
  bool prune_rect_part_using_none_pred_mode;

  // Terminate partition search for child partition,
  // when NONE and SPLIT partition rd_costs are INT64_MAX.
  int early_term_after_none_split;

  // Level used to adjust threshold for av1_ml_predict_breakout(). At lower
  // levels, more conservative threshold is used, and value of 0 indicates
  // av1_ml_predict_breakout() is disabled. Value of 3 corresponds to default
  // case with no adjustment to lbd thresholds.
  int ml_predict_breakout_level;

  // Prune sub_8x8 (BLOCK_4X4, BLOCK_4X8 and BLOCK_8X4) partitions.
  // 0 : no pruning
  // 1 : pruning based on neighbour block information
  // 2 : prune always
  int prune_sub_8x8_partition_level;

  // Prune rectangular split based on simple motion search split/no_split score.
  // 0: disable pruning, 1: enable pruning
  int simple_motion_search_rect_split;

  // The current encoder adopts a DFS search for block partitions.
  // Therefore the mode selection and associated rdcost is ready for smaller
  // blocks before the mode selection for some partition types.
  // AB partition could use previous rd information and skip mode search.
  // An example is:
  //
  //  current block
  //  +---+---+
  //  |       |
  //  +       +
  //  |       |
  //  +-------+
  //
  //  SPLIT partition has been searched first before trying HORZ_A
  //  +---+---+
  //  | R | R |
  //  +---+---+
  //  | R | R |
  //  +---+---+
  //
  //  HORZ_A
  //  +---+---+
  //  |   |   |
  //  +---+---+
  //  |       |
  //  +-------+
  //
  //  With this speed feature, the top two sub blocks can directly use rdcost
  //  searched in split partition, and the mode info is also copied from
  //  saved info. Similarly, the bottom rectangular block can also use
  //  the available information from previous rectangular search.
  int reuse_prev_rd_results_for_part_ab;

  // Reuse the best prediction modes found in PARTITION_SPLIT and PARTITION_RECT
  // when encoding PARTITION_AB.
  int reuse_best_prediction_for_part_ab;

  // The current partition search records the best rdcost so far and uses it
  // in mode search and transform search to early skip when some criterion is
  // met. For example, when the current rdcost is larger than the best rdcost,
  // or the model rdcost is larger than the best rdcost times some thresholds.
  // By default, this feature is turned on to speed up the encoder partition
  // search.
  // If disabling it, at speed 0, 30 frames, we could get
  // about -0.25% quality gain (psnr, ssim, vmaf), with about 13% slowdown.
  int use_best_rd_for_pruning;

  // Skip evaluation of non-square partitions based on the corresponding NONE
  // partition.
  // 0: no pruning
  // 1: prune extended partitions if NONE is skippable
  // 2: on top of 1, prune rectangular partitions if NONE is inter, not a newmv
  // mode and skippable
  int skip_non_sq_part_based_on_none;

  // Disables 8x8 and below partitions for low quantizers.
  int disable_8x8_part_based_on_qidx;

  // Decoder side speed feature to add penalty for use of smaller partitions.
  // Takes values 0 - 2, 0 indicating no penalty and higher level indicating
  // increased penalty.
  int split_partition_penalty_level;
} PARTITION_SPEED_FEATURES;

typedef struct MV_SPEED_FEATURES {
  // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
  SEARCH_METHODS search_method;

  // Enable the use of faster, less accurate mv search method
  // 0: disable, 1: if bsize >= BLOCK_32X32, 2: based on bsize, SAD and qp
  // TODO(chiyotsai@google.com): Take the clip's resolution and mv activity into
  // account.
  int use_bsize_dependent_search_method;

  // If this is set to 1, we limit the motion search range to 2 times the
  // largest motion vector found in the last frame.
  int auto_mv_step_size;

  // Subpel_search_method can only be subpel_tree which does a subpixel
  // logarithmic search that keeps stepping at 1/2 pixel units until
  // you stop getting a gain, and then goes on to 1/4 and repeats
  // the same process. Along the way it skips many diagonals.
  SUBPEL_SEARCH_METHOD subpel_search_method;

  // Maximum number of steps in logarithmic subpel search before giving up.
  int subpel_iters_per_step;

  // When to stop subpel search.
  SUBPEL_FORCE_STOP subpel_force_stop;

  // When to stop subpel search in simple motion search.
  SUBPEL_FORCE_STOP simple_motion_subpel_force_stop;

  // If true, sub-pixel search uses the exact convolve function used for final
  // encoding and decoding; otherwise, it uses bilinear interpolation.
  SUBPEL_SEARCH_TYPE use_accurate_subpel_search;

  // Threshold for allowing exhaustive motion search.
  int exhaustive_searches_thresh;

  // Pattern to be used for any exhaustive mesh searches (except intraBC ME).
  MESH_PATTERN mesh_patterns[MAX_MESH_STEP];

  // Pattern to be used for exhaustive mesh searches of intraBC ME.
  MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_STEP];

  // Reduce single motion search range based on MV result of prior ref_mv_idx.
  int reduce_search_range;

  // Prune mesh search.
  PRUNE_MESH_SEARCH_LEVEL prune_mesh_search;

  // Use the rd cost around the best FULLPEL_MV to speed up subpel search
  int use_fullpel_costlist;

  // Set the full pixel search level of obmc
  // 0: obmc_full_pixel_diamond
  // 1: obmc_refining_search_sad (faster)
  int obmc_full_pixel_search_level;

  // Accurate full pixel motion search based on TPL stats.
916 int full_pixel_search_level; 917 918 // Allow intrabc motion search 919 int use_intrabc; 920 921 // Prune intrabc candidate block hash search 922 // 0: check every block hash candidate 923 // 1: check the first 64 block hash candidates only 924 int prune_intrabc_candidate_block_hash_search; 925 926 // Intrabc search level 927 // 0: top + left search, all block sizes, always hash plus pixel search 928 // 1: top search only, 4x4, 8x8 and 16x16 block sizes only, perform pixel 929 // search if and only if hash search failed to find a candidate 930 int intrabc_search_level; 931 932 // Whether the maximum intrabc block size to hash is 8x8 933 // 0: Hash from 4x4 up to superblock size 934 // 1: Hash 4x4 and 8x8 only 935 int hash_max_8x8_intrabc_blocks; 936 937 // Whether to downsample the rows in sad calculation during motion search. 938 // This is only active when there are at least 16 rows. When this sf is 939 // active, if there is a large discrepancy in the SAD values for the final 940 // motion vector between skipping vs not skipping, motion search is redone 941 // with skip row features off. 942 // 0: Disabled (do not downsample rows) 943 // 1: Skip SAD calculation of odd rows if the SAD deviation of the even and 944 // odd rows for the starting MV is small. Redo motion search with sf off 945 // when SAD deviation is high for the final motion vector. 946 // 2: Skip SAD calculation of odd rows. SAD deviation is not tested for the 947 // start MV and tested only for the final MV. 948 int use_downsampled_sad; 949 950 // Enable/disable extensive joint motion search. 951 int disable_extensive_joint_motion_search; 952 953 // Enable second best mv check in joint mv search. 954 // 0: allow second MV (use rd cost as the metric) 955 // 1: use var as the metric 956 // 2: disable second MV 957 int disable_second_mv; 958 959 // Skips full pixel search based on start mv of prior ref_mv_idx. 
960 // 0: Disabled 961 // 1: Skips the full pixel search up to 4 neighbor full-pel MV positions. 962 // 2: Skips the full pixel search up to 8 neighbor full-pel MV positions. 963 int skip_fullpel_search_using_startmv; 964 965 // Method to use for refining WARPED_CAUSAL motion vectors 966 // TODO(rachelbarker): Can this be unified with OBMC in some way? 967 WARP_SEARCH_METHOD warp_search_method; 968 969 // Maximum number of iterations in WARPED_CAUSAL refinement search 970 int warp_search_iters; 971 } MV_SPEED_FEATURES; 972 973 typedef struct INTER_MODE_SPEED_FEATURES { 974 // 2-pass inter mode model estimation where the preliminary pass skips 975 // transform search and uses a model to estimate rd, while the final pass 976 // computes the full transform search. Two types of models are supported: 977 // 0: not used 978 // 1: used with online dynamic rd model 979 // 2: used with static rd model 980 int inter_mode_rd_model_estimation; 981 982 // Bypass transform search based on skip rd at following stages 983 // i. Compound type mode search 984 // ii. Motion mode search (mode evaluation and winner motion mode stage) 985 // iii. Transform search for best inter candidates 986 int txfm_rd_gate_level[TX_SEARCH_CASES]; 987 988 // Limit the inter mode tested in the RD loop 989 int reduce_inter_modes; 990 991 // This variable is used to cap the maximum number of times we skip testing a 992 // mode to be evaluated. A high value means we will be faster. 993 int adaptive_rd_thresh; 994 995 // Aggressively prune inter modes when best mode is skippable. 996 int prune_inter_modes_if_skippable; 997 998 // Drop less likely to be picked reference frames in the RD search. 999 // Has seven levels for now: 0, 1, 2, 3, 4, 5 and 6 where higher levels prune 1000 // more aggressively than lower ones. (0 means no pruning). 1001 int selective_ref_frame; 1002 1003 // Prune reference frames for rectangular partitions.
1004 // 0 implies no pruning 1005 // 1 implies prune for extended partition 1006 // 2 implies prune horiz, vert and extended partition 1007 int prune_ref_frame_for_rect_partitions; 1008 1009 // Prune inter modes w.r.t past reference frames 1010 // 0 no pruning 1011 // 1 prune inter modes w.r.t ALTREF2 and ALTREF reference frames 1012 // 2 prune inter modes w.r.t BWDREF, ALTREF2 and ALTREF reference frames 1013 int alt_ref_search_fp; 1014 1015 // Prune reference frames for single prediction modes based on temporal 1016 // distance and pred MV SAD. Feasible values are 0, 1, 2. The feature is 1017 // disabled for 0. An increasing value indicates more aggressive pruning 1018 // threshold. 1019 int prune_single_ref; 1020 1021 // Prune compound reference frames 1022 // 0 no pruning 1023 // 1 prune compound references which do not satisfy the two conditions: 1024 // a) The references are at a nearest distance from the current frame in 1025 // both past and future direction. 1026 // b) The references have minimum pred_mv_sad in both past and future 1027 // direction. 1028 // 2 prune compound references except the one with nearest distance from the 1029 // current frame in both past and future direction. 1030 int prune_comp_ref_frames; 1031 1032 // Skip the current ref_mv in NEW_MV mode based on mv, rate cost, etc. 1033 // This speed feature equaling 0 means no skipping. 1034 // If the speed feature equals 1 or 2, skip the current ref_mv in NEW_MV mode 1035 // if we have already encountered ref_mv in the drl such that: 1036 // 1. The other drl has the same mv during the SIMPLE_TRANSLATION search 1037 // process as the current mv. 1038 // 2. The rate needed to encode the current mv is larger than that for the 1039 // other ref_mv. 1040 // The speed feature equaling 1 means using subpel mv in the comparison. 1041 // The speed feature equaling 2 means using fullpel mv in the comparison. 
1042 // If the speed feature >= 3, skip the current ref_mv in NEW_MV mode based on 1043 // known full_mv bestsme and drl cost. 1044 int skip_newmv_in_drl; 1045 1046 // This speed feature checks duplicate ref MVs among NEARESTMV, NEARMV, 1047 // GLOBALMV and skips NEARMV or GLOBALMV (in order) if a duplicate is found 1048 // TODO(any): Instead of skipping repeated ref mv, use the recalculated 1049 // rd-cost based on mode rate and skip the mode evaluation 1050 int skip_repeated_ref_mv; 1051 1052 // Flag used to control the ref_best_rd based gating for chroma 1053 int perform_best_rd_based_gating_for_chroma; 1054 1055 // Reuse the inter_intra_mode search result from NEARESTMV mode to other 1056 // single ref modes 1057 int reuse_inter_intra_mode; 1058 1059 // prune wedge and compound segment approximate rd evaluation based on 1060 // compound average modeled rd 1061 int prune_comp_type_by_model_rd; 1062 1063 // prune wedge and compound segment approximate rd evaluation based on 1064 // compound average rd/ref_best_rd 1065 int prune_comp_type_by_comp_avg; 1066 1067 // Skip some ref frames in compound motion search by single motion search 1068 // result. Has three levels for now: 0 referring to no skipping, and 1 - 3 1069 // increasing aggressiveness of skipping in order. 1070 // Note: The search order might affect the result. It assumes that the single 1071 // reference modes are searched before compound modes. It is better to search 1072 // same single inter mode as a group. 1073 int prune_comp_search_by_single_result; 1074 1075 // Instead of performing a full MV search, do a simple translation first 1076 // and only perform a full MV search on the motion vectors that performed 1077 // well. 1078 int prune_mode_search_simple_translation; 1079 1080 // Only search compound modes with at least one "good" reference frame. 1081 // A reference frame is good if, after looking at its performance among 1082 // the single reference modes, it is one of the two best performers. 
1083 int prune_compound_using_single_ref; 1084 1085 // Skip extended compound mode (NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV, 1086 // NEW_NEARMV) using ref frames of above and left neighbor 1087 // blocks. 1088 // 0 : no pruning 1089 // 1 : prune ext compound modes using neighbor blocks (less aggressiveness) 1090 // 2 : prune ext compound modes using neighbor blocks (high aggressiveness) 1091 // 3 : prune ext compound modes unconditionally (highest aggressiveness) 1092 int prune_ext_comp_using_neighbors; 1093 1094 // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes 1095 int skip_ext_comp_nearmv_mode; 1096 1097 // Skip extended compound mode when ref frame corresponding to NEWMV does not 1098 // have NEWMV as single mode winner. 1099 // 0 : no pruning 1100 // 1 : prune extended compound mode (less aggressiveness) 1101 // 2 : prune extended compound mode (high aggressiveness) 1102 int prune_comp_using_best_single_mode_ref; 1103 1104 // Skip NEARESTMV and NEARMV using weight computed in ref mv list population 1105 // 1106 // Pruning is enabled only when both the top and left neighbor blocks are 1107 // available and when the current block already has a valid inter prediction. 1108 int prune_nearest_near_mv_using_refmv_weight; 1109 1110 // Based on previous ref_mv_idx search result, prune the following search. 1111 int prune_ref_mv_idx_search; 1112 1113 // Disable one sided compound modes. 1114 int disable_onesided_comp; 1115 1116 // Prune obmc search using previous frame stats. 1117 // INT_MAX : disable obmc search 1118 int prune_obmc_prob_thresh; 1119 1120 // Prune warped motion search using previous frame stats. 
1121 int prune_warped_prob_thresh; 1122 1123 // Variance threshold to enable/disable Interintra wedge search 1124 unsigned int disable_interintra_wedge_var_thresh; 1125 1126 // Variance threshold to enable/disable Interinter wedge search 1127 unsigned int disable_interinter_wedge_var_thresh; 1128 1129 // De-couple wedge and mode search during interintra RDO. 1130 int fast_interintra_wedge_search; 1131 1132 // Whether fast wedge sign estimate is used 1133 int fast_wedge_sign_estimate; 1134 1135 // Enable/disable ME for interinter wedge search. 1136 int disable_interinter_wedge_newmv_search; 1137 1138 // Decide when and how to use joint_comp. 1139 DIST_WTD_COMP_FLAG use_dist_wtd_comp_flag; 1140 1141 // Clip the frequency of updating the mv cost. 1142 INTERNAL_COST_UPDATE_TYPE mv_cost_upd_level; 1143 1144 // Clip the frequency of updating the coeff cost. 1145 INTERNAL_COST_UPDATE_TYPE coeff_cost_upd_level; 1146 1147 // Clip the frequency of updating the mode cost. 1148 INTERNAL_COST_UPDATE_TYPE mode_cost_upd_level; 1149 1150 // Prune inter modes based on tpl stats 1151 // 0 : no pruning 1152 // 1 - 3 indicate increasing aggressiveness in order. 1153 int prune_inter_modes_based_on_tpl; 1154 1155 // Skip NEARMV and NEAR_NEARMV modes using ref frames of above and left 1156 // neighbor blocks and qindex. 1157 PRUNE_NEARMV_LEVEL prune_nearmv_using_neighbors; 1158 1159 // Model based breakout after interpolation filter search 1160 // 0: no breakout 1161 // 1: use model based rd breakout 1162 int model_based_post_interp_filter_breakout; 1163 1164 // Reuse compound type rd decision when exact match is found 1165 // 0: No reuse 1166 // 1: Reuse the compound type decision 1167 int reuse_compound_type_decision; 1168 1169 // Enable/disable masked compound. 
1170 int disable_masked_comp; 1171 1172 // Enable/disable MV refinement for compound modes corresponds to compound 1173 // types COMPOUND_AVERAGE, COMPOUND_DISTWTD (currently, this compound type 1174 // is disabled for speeds >= 2 using the sf 'use_dist_wtd_comp_flag') and 1175 // COMPOUND_DIFFWTD based on the availability. Levels 0 to 3 indicate 1176 // increasing order of aggressiveness to disable MV refinement. 1177 // 0: MV Refinement is enabled and for NEW_NEWMV mode used two iterations of 1178 // refinement in av1_joint_motion_search(). 1179 // 1: MV Refinement is disabled for COMPOUND_DIFFWTD and enabled for 1180 // COMPOUND_AVERAGE & COMPOUND_DISTWTD. 1181 // 2: MV Refinement is enabled for COMPOUND_AVERAGE & COMPOUND_DISTWTD for 1182 // NEW_NEWMV mode with one iteration of refinement in 1183 // av1_joint_motion_search() and MV Refinement is disabled for other compound 1184 // type modes. 1185 // 3: MV Refinement is disabled. 1186 int enable_fast_compound_mode_search; 1187 1188 // Reuse masked compound type search results 1189 int reuse_mask_search_results; 1190 1191 // Enable/disable fast search for wedge masks 1192 int enable_fast_wedge_mask_search; 1193 1194 // Early breakout from transform search of inter modes 1195 int inter_mode_txfm_breakout; 1196 1197 // Limit number of inter modes for txfm search if a newmv mode gets 1198 // evaluated among the top modes. 1199 // 0: no pruning 1200 // 1 to 3 indicate increasing order of aggressiveness 1201 int limit_inter_mode_cands; 1202 1203 // Cap the no. of txfm searches for a given prediction mode. 1204 // 0: no cap, 1: cap beyond first 4 searches, 2: cap beyond first 3 searches. 1205 int limit_txfm_eval_per_mode; 1206 1207 // Prune warped motion search based on block size. 1208 int extra_prune_warped; 1209 1210 // Do not search compound modes for ARF. 1211 // The intuition is that ARF is predicted by frames far away from it, 1212 // whose temporal correlations with the ARF are likely low. 
1213 // It is therefore likely that compound modes do not work as well for ARF 1214 // as other inter frames. 1215 // Speed/quality impact: 1216 // Speed 1: 12% faster, 0.1% psnr loss. 1217 // Speed 2: 2% faster, 0.05% psnr loss. 1218 // No change for speed 3 and up, because |disable_onesided_comp| is true. 1219 int skip_arf_compound; 1220 1221 // Percentage of scaling used to increase the rd cost of warp mode so that 1222 // encoder decisions are biased against local warp, favoring low complexity 1223 // modes. 1224 int bias_warp_mode_rd_scale_pct; 1225 } INTER_MODE_SPEED_FEATURES; 1226 1227 typedef struct INTERP_FILTER_SPEED_FEATURES { 1228 // Do limited interpolation filter search for dual filters, since best choice 1229 // usually includes EIGHTTAP_REGULAR. 1230 int use_fast_interpolation_filter_search; 1231 1232 // Disable dual filter 1233 int disable_dual_filter; 1234 1235 // Save results of av1_interpolation_filter_search for a block 1236 // Check mv and ref_frames before search, if they are very close with previous 1237 // saved results, filter search can be skipped. 1238 int use_interp_filter; 1239 1240 // skip sharp_filter evaluation based on regular and smooth filter rd for 1241 // dual_filter=0 case 1242 int skip_sharp_interp_filter_search; 1243 1244 // skip interpolation filter search for a block in chessboard pattern 1245 int cb_pred_filter_search; 1246 1247 // adaptive interp_filter search to allow skip of certain filter types. 1248 int adaptive_interp_filter_search; 1249 1250 // Forces interpolation filter to EIGHTTAP_REGULAR and skips interpolation 1251 // filter search. 1252 int skip_interp_filter_search; 1253 } INTERP_FILTER_SPEED_FEATURES; 1254 1255 typedef struct INTRA_MODE_SPEED_FEATURES { 1256 // These bit masks allow you to enable or disable intra modes for each 1257 // transform size separately. 
1258 int intra_y_mode_mask[TX_SIZES]; 1259 int intra_uv_mode_mask[TX_SIZES]; 1260 1261 // flag to allow skipping intra mode for inter frame prediction 1262 int skip_intra_in_interframe; 1263 1264 // Prune intra mode candidates based on source block histogram of gradient. 1265 // Applies to luma plane only. 1266 // Feasible values are 0..4. The feature is disabled for 0. An increasing 1267 // value indicates more aggressive pruning threshold. 1268 int intra_pruning_with_hog; 1269 1270 // Prune intra mode candidates based on source block histogram of gradient. 1271 // Applies to chroma plane only. 1272 // Feasible values are 0..4. The feature is disabled for 0. An increasing 1273 // value indicates more aggressive pruning threshold. 1274 int chroma_intra_pruning_with_hog; 1275 1276 // Enable/disable smooth intra modes. 1277 int disable_smooth_intra; 1278 1279 // Prune UV_SMOOTH_PRED mode for chroma based on chroma source variance. 1280 // false : No pruning 1281 // true : Prune UV_SMOOTH_PRED mode based on chroma source variance 1282 // 1283 // For allintra encode, this speed feature reduces instruction count 1284 // by 1.90%, 2.21% and 1.97% for speed 6, 7 and 8 with coding performance 1285 // change less than 0.04%. For AVIF image encode, this speed feature reduces 1286 // encode time by 1.56%, 2.14% and 0.90% for speed 6, 7 and 8 on a typical 1287 // image dataset with coding performance change less than 0.05%. 1288 bool prune_smooth_intra_mode_for_chroma; 1289 1290 // Prune filter intra modes in intra frames. 1291 // 0 : No pruning 1292 // 1 : Evaluate applicable filter intra modes based on best intra mode so far 1293 // 2 : Do not evaluate filter intra modes 1294 int prune_filter_intra_level; 1295 1296 // prune palette search 1297 // 0: No pruning 1298 // 1: Perform coarse search to prune the palette colors. For winner colors, 1299 // neighbors are also evaluated using a finer search. 
1300 // 2: Perform 2 way palette search from max colors to min colors (and min 1301 // colors to remaining colors) and terminate the search if current number of 1302 // palette colors is not the winner. 1303 int prune_palette_search_level; 1304 1305 // Terminate early in luma palette_size search. Speed feature values indicate 1306 // increasing level of pruning. 1307 // 0: No early termination 1308 // 1: Terminate early for higher luma palette_size, if header rd cost of lower 1309 // palette_size is more than 2 * best_rd. This level of pruning is more 1310 // conservative when compared to sf level 2 as the cases which will get pruned 1311 // with sf level 1 are a subset of the cases which will get pruned with sf 1312 // level 2. 1313 // 2: Terminate early for higher luma palette_size, if header rd cost of lower 1314 // palette_size is more than best_rd. 1315 // For allintra encode, this sf reduces instruction count by 2.49%, 1.07%, 1316 // 2.76%, 2.30%, 1.84%, 2.69%, 2.04%, 2.05% and 1.44% for speed 0, 1, 2, 3, 4, 1317 // 5, 6, 7 and 8 on screen content set with coding performance change less 1318 // than 0.01% for speed <= 2 and less than 0.03% for speed >= 3. For AVIF 1319 // image encode, this sf reduces instruction count by 1.94%, 1.13%, 1.29%, 1320 // 0.93%, 0.89%, 1.03%, 1.07%, 1.20% and 0.18% for speed 0, 1, 2, 3, 4, 5, 6, 1321 // 7 and 8 on a typical image dataset with coding performance change less than 1322 // 0.01%. 1323 int prune_luma_palette_size_search_level; 1324 1325 // Prune chroma intra modes based on luma intra mode winner. 1326 // 0: No pruning 1327 // 1: Prune chroma intra modes other than UV_DC_PRED, UV_SMOOTH_PRED, 1328 // UV_CFL_PRED and the mode that corresponds to luma intra mode winner. 1329 int prune_chroma_modes_using_luma_winner; 1330 1331 // Clip the frequency of updating the mv cost for intrabc.
1332 INTERNAL_COST_UPDATE_TYPE dv_cost_upd_level; 1333 1334 // We use DCT_DCT transform followed by computing SATD (Sum of Absolute 1335 // Transformed Differences) as an estimation of RD score to quickly find the 1336 // best possible Chroma from Luma (CFL) parameter. Then we do a full RD search 1337 // near the best possible parameter. The search range is set here. 1338 // The range of cfl_search_range should be [1, 33], and the following are the 1339 // recommended values. 1340 // 1: Fastest mode. 1341 // 3: Default mode that provides good speedup without losing compression 1342 // performance at speed 0. 1343 // 33: Exhaustive rd search (33 == CFL_MAGS_SIZE). This mode should only 1344 // be used for debugging purposes. 1345 int cfl_search_range; 1346 1347 // TOP_INTRA_MODEL_COUNT is 4 that is the number of top model rd to store in 1348 // intra mode decision. Here, add a speed feature to reduce this number for 1349 // higher speeds. 1350 int top_intra_model_count_allowed; 1351 1352 // Adapt top_intra_model_count_allowed locally to prune luma intra modes using 1353 // neighbor block and quantizer information. 1354 int adapt_top_model_rd_count_using_neighbors; 1355 1356 // Prune the evaluation of odd delta angles of directional luma intra modes by 1357 // using the rdcosts of neighbouring delta angles. 1358 // For allintra encode, this speed feature reduces instruction count 1359 // by 4.461%, 3.699% and 3.536% for speed 6, 7 and 8 on a typical video 1360 // dataset with coding performance change less than 0.26%. For AVIF image 1361 // encode, this speed feature reduces encode time by 2.849%, 2.471%, 1362 // and 2.051% for speed 6, 7 and 8 on a typical image dataset with coding 1363 // performance change less than 0.27%. 1364 int prune_luma_odd_delta_angles_in_intra; 1365 1366 // Terminate early in chroma palette_size search.
1367 // 0: No early termination 1368 // 1: Terminate early for higher palette_size, if header rd cost of lower 1369 // palette_size is more than best_rd. 1370 // For allintra encode, this sf reduces instruction count by 0.45%, 1371 // 0.62%, 1.73%, 2.50%, 2.89%, 3.09% and 3.86% for speed 0 to 6 on screen 1372 // content set with coding performance change less than 0.01%. 1373 // For AVIF image encode, this sf reduces instruction count by 0.45%, 0.81%, 1374 // 0.85%, 1.05%, 1.45%, 1.66% and 1.95% for speed 0 to 6 on a typical image 1375 // dataset with no quality drop. 1376 int early_term_chroma_palette_size_search; 1377 1378 // Skips the evaluation of filter intra modes in inter frames if rd evaluation 1379 // of luma intra dc mode results in invalid rd stats. 1380 int skip_filter_intra_in_inter_frames; 1381 } INTRA_MODE_SPEED_FEATURES; 1382 1383 typedef struct TX_SPEED_FEATURES { 1384 // Init search depth for square and rectangular transform partitions. 1385 // Values: 1386 // 0 - search full tree, 1: search 1 level, 2: search the highest level only 1387 int inter_tx_size_search_init_depth_sqr; 1388 int inter_tx_size_search_init_depth_rect; 1389 int intra_tx_size_search_init_depth_sqr; 1390 int intra_tx_size_search_init_depth_rect; 1391 1392 // If any dimension of a coding block size above 64, always search the 1393 // largest transform only, since the largest transform block size is 64x64. 1394 int tx_size_search_lgr_block; 1395 1396 TX_TYPE_SEARCH tx_type_search; 1397 1398 // Skip split transform block partition when the collocated bigger block 1399 // is selected as all zero coefficients. 1400 int txb_split_cap; 1401 1402 // Shortcut the transform block partition and type search when the target 1403 // rdcost is relatively lower. 
1404 // Values are 0 (not used) , or 1 - 2 with progressively increasing 1405 // aggressiveness 1406 int adaptive_txb_search_level; 1407 1408 // Prune level for tx_size_type search for inter based on rd model 1409 // 0: no pruning 1410 // 1-2: progressively increasing aggressiveness of pruning 1411 int model_based_prune_tx_search_level; 1412 1413 // Refine TX type after fast TX search. 1414 int refine_fast_tx_search_results; 1415 1416 // Prune transform split/no_split eval based on residual properties. A value 1417 // of 0 indicates no pruning, and the aggressiveness of pruning progressively 1418 // increases from levels 1 to 3. 1419 int prune_tx_size_level; 1420 1421 // Prune the evaluation of transform depths as decided by the NN model. 1422 // false: No pruning. 1423 // true : Avoid the evaluation of specific transform depths using NN model. 1424 // 1425 // For allintra encode, this speed feature reduces instruction count 1426 // by 4.76%, 8.92% and 11.28% for speed 6, 7 and 8 with coding performance 1427 // change less than 0.32%. For AVIF image encode, this speed feature reduces 1428 // encode time by 4.65%, 9.16% and 10.45% for speed 6, 7 and 8 on a typical 1429 // image dataset with coding performance change less than 0.19%. 1430 bool prune_intra_tx_depths_using_nn; 1431 1432 // Enable/disable early breakout during transform search of intra modes, by 1433 // using the minimum rd cost possible. By using this approach, the rd 1434 // evaluation of applicable transform blocks (in the current block) can be 1435 // avoided as 1436 // 1) best_rd evolves during the search in choose_tx_size_type_from_rd() 1437 // 2) appropriate ref_best_rd is passed in intra_block_yrd() 1438 // 1439 // For allintra encode, this speed feature reduces instruction count 1440 // by 1.11%, 1.08%, 1.02% and 0.93% for speed 3, 6, 7 and 8 with coding 1441 // performance change less than 0.02%. 
For AVIF image encode, this speed 1442 // feature reduces encode time by 0.93%, 1.46%, 1.07%, 0.84%, 0.99% and 0.73% 1443 // for speed 3, 4, 5, 6, 7 and 8 on a typical image dataset with coding 1444 // performance change less than 0.004%. 1445 bool use_rd_based_breakout_for_intra_tx_search; 1446 } TX_SPEED_FEATURES; 1447 1448 typedef struct RD_CALC_SPEED_FEATURES { 1449 // Fast approximation of av1_model_rd_from_var_lapndz 1450 int simple_model_rd_from_var; 1451 1452 // Perform faster distortion computation during the R-D evaluation by trying 1453 // to approximate the prediction error with transform coefficients (faster but 1454 // less accurate) rather than computing distortion in the pixel domain (slower 1455 // but more accurate). The following methods are used for distortion 1456 // computation: 1457 // Method 0: Always compute distortion in the pixel domain 1458 // Method 1: Based on block error, try using transform domain distortion for 1459 // tx_type search and compute distortion in pixel domain for final RD_STATS 1460 // Method 2: Based on block error, try to compute distortion in transform 1461 // domain 1462 // Methods 1 and 2 may fallback to computing distortion in the pixel domain in 1463 // case the block error is less than the threshold, which is controlled by the 1464 // speed feature tx_domain_dist_thres_level. 
//
  // The speed feature tx_domain_dist_level decides which of the above methods
  // needs to be used across different mode evaluation stages as described
  // below:
  // Eval type:    Default      Mode        Winner
  // Level 0  :    Method 0     Method 2    Method 0
  // Level 1  :    Method 1     Method 2    Method 0
  // Level 2  :    Method 2     Method 2    Method 0
  // Level 3  :    Method 2     Method 2    Method 2
  int tx_domain_dist_level;

  // Transform domain distortion threshold level
  int tx_domain_dist_thres_level;

  // Trellis (dynamic programming) optimization of quantized values
  TRELLIS_OPT_TYPE optimize_coefficients;

  // Use hash table to store macroblock RD search results
  // to avoid repeated search on the same residue signal.
  int use_mb_rd_hash;

  // Flag used to control the extent of coeff R-D optimization
  int perform_coeff_opt;
} RD_CALC_SPEED_FEATURES;

// Speed features for two-pass mode evaluation. Several of the levels below
// are specified per evaluation stage (Default / Mode / Winner).
typedef struct WINNER_MODE_SPEED_FEATURES {
  // Flag used to control the winner mode processing for better R-D optimization
  // of quantized coeffs
  int enable_winner_mode_for_coeff_opt;

  // Flag used to control the winner mode processing for transform size
  // search method
  int enable_winner_mode_for_tx_size_srch;

  // Control transform size search level
  // Eval type:    Default        Mode           Winner
  // Level 0  :    FULL RD        LARGEST ALL    FULL RD
  // Level 1  :    FAST RD        LARGEST ALL    FULL RD
  // Level 2  :    LARGEST ALL    LARGEST ALL    FULL RD
  // Level 3  :    LARGEST ALL    LARGEST ALL    LARGEST ALL
  int tx_size_search_level;

  // Flag used to control the winner mode processing for use transform
  // domain distortion
  int enable_winner_mode_for_use_tx_domain_dist;

  // Flag used to enable processing of multiple winner modes
  MULTI_WINNER_MODE_TYPE multi_winner_mode_type;

  // Motion mode for winner candidates:
  // 0: speed feature OFF
  // 1 / 2 : Use configured number of winner candidates
  int motion_mode_for_winner_cand;

  // Controls the prediction of transform skip block or DC only block.
  //
  // Different speed feature values (0 to 3) decide the aggressiveness of
  // prediction (refer to predict_dc_levels[][] in speed_features.c) to be used
  // during different mode evaluation stages.
  int dc_blk_pred_level;

  // If on, disables interpolation filter search in handle_inter_mode loop, and
  // performs it during winner mode processing by \ref
  // tx_search_best_inter_candidates.
  int winner_mode_ifs;

  // Controls the disabling of winner mode processing. Speed feature levels
  // are ordered in increasing aggressiveness of pruning. The method considered
  // for disabling depends on the sf level value, as described below.
  // 0: Do not disable
  // 1: Disable for blocks with low source variance.
  // 2: Disable for blocks which turn out to be transform skip (skipped based on
  //    eob) during MODE_EVAL stage except NEWMV mode.
  // 3: Disable for blocks which turn out to be transform skip during MODE_EVAL
  //    stage except NEWMV mode. For high quantizers, prune conservatively based
  //    on transform skip (skipped based on eob) except for NEWMV mode.
  // 4: Disable for blocks which turn out to be transform skip during MODE_EVAL
  //    stage.
  int prune_winner_mode_eval_level;
} WINNER_MODE_SPEED_FEATURES;

// Speed features for the in-loop filters: loop filter level selection, CDEF
// strength selection and loop restoration (Wiener / self-guided) search.
typedef struct LOOP_FILTER_SPEED_FEATURES {
  // This feature controls how the loop filter level is determined.
  LPF_PICK_METHOD lpf_pick;

  // Skip some final iterations in the determination of the best loop filter
  // level.
  int use_coarse_filter_level_search;

  // Reset luma filter levels to zero based on minimum filter levels of
  // reference frames and current frame's pyramid level.
  int adaptive_luma_loop_filter_skip;

  // Reset luma filter levels to zero when the percentage of SSE difference
  // between the unfiltered and filtered versions of the current frame is below
  // a threshold.
  int skip_loop_filter_using_filt_error;

  // Control how the CDEF strength is determined.
  CDEF_PICK_METHOD cdef_pick_method;

  // Decoder side speed feature to add penalty for use of dual-sgr filters.
  // Takes values 0 - 10, 0 indicating no penalty and each additional level
  // adding a penalty of 1%
  int dual_sgr_penalty_level;

  // Restricts loop restoration to RESTORE_SWITCHABLE by skipping RD cost
  // comparisons for RESTORE_WIENER and RESTORE_SGRPROJ. Also applies a bias
  // during switchable restoration search: each level adds a 0.5% penalty to
  // Wiener and SGR selection.
  // 0 : No restriction or bias (all restoration types allowed)
  // 1+: Skip WIENER/SGRPROJ and apply (level x 0.5%) penalty in
  //     search_switchable()
  int switchable_lr_with_bias_level;

  // Prune sgr ep using a binary-search-like mechanism
  int enable_sgr_ep_pruning;

  // Disable loop restoration for chroma planes
  int disable_loop_restoration_chroma;

  // Disable loop restoration for luma plane
  int disable_loop_restoration_luma;

  // Range of loop restoration unit sizes to search
  // The minimum size is clamped against the superblock size in
  // av1_pick_filter_restoration, so that the code which sets this value does
  // not need to know the superblock size ahead of time.
  int min_lr_unit_size;
  int max_lr_unit_size;

  // Prune RESTORE_WIENER evaluation based on source variance
  // 0 : no pruning
  // 1 : conservative pruning
  // 2 : aggressive pruning
  int prune_wiener_based_on_src_var;

  // Prune self-guided loop restoration based on wiener search results
  // 0 : no pruning
  // 1 : pruning based on rdcost ratio of RESTORE_WIENER and RESTORE_NONE
  // 2 : pruning based on winner restoration type among RESTORE_WIENER and
  //     RESTORE_NONE
  int prune_sgr_based_on_wiener;

  // Reduce the wiener filter window size for luma
  int reduce_wiener_window_size;

  // Flag to disable Wiener Loop restoration filter.
  bool disable_wiener_filter;

  // Flag to disable Self-guided Loop restoration filter.
  bool disable_sgr_filter;

  // Disable the refinement search around the wiener filter coefficients.
  bool disable_wiener_coeff_refine_search;

  // Whether to downsample the rows in computation of wiener stats.
  int use_downsampled_wiener_stats;
} LOOP_FILTER_SPEED_FEATURES;

// Real-time mode speed features. Many of these apply to the non-RD
// (nonrd_pickmode) path and to variance based partitioning.
typedef struct REAL_TIME_SPEED_FEATURES {
  // Check intra prediction for non-RD mode.
  int check_intra_pred_nonrd;

  // Skip checking intra prediction.
  // 0 - don't skip
  // 1 - skip if TX is skipped and best mode is not NEWMV
  // 2 - skip if TX is skipped
  // Skipping aggressiveness increases from level 1 to 2.
  int skip_intra_pred;

  // Estimate motion before calculating variance in variance-based partition
  // 0 - Only use zero MV
  // 1 - perform coarse ME
  // 2 - perform coarse ME, and also use neighbours' MVs
  // 3 - use neighbours' MVs without performing coarse ME
  int estimate_motion_for_var_based_partition;

  // For nonrd_use_partition: mode of extra check of leaf partition
  // 0 - don't check merge
  // 1 - always check merge
  // 2 - check merge and prune checking final split
  // 3 - check merge and prune checking final split based on bsize and qindex
  int nonrd_check_partition_merge_mode;

  // For nonrd_use_partition: check of leaf partition extra split
  int nonrd_check_partition_split;

  // Implements various heuristics to skip searching modes
  // The heuristics selected are based on flags
  // defined in the MODE_SEARCH_SKIP_HEURISTICS enum
  unsigned int mode_search_skip_flags;

  // For nonrd: Reduces ref frame search.
  // 0 - low level of search prune in non last frames
  // 1 - pruned search in non last frames
  // 2 - more pruned search in non last frames
  int nonrd_prune_ref_frame_search;

  // This flag controls the use of non-RD mode decision.
  int use_nonrd_pick_mode;

  // Flag that controls discounting for color map cost during palette search.
  // This saves about 5% of CPU and in non-RD speeds delivers better results
  // across rtc_screen set (on speed 10 overall BDRate growth is 13%)
  int discount_color_cost;

  // Use ALTREF frame in non-RD mode decision.
  int use_nonrd_altref_frame;

  // Use compound reference for non-RD mode.
  int use_comp_ref_nonrd;

  // Reference frames for compound prediction for nonrd pickmode:
  // LAST_GOLDEN (0), LAST_LAST2 (1), or LAST_ALTREF (2).
  int ref_frame_comp_nonrd[3];

  // Use reduced ref set for real-time mode
  int use_real_time_ref_set;

  // Skip a number of expensive mode evaluations for blocks with very low
  // temporal variance.
  int short_circuit_low_temp_var;

  // Reuse inter prediction in fast non-rd mode.
  int reuse_inter_pred_nonrd;

  // Number of best inter modes to search transform. INT_MAX - search all.
  int num_inter_modes_for_tx_search;

  // Use interpolation filter search in non-RD mode decision.
  int use_nonrd_filter_search;

  // Use simplified RD model for interpolation search and Intra
  int use_simple_rd_model;

  // For nonrd mode: use hybrid intra mode search for intra only frames based on
  // block properties.
  // 0 : use nonrd pick intra for all blocks
  // 1 : use rd for bsize < 16x16, nonrd otherwise
  // 2 : use rd for bsize < 16x16 and src var >= 101, nonrd otherwise
  int hybrid_intra_pickmode;

  // Filter blocks by certain criteria such as SAD, source variance, such that
  // fewer blocks will go through the palette search.
  // For the nonrd encoding path, enabling this feature reduces encoding time
  // when palette mode is used. Disabling it leads to better compression
  // efficiency.
  // 0: off
  // 1: less aggressive pruning mode
  // 2, 3: more aggressive pruning mode
  int prune_palette_search_nonrd;

  // Compute variance/sse on source difference, prior to encoding superblock.
  int source_metrics_sb_nonrd;

  // Flag to indicate process for handling overshoot on slide/scene change,
  // for real-time CBR mode.
  OVERSHOOT_DETECTION_CBR overshoot_detection_cbr;

  // Check for scene/content change detection on every frame before encoding.
  int check_scene_detection;

  // For keyframes in rtc: adjust the rc_bits_per_mb, to reduce overshoot.
  int rc_adjust_keyframe;

  // On scene change: compute spatial variance.
  int rc_compute_spatial_var_sc;

  // For nonrd mode: Prefer larger partition blks in variance based partitioning
  // 0: disabled, 1-3: increasing aggressiveness
  int prefer_large_partition_blocks;

  // Uses results of temporal noise estimate
  int use_temporal_noise_estimate;

  // Parameter indicating initial search window to be used in full-pixel search
  // for nonrd_pickmode. Range [0, MAX_MVSEARCH_STEPS - 1]. Lower value
  // indicates larger window. If set to 0, step_param is set based on internal
  // logic in set_mv_search_params().
  int fullpel_search_step_param;

  // Bit mask to enable or disable intra modes for each prediction block size
  // separately, for nonrd_pickmode. Currently, the sf is not respected when
  // 'force_intra_check' is true in 'av1_estimate_intra_mode()' function. Also,
  // H and V pred modes allowed through this sf can be further pruned when
  // 'prune_hv_pred_modes_using_src_sad' sf is true.
  int intra_y_mode_bsize_mask_nrd[BLOCK_SIZES];

  // Prune H and V intra prediction modes evaluation in inter frame.
  // The sf does not have any impact:
  // i. when frame_source_sad is 1.1 times greater than avg_source_sad
  // ii. when cyclic_refresh_segment_id_boosted is enabled
  // iii. when SB level source sad is greater than kMedSad
  // iv. when color sensitivity is non zero for both the chroma channels
  bool prune_hv_pred_modes_using_src_sad;

  // Skips mode checks more aggressively in nonRD mode
  int nonrd_aggressive_skip;

  // Skip cdef on 64x64 blocks.
  // 0: disabled
  // 1: skip when NEWMV or INTRA is not picked or color sensitivity is off.
  //    When color sensitivity is on for a superblock, all 64x64 blocks within
  //    will not skip.
  // 2: more aggressive mode where skip is done for all frames where
  //    rc->high_source_sad = 0 (non slide-changes), and color sensitivity off.
  int skip_cdef_sb;

  // Force selective cdf update.
  int selective_cdf_update;

  // Force only single reference (LAST) for prediction.
  int force_only_last_ref;

  // Forces larger partition blocks in variance based partitioning for intra
  // frames
  int force_large_partition_blocks_intra;

  // Use fixed partition for superblocks based on source_sad.
  // 0: disabled
  // 1: enabled
  int use_fast_fixed_part;

  // Increase source_sad thresholds in nonrd pickmode.
  int increase_source_sad_thresh;

  // Skip evaluation of no split in tx size selection for merge partition
  int skip_tx_no_split_var_based_partition;

  // Intermediate termination of newMV mode evaluation based on so far best mode
  // sse
  int skip_newmv_mode_based_on_sse;

  // Define gf length multiplier.
  // Level 0: use large multiplier, level 1: use medium multiplier.
  int gf_length_lvl;

  // Prune inter modes with golden frame as reference for NEARMV and NEWMV modes
  int prune_inter_modes_with_golden_ref;

  // Prune inter modes w.r.t golden or alt-ref frame based on sad
  int prune_inter_modes_wrt_gf_arf_based_on_sad;

  // Prune inter mode search in rd path based on current block's temporal
  // variance wrt LAST reference.
  int prune_inter_modes_using_temp_var;

  // Reduce MV precision to halfpel for higher int MV value & frame-level motion
  // 0: disabled
  // 1-2: Reduce precision to halfpel, fullpel based on conservative
  //      thresholds, aggressiveness increases with increase in level
  // 3: Reduce precision to halfpel using more aggressive thresholds
  int reduce_mv_pel_precision_highmotion;

  // Reduce MV precision for low complexity blocks
  // 0: disabled
  // 1: Reduce the mv resolution for zero mv if the variance is low
  // 2: Switch to halfpel, fullpel based on low block spatial-temporal
  //    complexity.
  int reduce_mv_pel_precision_lowcomplex;

  // Prune intra mode evaluation in inter frames based on mv range.
  BLOCK_SIZE prune_intra_mode_based_on_mv_range;

  // The number of times to left shift the splitting thresholds in variance
  // based partitioning. The minimum value should be 7 to avoid left shifting
  // by a negative number.
  int var_part_split_threshold_shift;

  // Qindex based variance partition threshold index, which determines
  // the aggressiveness of partition pruning
  // 0: disabled for speeds 9,10
  // 1,2: (rd-path) lowers qindex thresholds conditionally (for low SAD sb)
  // 3,4: (non-rd path) uses pre-tuned qindex thresholds
  int var_part_based_on_qidx;

  // Enable GF refresh based on Q value.
  int gf_refresh_based_on_qp;

  // Temporal filtering
  // The value can be 1 or 2, which indicates the threshold to use.
  // Must be off for lossless mode.
  int use_rtc_tf;

  // Use of the identity transform in nonrd_pickmode.
  int use_idtx_nonrd;

  // Prune the use of the identity transform in nonrd_pickmode:
  // only for smaller blocks and higher spatial variance, and when skip_txfm
  // is not already set.
  int prune_idtx_nonrd;

  // Force to only use dct for palette search in nonrd pickmode.
  int dct_only_palette_nonrd;

  // Skip loopfilter, for static content after slide change
  // or key frame, once quality has ramped up.
  // 0: disabled
  // 1: skip only after quality is ramped up.
  // 2: aggressive mode, where skip is done for all frames where
  //    rc->high_source_sad = 0 (no slide-changes).
  int skip_lf_screen;

  // Threshold on the active/inactive region percent to disable
  // the loopfilter and cdef. Setting to 100 disables this feature.
  int thresh_active_maps_skip_lf_cdef;

  // For nonrd: early exit out of variance partition that sets the
  // block size to superblock size, and sets mode to zeromv-last skip.
  // 0: disabled
  // 1: zeromv-skip is enabled at SB level only
  // 2: zeromv-skip is enabled at SB level and coding block level
  int part_early_exit_zeromv;

  // Early terminate inter mode search based on sse in non-rd path.
  INTER_SEARCH_EARLY_TERM_IDX sse_early_term_inter_search;

  // SAD based adaptive altref selection
  int sad_based_adp_altref_lag;

  // Enable/disable partition direct merging.
  int partition_direct_merging;

  // Level of aggressiveness for obtaining tx size based on qstep
  int tx_size_level_based_on_qstep;

  // Avoid the partitioning of a 16x16 block in variance based partitioning
  // (VBP) by making use of minimum and maximum sub-block variances.
  // For allintra encode, this speed feature reduces instruction count by 5.39%
  // for speed 9 on a typical video dataset with coding performance gain
  // of 1.44%.
  // For AVIF image encode, this speed feature reduces encode time
  // by 8.44% for speed 9 on a typical image dataset with coding performance
  // gain of 0.78%.
  bool vbp_prune_16x16_split_using_min_max_sub_blk_var;

  // A qindex threshold that determines whether to use qindex based CDEF filter
  // strength estimation for screen content types. The strength estimation model
  // used for screen contents prefers to allow cdef filtering for more frames.
  // This sf is used to limit the frames which go through cdef filtering and
  // the following explains the setting of the same.
  // MAXQ (255): This disables the usage of this sf. Here, frame does not use a
  // screen content model thus reduces the number of frames that go through cdef
  // filtering.
  // MINQ (0): Frames always use screen content model thus increasing the number
  // of frames that go through cdef filtering.
  // This speed feature has a substantial gain on coding metrics, with a
  // moderate increase in encoding time. Select threshold based on speed vs
  // quality trade-off.
  int screen_content_cdef_filter_qindex_thresh;

  // Prune compound mode if its variance is higher than the variance of single
  // modes.
  bool prune_compoundmode_with_singlecompound_var;

  // Allow mode cost update at frame level every couple frames. This
  // overrides the command line setting --mode-cost-upd-freq=3 (never update
  // except on key frame and first delta).
  bool frame_level_mode_cost_update;

  // Prune H_PRED during intra mode evaluation in the nonrd path based on best
  // mode so far.
  //
  // For allintra encode, this speed feature reduces instruction count by 1.10%
  // for speed 9 with coding performance change less than 0.04%.
  // For AVIF image encode, this speed feature reduces encode time by 1.03% for
  // speed 9 on a typical image dataset with coding performance change less than
  // 0.08%.
  bool prune_h_pred_using_best_mode_so_far;

  // Enable pruning of intra mode evaluations in nonrd path based on source
  // variance and best mode so far. The pruning logic is enabled only if the
  // mode is not a winner mode of both the neighboring blocks (left/top).
  //
  // For allintra encode, this speed feature reduces instruction count by 3.96%
  // for speed 9 with coding performance change less than 0.38%.
  // For AVIF image encode, this speed feature reduces encode time by 3.46% for
  // speed 9 on a typical image dataset with coding performance change less than
  // -0.06%.
  bool enable_intra_mode_pruning_using_neighbors;

  // Prune intra mode evaluations in nonrd path based on best sad so far.
  //
  // For allintra encode, this speed feature reduces instruction count by 3.05%
  // for speed 9 with coding performance change less than 0.24%.
  // For AVIF image encode, this speed feature reduces encode time by 1.87% for
  // speed 9 on a typical image dataset with coding performance change less than
  // 0.16%.
  bool prune_intra_mode_using_best_sad_so_far;

  // If compound is enabled, and the current block size is >= BLOCK_16X16,
  // limit the compound modes to GLOBAL_GLOBALMV. This does not apply to the
  // base layer of svc.
  bool check_only_zero_zeromv_on_large_blocks;

  // Allow for disabling cdf update for non reference frames in svc mode.
  bool disable_cdf_update_non_reference_frame;

  // Prune compound modes if the single modes variances do not perform well.
  bool prune_compoundmode_with_singlemode_var;

  // Skip searching all compound mode if the variance of single_mode residue is
  // sufficiently low.
  bool skip_compound_based_on_var;

  // Sets force_zeromv_skip based on the source sad available. Aggressiveness
  // increases with increase in the level set for speed feature.
  // 0: No setting
  // 1: If source sad is kZeroSad
  // 2: If source sad <= kVeryLowSad
  int set_zeromv_skip_based_on_source_sad;

  // Downgrades the block-level subpel motion search to
  // av1_find_best_sub_pixel_tree_pruned_more for higher QP and when fullpel
  // search performed well, zeromv has low sad or low source_var
  bool use_adaptive_subpel_search;

  // A flag used in RTC case to control frame_refs_short_signaling. Note that
  // the final decision is made in check_frame_refs_short_signaling(). The flag
  // can only be turned on when res < 360p and speed >= 9, in which case only
  // LAST and GOLDEN ref frames are used now.
  bool enable_ref_short_signaling;

  // A flag that controls if we check or bypass GLOBALMV in rtc single ref frame
  // case.
  bool check_globalmv_on_single_ref;

  // Allows for increasing the color_threshold for palette prediction.
  // This generally leads to better coding efficiency but with some speed loss.
  // Only used for screen content and for nonrd_pickmode.
  bool increase_color_thresh_palette;

  // Flag to indicate selecting of higher threshold for scene change detection.
  int higher_thresh_scene_detection;

  // Flag to indicate skip testing of NEWMV for flat blocks.
  int skip_newmv_flat_blocks_screen;

  // Flag to force skip encoding for non_reference_frame on slide/scene changes.
  int skip_encoding_non_reference_slide_change;

  // Flag to indicate more aggressive QP downward adjustment for screen static
  // content, to make convergence to min_qp faster.
  int rc_faster_convergence_static;

  // Skip NEWMV mode evaluation based on sad for screen content.
  int skip_newmv_mode_sad_screen;
} REAL_TIME_SPEED_FEATURES;

/*!\endcond */

/*!
 * \brief Top level speed vs quality trade off data structure.
 */
typedef struct SPEED_FEATURES {
  /*!
   * Sequence/frame level speed features:
   */
  HIGH_LEVEL_SPEED_FEATURES hl_sf;

  /*!
   * Speed features for the first pass.
   */
  FIRST_PASS_SPEED_FEATURES fp_sf;

  /*!
   * Speed features related to how tpl's searches are done.
   */
  TPL_SPEED_FEATURES tpl_sf;

  /*!
   * Global motion speed features:
   */
  GLOBAL_MOTION_SPEED_FEATURES gm_sf;

  /*!
   * Partition search speed features:
   */
  PARTITION_SPEED_FEATURES part_sf;

  /*!
   * Motion search speed features:
   */
  MV_SPEED_FEATURES mv_sf;

  /*!
   * Inter mode search speed features:
   */
  INTER_MODE_SPEED_FEATURES inter_sf;

  /*!
   * Interpolation filter search speed features:
   */
  INTERP_FILTER_SPEED_FEATURES interp_sf;

  /*!
   * Intra mode search speed features:
   */
  INTRA_MODE_SPEED_FEATURES intra_sf;

  /*!
   * Transform size/type search speed features:
   */
  TX_SPEED_FEATURES tx_sf;

  /*!
   * RD calculation speed features:
   */
  RD_CALC_SPEED_FEATURES rd_sf;

  /*!
   * Two-pass mode evaluation features:
   */
  WINNER_MODE_SPEED_FEATURES winner_mode_sf;

  /*!
   * In-loop filter speed features:
   */
  LOOP_FILTER_SPEED_FEATURES lpf_sf;

  /*!
   * Real-time mode speed features:
   */
  REAL_TIME_SPEED_FEATURES rt_sf;
} SPEED_FEATURES;
/*!\cond */

struct AV1_COMP;

/*!\endcond */
/*!\brief Frame size independent speed vs quality trade off flags
 *
 *\ingroup speed_features
 *
 * \param[in]    cpi     Top-level encoder instance structure
 * \param[in]    speed   Speed setting passed in from the command line
 *
 * \remark No return value but configures the various speed trade off flags
 *         based on the passed in speed setting. (Higher speed gives lower
 *         quality)
 */
void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi,
                                                  int speed);

/*!\brief Frame size dependent speed vs quality trade off flags
 *
 *\ingroup speed_features
 *
 * \param[in]    cpi     Top-level encoder instance structure
 * \param[in]    speed   Speed setting passed in from the command line
 *
 * \remark No return value but configures the various speed trade off flags
 *         based on the passed in speed setting and frame size. (Higher speed
 *         corresponds to lower quality)
 */
void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi,
                                                int speed);
/*!\brief Q index dependent speed vs quality trade off flags
 *
 *\ingroup speed_features
 *
 * \param[in]    cpi     Top-level encoder instance structure
 * \param[in]    speed   Speed setting passed in from the command line
 *
 * \remark No return value but configures the various speed trade off flags
 *         based on the passed in speed setting and current frame's Q index.
 *         (Higher speed corresponds to lower quality)
 */
void av1_set_speed_features_qindex_dependent(struct AV1_COMP *cpi, int speed);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // AOM_AV1_ENCODER_SPEED_FEATURES_H_