/* nonrd_opt.h */
1 /* 2 * Copyright (c) 2022, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #ifndef AOM_AV1_ENCODER_NONRD_OPT_H_ 13 #define AOM_AV1_ENCODER_NONRD_OPT_H_ 14 15 #include "av1/encoder/context_tree.h" 16 #include "av1/encoder/rdopt_utils.h" 17 #include "av1/encoder/rdopt.h" 18 19 #define RTC_INTER_MODES (4) 20 #define RTC_INTRA_MODES (4) 21 #define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES)) 22 #define CALC_BIASED_RDCOST(rdcost) (7 * (rdcost) >> 3) 23 #define NUM_COMP_INTER_MODES_RT (6) 24 #define NUM_INTER_MODES 12 25 #define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \ 26 (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? 
true : false) 27 #define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16) 28 #define FILTER_SEARCH_SIZE 2 29 #if !CONFIG_REALTIME_ONLY 30 #define MOTION_MODE_SEARCH_SIZE 2 31 #endif 32 33 extern int g_pick_inter_mode_cnt; 34 /*!\cond */ 35 typedef struct { 36 uint8_t *data; 37 int stride; 38 int in_use; 39 } PRED_BUFFER; 40 41 typedef struct { 42 PRED_BUFFER *best_pred; 43 PREDICTION_MODE best_mode; 44 TX_SIZE best_tx_size; 45 TX_TYPE tx_type; 46 MV_REFERENCE_FRAME best_ref_frame; 47 MV_REFERENCE_FRAME best_second_ref_frame; 48 uint8_t best_mode_skip_txfm; 49 uint8_t best_mode_initial_skip_flag; 50 int_interpfilters best_pred_filter; 51 MOTION_MODE best_motion_mode; 52 WarpedMotionParams wm_params; 53 int num_proj_ref; 54 PALETTE_MODE_INFO pmi; 55 int64_t best_sse; 56 } BEST_PICKMODE; 57 58 typedef struct { 59 MV_REFERENCE_FRAME ref_frame; 60 PREDICTION_MODE pred_mode; 61 } REF_MODE; 62 63 typedef struct { 64 MV_REFERENCE_FRAME ref_frame[2]; 65 PREDICTION_MODE pred_mode; 66 } COMP_REF_MODE; 67 68 struct estimate_block_intra_args { 69 AV1_COMP *cpi; 70 MACROBLOCK *x; 71 PREDICTION_MODE mode; 72 int skippable; 73 RD_STATS *rdc; 74 unsigned int best_sad; 75 bool prune_mode_based_on_sad; 76 bool prune_palette_sad; 77 }; 78 /*!\endcond */ 79 80 /*!\brief Structure to store parameters and statistics used in non-rd inter mode 81 * evaluation. 82 */ 83 typedef struct { 84 //! Structure to hold best inter mode data 85 BEST_PICKMODE best_pickmode; 86 //! Structure to RD cost of current mode 87 RD_STATS this_rdc; 88 //! Pointer to the RD Cost for the best mode found so far 89 RD_STATS best_rdc; 90 //! Distortion of chroma planes for all modes and reference frames 91 int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES]; 92 //! Buffer to hold predicted block for all reference frames and planes 93 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]; 94 //! Array to hold variance of all modes and reference frames 95 unsigned int vars[RTC_INTER_MODES][REF_FRAMES]; 96 //! 
Array to hold ref cost of single reference mode for all ref frames 97 unsigned int ref_costs_single[REF_FRAMES]; 98 //! Array to hold motion vector for all modes and reference frames 99 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES]; 100 //! Array to hold best mv for all modes and reference frames 101 int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES]; 102 //! Array to hold inter mode cost of single ref mode for all ref frames 103 int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES]; 104 //! Array to hold use reference frame mask for each reference frame 105 int use_ref_frame_mask[REF_FRAMES]; 106 //! Array to hold flags of evaluated modes for each reference frame 107 uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES]; 108 //! Array to hold flag indicating if scaled reference frame is used. 109 bool use_scaled_ref_frame[REF_FRAMES]; 110 } InterModeSearchStateNonrd; 111 112 static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2, 113 2, 2, 3, 3, 3, 4, 114 4, 4, 5, 5 }; 115 static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1, 116 2, 3, 2, 3, 4, 3, 117 4, 5, 4, 5 }; 118 119 static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED, 120 SMOOTH_PRED }; 121 122 static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV, 123 NEWMV }; 124 125 static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = { 126 { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH }, 127 { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV }, 128 { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 }, 129 { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 }, 130 { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG }, 131 { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB }, 132 { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 }, 133 { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA }, 134 }; 135 136 // GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT 137 // mode 138 static const REF_MODE 
ref_mode_set[NUM_INTER_MODES] = { 139 { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV }, 140 { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV }, 141 { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV }, 142 { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV }, 143 { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV }, 144 { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV }, 145 }; 146 147 static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = { 148 { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV }, 149 { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV }, 150 { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV }, 151 { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV }, 152 { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV }, 153 { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV }, 154 }; 155 156 static const int_interpfilters filters_ref_set[9] = { 157 [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR }, 158 [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH }, 159 [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH }, 160 [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR }, 161 [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP }, 162 [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP }, 163 [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR }, 164 [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP }, 165 [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH } 166 }; 167 168 enum { 169 // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV), 170 INTER_NEAREST = (1 << NEARESTMV), 171 INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV), 172 INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV), 173 INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV), 174 }; 175 176 // The original scan order (default_scan_8x8) is modified according to the extra 177 // transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and 178 // aom_hadamard_8x8_c. 
// 64-entry scan for the transposed 8x8 hadamard output (C path).
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
  0,  8,  1,  2,  9,  16, 24, 17, 10, 3,  4,  11, 18, 25, 32, 40,
  33, 26, 19, 12, 5,  6,  13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
  28, 21, 14, 7,  15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
};

// The original scan order (av1_default_iscan_8x8) is modified to match
// hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
// aom_hadamard_8x8_avx2. Since hadamard AVX2 implementation will modify the
// order of coefficients, such that the normal scan order is no longer
// guaranteed to scan low coefficients first, therefore we modify the scan order
// accordingly.
// Note that this one has to be used together with default_scan_8x8_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_8x8_transpose[64]) = {
  0,  2,  3,  9,  10, 20, 21, 35, 1,  4,  8,  11, 19, 22, 34, 36,
  5,  7,  12, 18, 23, 33, 37, 48, 6,  13, 17, 24, 32, 38, 47, 49,
  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
};

// The original scan order (default_scan_16x16) is modified according to the
// extra transpose in hadamard c implementation in lp case, i.e.,
// aom_hadamard_lp_16x16_c.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_lp_16x16_transpose[256]) = {
  0,   8,   2,   4,   10,  16,  24,  18,  12,  6,   64,  14,  20,  26,  32,
  40,  34,  28,  22,  72,  66,  68,  74,  80,  30,  36,  42,  48,  56,  50,
  44,  38,  88,  82,  76,  70,  128, 78,  84,  90,  96,  46,  52,  58,  1,
  9,   3,   60,  54,  104, 98,  92,  86,  136, 130, 132, 138, 144, 94,  100,
  106, 112, 62,  5,   11,  17,  25,  19,  13,  7,   120, 114, 108, 102, 152,
  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65,  15,  21,  27,
  33,  41,  35,  29,  23,  73,  67,  124, 118, 168, 162, 156, 150, 200, 194,
  196, 202, 208, 158, 164, 170, 176, 126, 69,  75,  81,  31,  37,  43,  49,
  57,  51,  45,  39,  89,  83,  77,  71,  184, 178, 172, 166, 216, 210, 204,
  198, 206, 212, 218, 224, 174, 180, 186, 129, 79,  85,  91,  97,  47,  53,
  59,  61,  55,  105, 99,  93,  87,  137, 131, 188, 182, 232, 226, 220, 214,
  222, 228, 234, 240, 190, 133, 139, 145, 95,  101, 107, 113, 63,  121, 115,
  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (default_scan_16x16) is modified according to the
// extra shift in hadamard c implementation in fp case, i.e.,
// aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different
// outputs, so we handle them separately.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_fp_16x16_transpose[256]) = {
  0,   4,   2,   8,   6,   16,  20,  18,  12,  10,  64,  14,  24,  22,  32,
  36,  34,  28,  26,  68,  66,  72,  70,  80,  30,  40,  38,  48,  52,  50,
  44,  42,  84,  82,  76,  74,  128, 78,  88,  86,  96,  46,  56,  54,  1,
  5,   3,   60,  58,  100, 98,  92,  90,  132, 130, 136, 134, 144, 94,  104,
  102, 112, 62,  9,   7,   17,  21,  19,  13,  11,  116, 114, 108, 106, 148,
  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65,  15,  25,  23,
  33,  37,  35,  29,  27,  69,  67,  124, 122, 164, 162, 156, 154, 196, 194,
  200, 198, 208, 158, 168, 166, 176, 126, 73,  71,  81,  31,  41,  39,  49,
  53,  51,  45,  43,  85,  83,  77,  75,  180, 178, 172, 170, 212, 210, 204,
  202, 206, 216, 214, 224, 174, 184, 182, 129, 79,  89,  87,  97,  47,  57,
  55,  61,  59,  101, 99,  93,  91,  133, 131, 188, 186, 228, 226, 220, 218,
  222, 232, 230, 240, 190, 137, 135, 145, 95,  105, 103, 113, 63,  117, 115,
  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
  255
};
#endif

// The original scan order (av1_default_iscan_16x16) is modified to match
// hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
// Since hadamard AVX2 implementation will modify the order of coefficients,
// such that the normal scan order is no longer guaranteed to scan low
// coefficients first, therefore we modify the scan order accordingly. Note that
// this one has to be used together with default_scan_lp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_lp_16x16_transpose[256]) = {
  0,   44,  2,   46,  3,   63,  9,   69,  1,   45,  4,   64,  8,   68,  11,
  87,  5,   65,  7,   67,  12,  88,  18,  94,  6,   66,  13,  89,  17,  93,
  24,  116, 14,  90,  16,  92,  25,  117, 31,  123, 15,  91,  26,  118, 30,
  122, 41,  148, 27,  119, 29,  121, 42,  149, 48,  152, 28,  120, 43,  150,
  47,  151, 62,  177, 10,  86,  20,  96,  21,  113, 35,  127, 19,  95,  22,
  114, 34,  126, 37,  144, 23,  115, 33,  125, 38,  145, 52,  156, 32,  124,
  39,  146, 51,  155, 58,  173, 40,  147, 50,  154, 59,  174, 73,  181, 49,
  153, 60,  175, 72,  180, 83,  198, 61,  176, 71,  179, 84,  199, 98,  202,
  70,  178, 85,  200, 97,  201, 112, 219, 36,  143, 54,  158, 55,  170, 77,
  185, 53,  157, 56,  171, 76,  184, 79,  194, 57,  172, 75,  183, 80,  195,
  102, 206, 74,  182, 81,  196, 101, 205, 108, 215, 82,  197, 100, 204, 109,
  216, 131, 223, 99,  203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78,  193, 104,
  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (av1_default_iscan_16x16) is modified to match
// hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
// Since hadamard AVX2 implementation will modify the order of coefficients,
// such that the normal scan order is no longer guaranteed to scan low
// coefficients first, therefore we modify the scan order accordingly. Note that
// this one has to be used together with default_scan_fp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_fp_16x16_transpose[256]) = {
  0,   44,  2,   46,  1,   45,  4,   64,  3,   63,  9,   69,  8,   68,  11,
  87,  5,   65,  7,   67,  6,   66,  13,  89,  12,  88,  18,  94,  17,  93,
  24,  116, 14,  90,  16,  92,  15,  91,  26,  118, 25,  117, 31,  123, 30,
  122, 41,  148, 27,  119, 29,  121, 28,  120, 43,  150, 42,  149, 48,  152,
  47,  151, 62,  177, 10,  86,  20,  96,  19,  95,  22,  114, 21,  113, 35,
  127, 34,  126, 37,  144, 23,  115, 33,  125, 32,  124, 39,  146, 38,  145,
  52,  156, 51,  155, 58,  173, 40,  147, 50,  154, 49,  153, 60,  175, 59,
  174, 73,  181, 72,  180, 83,  198, 61,  176, 71,  179, 70,  178, 85,  200,
  84,  199, 98,  202, 97,  201, 112, 219, 36,  143, 54,  158, 53,  157, 56,
  171, 55,  170, 77,  185, 76,  184, 79,  194, 57,  172, 75,  183, 74,  182,
  81,  196, 80,  195, 102, 206, 101, 205, 108, 215, 82,  197, 100, 204, 99,
  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78,  193, 104,
  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
  255
};
#endif

// For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
// but the fastest way to calculate the IDTX transform (i.e. no transposes)
// results in coefficients that are a transposition of the entropy coding
// versions. These tables are used as substitute for the scan order for the
// faster version of IDTX.

// Must be used together with av1_fast_idtx_iscan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4,  8,  5,  2,  3,  6,
                                                9, 12, 13, 10, 7, 11, 14, 15 };

// Must be used together with av1_fast_idtx_scan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5,  6,  2, 4,  7,  12,
                                                 3, 8, 11, 13, 9, 10, 14, 15 };

// Paired scan/iscan tables packaged for the fast 4x4 IDTX path.
static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
};

// Must be used together with av1_fast_idtx_iscan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
  0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};

// Must be used together with av1_fast_idtx_scan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
  0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
  3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
};

// Paired scan/iscan tables packaged for the fast 8x8 IDTX path.
static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
};

// Must be used together with av1_fast_idtx_iscan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
  0,   1,   16,  32,  17,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,
  5,   20,  35,  50,  65,  80,  96,  81,  66,  51,  36,  21,  6,   7,   22,
  37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,  8,
  9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100,
  85,  70,  55,  40,  25,  10,  11,  26,  41,  56,  71,  86,  101, 116, 131,
  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,  42,  27,
  12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208,
  224, 209, 194, 179, 164, 149, 134, 119, 104, 89,  74,  59,  44,  29,  14,
  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180, 195, 210, 225,
  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,
  31,  47,  62,  77,  92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,  78,  63,  79,  94,
  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
  170, 155, 140, 125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231,
  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
  255
};

// Must be used together with av1_fast_idtx_scan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
  0,   1,   5,   6,   14,  15,  27,  28,  44,  45,  65,  66,  90,  91,  119,
  120, 2,   4,   7,   13,  16,  26,  29,  43,  46,  64,  67,  89,  92,  118,
  121, 150, 3,   8,   12,  17,  25,  30,  42,  47,  63,  68,  88,  93,  117,
  122, 149, 151, 9,   11,  18,  24,  31,  41,  48,  62,  69,  87,  94,  116,
  123, 148, 152, 177, 10,  19,  23,  32,  40,  49,  61,  70,  86,  95,  115,
  124, 147, 153, 176, 178, 20,  22,  33,  39,  50,  60,  71,  85,  96,  114,
  125, 146, 154, 175, 179, 200, 21,  34,  38,  51,  59,  72,  84,  97,  113,
  126, 145, 155, 174, 180, 199, 201, 35,  37,  52,  58,  73,  83,  98,  112,
  127, 144, 156, 173, 181, 198, 202, 219, 36,  53,  57,  74,  82,  99,  111,
  128, 143, 157, 172, 182, 197, 203, 218, 220, 54,  56,  75,  81,  100, 110,
  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55,  76,  80,  101, 109,
  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77,  79,  102, 108,
  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78,  103, 107,
  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
  255
};

// Indicates the blocks for which RD model should be based on special logic
static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
                                    BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int large_block = bsize >= BLOCK_32X32;
  // Only enable for low bitdepth to mitigate issue: b/303023614.
  return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
         !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
         cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
}
/*!\brief Finds predicted motion vectors for a block.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Finds predicted motion vectors for a block from a certain reference frame.
 * First, it fills reference MV stack, then picks the test from the stack and
 * predicts the final MV for a block for each mode.
 * \param[in]    cpi                      Top-level encoder structure
 * \param[in]    x                        Pointer to structure holding all the
 *                                        data for the current macroblock
 * \param[in]    ref_frame                Reference frame for which to find
 *                                        ref MVs
 * \param[out]   frame_mv                 Predicted MVs for a block
 * \param[in]    yv12_mb                  Buffer to hold predicted block
 * \param[in]    bsize                    Current block size
 * \param[in]    force_skip_low_temp_var  Flag indicating possible mode search
 *                                        prune for low temporal variance block
 * \param[in]    skip_pred_mv             Flag indicating to skip av1_mv_pred
 * \param[out]   use_scaled_ref_frame     Flag to indicate if scaled reference
 *                                        frame is used.
 *
 * \remark Nothing is returned. Instead, predicted MVs are placed into
 * \c frame_mv array, and use_scaled_ref_frame is set.
430 */ 431 static inline void find_predictors( 432 AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, 433 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], 434 struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize, 435 int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) { 436 AV1_COMMON *const cm = &cpi->common; 437 MACROBLOCKD *const xd = &x->e_mbd; 438 MB_MODE_INFO *const mbmi = xd->mi[0]; 439 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext; 440 const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame); 441 const bool ref_is_scaled = 442 ref->y_crop_height != cm->height || ref->y_crop_width != cm->width; 443 const YV12_BUFFER_CONFIG *scaled_ref = 444 av1_get_scaled_ref_frame(cpi, ref_frame); 445 const YV12_BUFFER_CONFIG *yv12 = 446 ref_is_scaled && scaled_ref ? scaled_ref : ref; 447 const int num_planes = av1_num_planes(cm); 448 x->pred_mv_sad[ref_frame] = INT_MAX; 449 x->pred_mv0_sad[ref_frame] = INT_MAX; 450 x->pred_mv1_sad[ref_frame] = INT_MAX; 451 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; 452 // TODO(kyslov) this needs various further optimizations. to be continued.. 453 assert(yv12 != NULL); 454 if (yv12 != NULL) { 455 struct scale_factors *const sf = 456 scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame); 457 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes); 458 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count, 459 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs, 460 mbmi_ext->mode_context); 461 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and 462 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs. 
463 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame); 464 av1_find_best_ref_mvs_from_stack( 465 cm->features.allow_high_precision_mv, mbmi_ext, ref_frame, 466 &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0); 467 frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame]; 468 // Early exit for non-LAST frame if force_skip_low_temp_var is set. 469 if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv && 470 !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) { 471 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame, 472 bsize); 473 } 474 } 475 if (cm->features.switchable_motion_mode) { 476 av1_count_overlappable_neighbors(cm, xd); 477 } 478 mbmi->num_proj_ref = 1; 479 *use_scaled_ref_frame = ref_is_scaled && scaled_ref; 480 } 481 482 static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi, 483 PREDICTION_MODE pred_mode, 484 MV_REFERENCE_FRAME ref_frame0, 485 MV_REFERENCE_FRAME ref_frame1, 486 const AV1_COMMON *cm) { 487 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 488 mbmi->ref_mv_idx = 0; 489 mbmi->mode = pred_mode; 490 mbmi->uv_mode = UV_DC_PRED; 491 mbmi->ref_frame[0] = ref_frame0; 492 mbmi->ref_frame[1] = ref_frame1; 493 pmi->palette_size[PLANE_TYPE_Y] = 0; 494 pmi->palette_size[PLANE_TYPE_UV] = 0; 495 mbmi->filter_intra_mode_info.use_filter_intra = 0; 496 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0; 497 mbmi->motion_mode = SIMPLE_TRANSLATION; 498 mbmi->num_proj_ref = 1; 499 mbmi->interintra_mode = 0; 500 set_default_interp_filters(mbmi, cm->features.interp_filter); 501 } 502 503 static inline void init_estimate_block_intra_args( 504 struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) { 505 args->cpi = cpi; 506 args->x = x; 507 args->mode = DC_PRED; 508 args->skippable = 1; 509 args->rdc = 0; 510 args->best_sad = UINT_MAX; 511 args->prune_mode_based_on_sad = false; 512 args->prune_palette_sad = false; 513 } 514 515 static inline int get_pred_buffer(PRED_BUFFER *p, int 
len) { 516 for (int buf_idx = 0; buf_idx < len; buf_idx++) { 517 if (!p[buf_idx].in_use) { 518 p[buf_idx].in_use = 1; 519 return buf_idx; 520 } 521 } 522 return -1; 523 } 524 525 static inline bool prune_palette_testing_inter(AV1_COMP *cpi, 526 unsigned int source_variance) { 527 return ( 528 cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && 529 cpi->oxcf.speed >= 11 && cpi->rc.high_source_sad && 530 ((cpi->sf.rt_sf.prune_palette_search_nonrd > 2) || 531 (cpi->sf.rt_sf.rc_compute_spatial_var_sc && 532 cpi->rc.frame_spatial_variance < 1200 && 533 cpi->rc.perc_spatial_flat_blocks < 5 && 534 cpi->rc.percent_blocks_with_motion > 98 && source_variance < 4000))); 535 } 536 537 static inline void free_pred_buffer(PRED_BUFFER *p) { 538 if (p != NULL) p->in_use = 0; 539 } 540 541 #if CONFIG_INTERNAL_STATS 542 static inline void store_coding_context_nonrd(MACROBLOCK *x, 543 PICK_MODE_CONTEXT *ctx, 544 int mode_index) { 545 #else 546 static inline void store_coding_context_nonrd(MACROBLOCK *x, 547 PICK_MODE_CONTEXT *ctx) { 548 #endif // CONFIG_INTERNAL_STATS 549 MACROBLOCKD *const xd = &x->e_mbd; 550 TxfmSearchInfo *txfm_info = &x->txfm_search_info; 551 552 // Take a snapshot of the coding context so it can be 553 // restored if we decide to encode this way 554 ctx->rd_stats.skip_txfm = txfm_info->skip_txfm; 555 556 ctx->skippable = txfm_info->skip_txfm; 557 #if CONFIG_INTERNAL_STATS 558 ctx->best_mode_index = mode_index; 559 #endif // CONFIG_INTERNAL_STATS 560 ctx->mic = *xd->mi[0]; 561 ctx->skippable = txfm_info->skip_txfm; 562 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext, 563 av1_ref_frame_type(xd->mi[0]->ref_frame)); 564 } 565 566 void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable, 567 BLOCK_SIZE bsize, TX_SIZE tx_size); 568 569 void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf, 570 int pred_stride, RD_STATS *this_rdc, int *skippable, 571 BLOCK_SIZE bsize, TX_SIZE tx_size); 572 573 int64_t 
av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize, 574 MACROBLOCK *x, MACROBLOCKD *xd, 575 RD_STATS *this_rdc, int start_plane, 576 int stop_plane); 577 578 void av1_estimate_block_intra(int plane, int block, int row, int col, 579 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, 580 void *arg); 581 582 void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 583 int best_early_term, unsigned int ref_cost_intra, 584 int reuse_prediction, struct buf_2d *orig_dst, 585 PRED_BUFFER *tmp_buffers, 586 PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc, 587 BEST_PICKMODE *best_pickmode, 588 PICK_MODE_CONTEXT *ctx, 589 unsigned int *best_sad_norm); 590 591 #endif // AOM_AV1_ENCODER_NONRD_OPT_H_