encodeframe.c (110286B)
1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <limits.h> 13 #include <float.h> 14 #include <math.h> 15 #include <stdbool.h> 16 #include <stdio.h> 17 18 #include "config/aom_config.h" 19 #include "config/aom_dsp_rtcd.h" 20 #include "config/av1_rtcd.h" 21 22 #include "aom_dsp/aom_dsp_common.h" 23 #include "aom_dsp/binary_codes_writer.h" 24 #include "aom_ports/mem.h" 25 #include "aom_ports/aom_timer.h" 26 #include "aom_util/aom_pthread.h" 27 #if CONFIG_MISMATCH_DEBUG 28 #include "aom_util/debug_util.h" 29 #endif // CONFIG_MISMATCH_DEBUG 30 31 #include "av1/common/cfl.h" 32 #include "av1/common/common.h" 33 #include "av1/common/common_data.h" 34 #include "av1/common/entropy.h" 35 #include "av1/common/entropymode.h" 36 #include "av1/common/idct.h" 37 #include "av1/common/mv.h" 38 #include "av1/common/mvref_common.h" 39 #include "av1/common/pred_common.h" 40 #include "av1/common/quant_common.h" 41 #include "av1/common/reconintra.h" 42 #include "av1/common/reconinter.h" 43 #include "av1/common/seg_common.h" 44 #include "av1/common/tile_common.h" 45 #include "av1/common/warped_motion.h" 46 47 #include "av1/encoder/allintra_vis.h" 48 #include "av1/encoder/aq_complexity.h" 49 #include "av1/encoder/aq_cyclicrefresh.h" 50 #include "av1/encoder/aq_variance.h" 51 #include "av1/encoder/av1_quantize.h" 52 #include "av1/encoder/global_motion_facade.h" 53 #include "av1/encoder/encodeframe.h" 54 #include "av1/encoder/encodeframe_utils.h" 55 #include "av1/encoder/encodemb.h" 56 
#include "av1/encoder/encodemv.h" 57 #include "av1/encoder/encodetxb.h" 58 #include "av1/encoder/ethread.h" 59 #include "av1/encoder/extend.h" 60 #include "av1/encoder/intra_mode_search_utils.h" 61 #include "av1/encoder/ml.h" 62 #include "av1/encoder/motion_search_facade.h" 63 #include "av1/encoder/partition_strategy.h" 64 #if !CONFIG_REALTIME_ONLY 65 #include "av1/encoder/partition_model_weights.h" 66 #endif 67 #include "av1/encoder/partition_search.h" 68 #include "av1/encoder/rd.h" 69 #include "av1/encoder/rdopt.h" 70 #include "av1/encoder/reconinter_enc.h" 71 #include "av1/encoder/segmentation.h" 72 #include "av1/encoder/tokenize.h" 73 #include "av1/encoder/tpl_model.h" 74 #include "av1/encoder/var_based_part.h" 75 76 #if CONFIG_TUNE_VMAF 77 #include "av1/encoder/tune_vmaf.h" 78 #endif 79 80 /*!\cond */ 81 // This is used as a reference when computing the source variance for the 82 // purposes of activity masking. 83 // Eventually this should be replaced by custom no-reference routines, 84 // which will be faster. 
85 static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = { 86 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 87 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 88 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 89 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 90 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 91 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 92 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 93 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 94 128, 128, 128, 128, 128, 128, 128, 128 95 }; 96 97 #if CONFIG_AV1_HIGHBITDEPTH 98 static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = { 99 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 100 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 101 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 102 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 103 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 104 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 105 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 106 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 107 128, 128, 128, 128, 128, 128, 128, 128 108 }; 109 110 static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = { 111 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 112 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 113 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 114 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 115 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 116 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 
128 * 4, 128 * 4, 128 * 4, 117 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 118 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 119 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 120 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 121 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 122 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 123 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 124 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 125 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 126 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4 127 }; 128 129 static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = { 130 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 131 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 132 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 133 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 134 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 135 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 136 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 137 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 138 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 139 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 140 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 141 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 142 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 143 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 144 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 145 128 * 16, 128 * 16, 128 
* 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 146 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 147 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 148 128 * 16, 128 * 16 149 }; 150 #endif // CONFIG_AV1_HIGHBITDEPTH 151 /*!\endcond */ 152 153 // For the given bit depth, returns a constant array used to assist the 154 // calculation of source block variance, which will then be used to decide 155 // adaptive quantizers. 156 static const uint8_t *get_var_offs(int use_hbd, int bd) { 157 #if CONFIG_AV1_HIGHBITDEPTH 158 if (use_hbd) { 159 assert(bd == 8 || bd == 10 || bd == 12); 160 const int off_index = (bd - 8) >> 1; 161 static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8, 162 AV1_HIGH_VAR_OFFS_10, 163 AV1_HIGH_VAR_OFFS_12 }; 164 return CONVERT_TO_BYTEPTR(high_var_offs[off_index]); 165 } 166 #else 167 (void)use_hbd; 168 (void)bd; 169 assert(!use_hbd); 170 #endif 171 assert(bd == 8); 172 return AV1_VAR_OFFS; 173 } 174 175 void av1_init_rtc_counters(MACROBLOCK *const x) { 176 av1_init_cyclic_refresh_counters(x); 177 x->cnt_zeromv = 0; 178 x->sb_col_scroll = 0; 179 x->sb_row_scroll = 0; 180 } 181 182 void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) { 183 if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ) 184 av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x); 185 cpi->rc.cnt_zeromv += x->cnt_zeromv; 186 cpi->rc.num_col_blscroll_last_tl0 += x->sb_col_scroll; 187 cpi->rc.num_row_blscroll_last_tl0 += x->sb_row_scroll; 188 } 189 190 unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi, 191 const MACROBLOCKD *xd, 192 const struct buf_2d *ref, 193 BLOCK_SIZE bsize, int plane, 194 int use_hbd) { 195 const int subsampling_x = xd->plane[plane].subsampling_x; 196 const int subsampling_y = xd->plane[plane].subsampling_y; 197 const BLOCK_SIZE plane_bsize = 198 get_plane_block_size(bsize, subsampling_x, subsampling_y); 199 unsigned int sse; 200 const unsigned int var = 
cpi->ppi->fn_ptr[plane_bsize].vf( 201 ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse); 202 return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]); 203 } 204 205 unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi, 206 const MACROBLOCKD *xd, 207 const struct buf_2d *ref, 208 BLOCK_SIZE bsize, int plane) { 209 const int use_hbd = is_cur_buf_hbd(xd); 210 return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd); 211 } 212 213 void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, 214 int mi_row, int mi_col, const int num_planes, 215 BLOCK_SIZE bsize) { 216 // Set current frame pointer. 217 x->e_mbd.cur_buf = src; 218 219 // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet 220 // the static analysis warnings. 221 for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) { 222 const int is_uv = i > 0; 223 setup_pred_plane( 224 &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv], 225 src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL, 226 x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y); 227 } 228 } 229 230 #if !CONFIG_REALTIME_ONLY 231 /*!\brief Assigns different quantization parameters to each superblock 232 * based on statistics relevant to the selected delta-q mode (variance). 233 * This is the non-rd version. 234 * 235 * \param[in] cpi Top level encoder instance structure 236 * \param[in,out] td Thread data structure 237 * \param[in,out] x Superblock level data for this block. 238 * \param[in] tile_info Tile information / identification 239 * \param[in] mi_row Block row (in "MI_SIZE" units) index 240 * \param[in] mi_col Block column (in "MI_SIZE" units) index 241 * \param[out] num_planes Number of image planes (e.g. Y,U,V) 242 * 243 * \remark No return value but updates superblock and thread data 244 * related to the q / q delta to be used. 
245 */ 246 static inline void setup_delta_q_nonrd(AV1_COMP *const cpi, ThreadData *td, 247 MACROBLOCK *const x, 248 const TileInfo *const tile_info, 249 int mi_row, int mi_col, int num_planes) { 250 AV1_COMMON *const cm = &cpi->common; 251 const DeltaQInfo *const delta_q_info = &cm->delta_q_info; 252 assert(delta_q_info->delta_q_present_flag); 253 254 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 255 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size); 256 257 const int delta_q_res = delta_q_info->delta_q_res; 258 int current_qindex = cm->quant_params.base_qindex; 259 260 if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) { 261 current_qindex = av1_get_sbq_variance_boost(cpi, x); 262 } 263 264 x->rdmult_cur_qindex = current_qindex; 265 MACROBLOCKD *const xd = &x->e_mbd; 266 current_qindex = av1_adjust_q_from_delta_q_res( 267 delta_q_res, xd->current_base_qindex, current_qindex); 268 269 x->delta_qindex = current_qindex - cm->quant_params.base_qindex; 270 x->rdmult_delta_qindex = x->delta_qindex; 271 272 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); 273 xd->mi[0]->current_qindex = current_qindex; 274 av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0); 275 276 // keep track of any non-zero delta-q used 277 td->deltaq_used |= (x->delta_qindex != 0); 278 } 279 280 /*!\brief Assigns different quantization parameters to each superblock 281 * based on statistics relevant to the selected delta-q mode (TPL weight, 282 * variance, HDR, etc). 283 * 284 * \ingroup tpl_modelling 285 * 286 * \param[in] cpi Top level encoder instance structure 287 * \param[in,out] td Thread data structure 288 * \param[in,out] x Superblock level data for this block. 289 * \param[in] tile_info Tile information / identification 290 * \param[in] mi_row Block row (in "MI_SIZE" units) index 291 * \param[in] mi_col Block column (in "MI_SIZE" units) index 292 * \param[out] num_planes Number of image planes (e.g. 
Y,U,V) 293 * 294 * \remark No return value but updates superblock and thread data 295 * related to the q / q delta to be used. 296 */ 297 static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td, 298 MACROBLOCK *const x, 299 const TileInfo *const tile_info, int mi_row, 300 int mi_col, int num_planes) { 301 AV1_COMMON *const cm = &cpi->common; 302 const CommonModeInfoParams *const mi_params = &cm->mi_params; 303 const DeltaQInfo *const delta_q_info = &cm->delta_q_info; 304 assert(delta_q_info->delta_q_present_flag); 305 306 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 307 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size); 308 309 const int delta_q_res = delta_q_info->delta_q_res; 310 int current_qindex = cm->quant_params.base_qindex; 311 if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode == 312 DUCKY_ENCODE_FRAME_MODE_QINDEX) { 313 const int sb_row = mi_row >> cm->seq_params->mib_size_log2; 314 const int sb_col = mi_col >> cm->seq_params->mib_size_log2; 315 const int sb_cols = 316 CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2); 317 const int sb_index = sb_row * sb_cols + sb_col; 318 current_qindex = 319 cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index]; 320 } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) { 321 if (DELTA_Q_PERCEPTUAL_MODULATION == 1) { 322 const int block_wavelet_energy_level = 323 av1_block_wavelet_energy_level(cpi, x, sb_size); 324 x->sb_energy_level = block_wavelet_energy_level; 325 current_qindex = av1_compute_q_from_energy_level_deltaq_mode( 326 cpi, block_wavelet_energy_level); 327 } else { 328 const int block_var_level = av1_log_block_var(cpi, x, sb_size); 329 x->sb_energy_level = block_var_level; 330 current_qindex = 331 av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level); 332 } 333 } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE && 334 cpi->oxcf.algo_cfg.enable_tpl_model) { 335 // Setup deltaq 
based on tpl stats 336 current_qindex = 337 av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col); 338 } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) { 339 current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col); 340 } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) { 341 current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col); 342 } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) { 343 current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col); 344 } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) { 345 current_qindex = av1_get_sbq_variance_boost(cpi, x); 346 } 347 348 x->rdmult_cur_qindex = current_qindex; 349 MACROBLOCKD *const xd = &x->e_mbd; 350 const int adjusted_qindex = av1_adjust_q_from_delta_q_res( 351 delta_q_res, xd->current_base_qindex, current_qindex); 352 if (cpi->use_ducky_encode) { 353 assert(adjusted_qindex == current_qindex); 354 } 355 current_qindex = adjusted_qindex; 356 357 x->delta_qindex = current_qindex - cm->quant_params.base_qindex; 358 x->rdmult_delta_qindex = x->delta_qindex; 359 360 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); 361 xd->mi[0]->current_qindex = current_qindex; 362 av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0); 363 364 // keep track of any non-zero delta-q used 365 td->deltaq_used |= (x->delta_qindex != 0); 366 367 if (cpi->oxcf.tool_cfg.enable_deltalf_mode) { 368 const int delta_lf_res = delta_q_info->delta_lf_res; 369 const int lfmask = ~(delta_lf_res - 1); 370 const int delta_lf_from_base = 371 ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask); 372 const int8_t delta_lf = 373 (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER); 374 const int frame_lf_count = 375 av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2; 376 const int mib_size = cm->seq_params->mib_size; 377 378 // pre-set the delta lf for loop filter. 
Note that this value is set 379 // before mi is assigned for each block in current superblock 380 for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) { 381 for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) { 382 const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k); 383 mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf; 384 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) { 385 mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf; 386 } 387 } 388 } 389 } 390 } 391 392 static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row, 393 int mi_col) { 394 const AV1_COMMON *cm = &cpi->common; 395 const GF_GROUP *const gf_group = &cpi->ppi->gf_group; 396 const CommonModeInfoParams *const mi_params = &cm->mi_params; 397 MACROBLOCK *x = &td->mb; 398 const int frame_idx = cpi->gf_frame_index; 399 TplParams *const tpl_data = &cpi->ppi->tpl_data; 400 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; 401 402 av1_zero(x->tpl_keep_ref_frame); 403 404 if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return; 405 if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return; 406 if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return; 407 408 const int is_overlay = 409 cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE; 410 if (is_overlay) { 411 memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame)); 412 return; 413 } 414 415 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx]; 416 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; 417 const int tpl_stride = tpl_frame->stride; 418 int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 }; 419 const int step = 1 << block_mis_log2; 420 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 421 422 const int mi_row_end = 423 AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows); 424 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); 425 const int mi_col_sr = 426 coded_to_superres_mi(mi_col, 
cm->superres_scale_denominator); 427 const int mi_col_end_sr = 428 AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size], 429 cm->superres_scale_denominator), 430 mi_cols_sr); 431 const int row_step = step; 432 const int col_step_sr = 433 coded_to_superres_mi(step, cm->superres_scale_denominator); 434 for (int row = mi_row; row < mi_row_end; row += row_step) { 435 for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) { 436 const TplDepStats *this_stats = 437 &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)]; 438 int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 }; 439 // Find the winner ref frame idx for the current block 440 int64_t best_inter_cost = this_stats->pred_error[0]; 441 int best_rf_idx = 0; 442 for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) { 443 if ((this_stats->pred_error[idx] < best_inter_cost) && 444 (this_stats->pred_error[idx] != 0)) { 445 best_inter_cost = this_stats->pred_error[idx]; 446 best_rf_idx = idx; 447 } 448 } 449 // tpl_pred_error is the pred_error reduction of best_ref w.r.t. 450 // LAST_FRAME. 
451 tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] - 452 this_stats->pred_error[LAST_FRAME - 1]; 453 454 for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx) 455 inter_cost[rf_idx] += tpl_pred_error[rf_idx]; 456 } 457 } 458 459 int rank_index[INTER_REFS_PER_FRAME - 1]; 460 for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) { 461 rank_index[idx] = idx + 1; 462 for (int i = idx; i > 0; --i) { 463 if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) { 464 const int tmp = rank_index[i - 1]; 465 rank_index[i - 1] = rank_index[i]; 466 rank_index[i] = tmp; 467 } 468 } 469 } 470 471 x->tpl_keep_ref_frame[INTRA_FRAME] = 1; 472 x->tpl_keep_ref_frame[LAST_FRAME] = 1; 473 474 int cutoff_ref = 0; 475 for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) { 476 x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1; 477 if (idx > 2) { 478 if (!cutoff_ref) { 479 // If the predictive coding gains are smaller than the previous more 480 // relevant frame over certain amount, discard this frame and all the 481 // frames afterwards. 
482 if (llabs(inter_cost[rank_index[idx]]) < 483 llabs(inter_cost[rank_index[idx - 1]]) / 8 || 484 inter_cost[rank_index[idx]] == 0) 485 cutoff_ref = 1; 486 } 487 488 if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0; 489 } 490 } 491 } 492 493 static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x, 494 int mi_row, int mi_col) { 495 const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size; 496 const int orig_rdmult = cpi->rd.RDMULT; 497 498 assert(IMPLIES(cpi->ppi->gf_group.size > 0, 499 cpi->gf_frame_index < cpi->ppi->gf_group.size)); 500 const int gf_group_index = cpi->gf_frame_index; 501 if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ && 502 cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 && 503 cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) { 504 const int dr = 505 av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult); 506 x->rdmult = dr; 507 } 508 } 509 #endif // !CONFIG_REALTIME_ONLY 510 511 #if CONFIG_RT_ML_PARTITIONING 512 // Get a prediction(stored in x->est_pred) for the whole superblock. 
513 static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile, 514 MACROBLOCK *x, int mi_row, int mi_col) { 515 AV1_COMMON *const cm = &cpi->common; 516 const int is_key_frame = frame_is_intra_only(cm); 517 MACROBLOCKD *xd = &x->e_mbd; 518 519 // TODO(kyslov) Extend to 128x128 520 assert(cm->seq_params->sb_size == BLOCK_64X64); 521 522 av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); 523 524 if (!is_key_frame) { 525 MB_MODE_INFO *mi = xd->mi[0]; 526 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME); 527 528 assert(yv12 != NULL); 529 530 av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, 531 get_ref_scale_factors(cm, LAST_FRAME), 1); 532 mi->ref_frame[0] = LAST_FRAME; 533 mi->ref_frame[1] = NONE; 534 mi->bsize = BLOCK_64X64; 535 mi->mv[0].as_int = 0; 536 mi->interp_filters = av1_broadcast_interp_filter(BILINEAR); 537 538 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); 539 540 xd->plane[0].dst.buf = x->est_pred; 541 xd->plane[0].dst.stride = 64; 542 av1_enc_build_inter_predictor_y(xd, mi_row, mi_col); 543 } else { 544 #if CONFIG_AV1_HIGHBITDEPTH 545 switch (xd->bd) { 546 case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break; 547 case 10: 548 memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0])); 549 break; 550 case 12: 551 memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0])); 552 break; 553 } 554 #else 555 memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); 556 #endif // CONFIG_VP9_HIGHBITDEPTH 557 } 558 } 559 #endif // CONFIG_RT_ML_PARTITIONING 560 561 #define AVG_CDF_WEIGHT_LEFT 3 562 #define AVG_CDF_WEIGHT_TOP_RIGHT 1 563 564 /*!\brief Encode a superblock (minimal RD search involved) 565 * 566 * \ingroup partition_search 567 * Encodes the superblock by a pre-determined partition pattern, only minor 568 * rd-based searches are allowed to adjust the initial pattern. It is only used 569 * by realtime encoding. 
570 */ 571 static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td, 572 TileDataEnc *tile_data, TokenExtra **tp, 573 const int mi_row, const int mi_col, 574 const int seg_skip) { 575 AV1_COMMON *const cm = &cpi->common; 576 MACROBLOCK *const x = &td->mb; 577 const SPEED_FEATURES *const sf = &cpi->sf; 578 const TileInfo *const tile_info = &tile_data->tile_info; 579 MB_MODE_INFO **mi = cm->mi_params.mi_grid_base + 580 get_mi_grid_idx(&cm->mi_params, mi_row, mi_col); 581 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 582 PC_TREE *const pc_root = td->pc_root; 583 584 #if !CONFIG_REALTIME_ONLY 585 if (cm->delta_q_info.delta_q_present_flag) { 586 const int num_planes = av1_num_planes(cm); 587 588 setup_delta_q_nonrd(cpi, td, x, tile_info, mi_row, mi_col, num_planes); 589 } 590 #endif 591 #if CONFIG_RT_ML_PARTITIONING 592 if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) { 593 RD_STATS dummy_rdc; 594 get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); 595 av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, 596 BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root); 597 return; 598 } 599 #endif 600 // Set the partition 601 if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip || 602 (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 && 603 (!frame_is_intra_only(cm) && 604 (!cpi->ppi->use_svc || 605 !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) { 606 // set a fixed-size partition 607 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); 608 BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size; 609 if (sf->rt_sf.use_fast_fixed_part && 610 x->content_state_sb.source_sad_nonrd < kLowSad) { 611 bsize_select = cm->seq_params->sb_size; 612 } 613 if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change && 614 cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) { 615 bsize_select = cm->seq_params->sb_size; 616 x->force_zeromv_skip_for_sb = 1; 617 } 618 const BLOCK_SIZE 
bsize = seg_skip ? sb_size : bsize_select; 619 if (x->content_state_sb.source_sad_nonrd > kZeroSad) 620 x->force_color_check_block_level = 1; 621 av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); 622 } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) { 623 // set a variance-based partition 624 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size); 625 av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col); 626 } 627 assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip || 628 sf->part_sf.partition_search_type == VAR_BASED_PARTITION); 629 set_cb_offsets(td->mb.cb_offset, 0, 0); 630 631 // Initialize the flag to skip cdef to 1. 632 if (sf->rt_sf.skip_cdef_sb) { 633 const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1; 634 // If 128x128 block is used, we need to set the flag for all 4 64x64 sub 635 // "blocks". 636 for (int r = 0; r < block64_in_sb; ++r) { 637 for (int c = 0; c < block64_in_sb; ++c) { 638 const int idx_in_sb = 639 r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64; 640 if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1; 641 } 642 } 643 } 644 645 #if CONFIG_COLLECT_COMPONENT_TIMING 646 start_timing(cpi, nonrd_use_partition_time); 647 #endif 648 av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, 649 pc_root); 650 #if CONFIG_COLLECT_COMPONENT_TIMING 651 end_timing(cpi, nonrd_use_partition_time); 652 #endif 653 } 654 655 // This function initializes the stats for encode_rd_sb. 
656 static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td, 657 const TileDataEnc *tile_data, 658 SIMPLE_MOTION_DATA_TREE *sms_root, 659 RD_STATS *rd_cost, int mi_row, int mi_col, 660 int gather_tpl_data) { 661 const AV1_COMMON *cm = &cpi->common; 662 const TileInfo *tile_info = &tile_data->tile_info; 663 MACROBLOCK *x = &td->mb; 664 665 const SPEED_FEATURES *sf = &cpi->sf; 666 const int use_simple_motion_search = 667 (sf->part_sf.simple_motion_search_split || 668 sf->part_sf.simple_motion_search_prune_rect || 669 sf->part_sf.simple_motion_search_early_term_none || 670 sf->part_sf.ml_early_term_after_part_split_level) && 671 !frame_is_intra_only(cm); 672 if (use_simple_motion_search) { 673 av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root, 674 mi_row, mi_col); 675 } 676 677 #if !CONFIG_REALTIME_ONLY 678 if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME && 679 cpi->oxcf.gf_cfg.lag_in_frames == 0)) { 680 init_ref_frame_space(cpi, td, mi_row, mi_col); 681 x->sb_energy_level = 0; 682 x->part_search_info.cnn_output_valid = 0; 683 if (gather_tpl_data) { 684 if (cm->delta_q_info.delta_q_present_flag) { 685 const int num_planes = av1_num_planes(cm); 686 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 687 setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes); 688 av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col); 689 } 690 691 // TODO(jingning): revisit this function. 
      // NOTE(review): the `&& (0)` makes this TPL-based rdmult adjustment a
      // dead branch — presumably disabled intentionally upstream; confirm.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  // Reset per-superblock search state before the RD search begins.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

#if !CONFIG_REALTIME_ONLY
// Initializes plane quantizers (and, when delta-lf mode is enabled, the
// loop-filter deltas) for the superblock at (mi_row, mi_col), applying the
// candidate qp offset 'delta_qp_ofs' relative to x->rdmult_cur_qindex.
// Used by sb_qp_sweep() to evaluate each candidate delta-q.
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  // Simple-motion-search MVs are only needed when one of these partition
  // speed features is active and the frame is inter-coded.
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  // Snap the candidate qindex to the signalable delta-q resolution.
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    // Derive the loop-filter delta from the q delta, rounded to a multiple
    // of delta_lf_res.
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  // Reset per-superblock search state (mirrors the tail of
  // init_encode_rd_sb()).
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

// Sweeps a range of qp offsets for the superblock at (mi_row, mi_col),
// running a dry-pass partition search for each candidate, and returns the
// qindex delta with the best RD cost. Ties are broken toward the smaller
// absolute qp offset. Key frames use a wider sweep range than inter frames.
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    // Preserve the current qindex across the mbmi reset / state restore.
    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    // Prefer lower RD cost; on an exact tie prefer the smaller |qp offset|.
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
#endif  // !CONFIG_REALTIME_ONLY

/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 */
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    // Optional per-SB qp sweep: dry-run the partition search over a range of
    // delta-q values and lock in the best one before the real search below.
    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      // Preserve the chosen qindex across the mbmi reset / state restore.
      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}

// Check if the cost update of symbols mode, coeff and dv are tile or off.
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
    const AV1_COMP *const cpi) {
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;

  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
}

// When row-mt is enabled and cost update frequencies are set to off/tile,
// processing of current SB can start even before processing of top-right SB
// is finished. This function checks if it is sufficient to wait for top SB
// to finish processing before current SB starts processing.
// Returns 1 when waiting only for the top SB (instead of top-right) is
// sufficient for row-mt synchronization; GOOD mode always returns 0.
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
  const MODE mode = cpi->oxcf.mode;
  if (mode == GOOD) return 0;

  if (mode == ALLINTRA)
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
  else if (mode == REALTIME)
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
  else
    return 0;
}

/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
                                         int mi_col) {
  // UINT64_MAX signals "no SAD available" to the caller.
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;

  const AV1_COMMON *const cm = &cpi->common;
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
                                   ? (cm->seq_params->mib_size >> 1)
                                   : cm->seq_params->mib_size;
  const int num_blk_64x64_cols =
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int num_blk_64x64_rows =
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
  uint64_t curr_sb_sad = UINT64_MAX;
  // Avoid the border as sad_blk_64x64 may not be set for the border
  // in the scene detection.
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
    return curr_sb_sad;
  }
  const uint64_t *const src_sad_blk_64x64_data =
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
                              blk_64x64_row_index * num_blk_64x64_cols];
  if (cm->seq_params->sb_size == BLOCK_128X128) {
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
    // superblock
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
    curr_sb_sad = src_sad_blk_64x64_data[0];
  }
  return curr_sb_sad;
}

/*!\brief Determine whether grading content can be skipped based on sad stat
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
                                              MACROBLOCK *const x, int mi_row,
                                              int mi_col) {
  // Always grade content on non-top spatial layers.
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
    return true;
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
  if (curr_sb_sad == UINT64_MAX) return true;
  if (curr_sb_sad == 0) {
    x->content_state_sb.source_sad_nonrd = kZeroSad;
    return false;
  }
  AV1_COMMON *const cm = &cpi->common;
  bool do_calc_src_content = true;

  if (cpi->oxcf.speed < 9) return do_calc_src_content;

  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
  if (AOMMIN(cm->width, cm->height) < 360) {
    // Derive Average 64x64 block source SAD from SB source SAD
    const uint64_t avg_64x64_blk_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
                                                   : curr_sb_sad;

    // The threshold is determined based on kLowSad and kHighSad threshold and
    // test results.
    uint64_t thresh_low = 15000;
    uint64_t thresh_high = 40000;

    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
      thresh_low = thresh_low << 1;
      thresh_high = thresh_high << 1;
    }

    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
      do_calc_src_content = false;
      // Note: set x->content_state_sb.source_sad_rd as well if this is extended
      // to RTC rd path.
      x->content_state_sb.source_sad_nonrd = kMedSad;
    }
  }

  return do_calc_src_content;
}

/*!\brief Determine whether grading content is needed based on sf and frame stat
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
// TODO(any): consolidate sfs to make interface cleaner
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
                                           TileDataEnc *tile_data, int mi_row,
                                           int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  // Key frames (including SVC layer key frames) keep the default kMedSad.
  if (cm->current_frame.frame_type == KEY_FRAME ||
      (cpi->ppi->use_svc &&
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
    assert(x->content_state_sb.source_sad_rd == kMedSad);
    return;
  }
  bool calc_src_content = false;

  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
    } else {
      x->content_state_sb.source_sad_nonrd = kZeroSad;
    }
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
             (cm->width * cm->height <= 352 * 288)) {
    if (cpi->rc.frame_source_sad > 0)
      calc_src_content = true;
    else
      x->content_state_sb.source_sad_rd = kZeroSad;
  }
  if (calc_src_content)
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}

/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling up
 * the width of the current tile.
 */
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset delta for quantizer and loop filters at the beginning of every tile
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
    // wait for the top superblock to finish encoding. Otherwise, wait for the
    // top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      // row_mt_exit is shared across workers; read it under the mutex.
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        *xd->tile_ctx = *x->row_ctx;
      } else {
        // update context
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        // Average with the top-right SB's context when one exists; otherwise
        // fall back to the top SB's context (last column of the row above).
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    av1_zero(x->color_sensitivity_sb);
    av1_zero(x->color_sensitivity_sb_g);
    av1_zero(x->color_sensitivity_sb_alt);
    av1_zero(x->color_sensitivity);
    x->content_state_sb.source_sad_nonrd = kMedSad;
    x->content_state_sb.source_sad_rd = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;
    x->force_zeromv_skip_for_sb = 0;
    x->sb_me_block = 0;
    x->sb_me_partition = 0;
    x->sb_me_mv.as_int = 0;
    x->sb_force_fixed_part = 1;
    x->color_palette_thresh = 64;
    x->force_color_check_block_level = 0;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;

    if (cpi->oxcf.mode == ALLINTRA) {
      x->intra_sb_rdmult_modifier = 128;
    }

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const uint8_t segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
                                        sb_size);

    // Grade the temporal variation of the sb, the grade will be used to decide
    // fast mode search strategy for coding blocks
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);

    // encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        x->row_ctx[0] = *xd->tile_ctx;
      else if (sb_col_in_tile >= 1)
        x->row_ctx[sb_col_in_tile - 1] = *xd->tile_ctx;
    }
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
                               sb_cols_in_tile);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}

// Sets up the per-frame source planes and block planes in the encoder's
// macroblock structures.
static inline void
init_encode_frame_mb_context(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  // Copy data over into macro block data structures.
  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
                       cm->seq_params->sb_size);

  av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
                         cm->seq_params->subsampling_y, num_planes);
}

// (Re)allocates the per-tile encoder data array to match the current tile
// configuration, releasing any previous row-mt memory first.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;

  av1_row_mt_mem_dealloc(cpi);

  aom_free(cpi->tile_data);
  // Reset the bookkeeping before the allocation so a failed CHECK_MEM_ERROR
  // leaves consistent state.
  cpi->allocated_tiles = 0;
  enc_row_mt->allocated_tile_cols = 0;
  enc_row_mt->allocated_tile_rows = 0;

  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));

  cpi->allocated_tiles = tile_cols * tile_rows;
  enc_row_mt->allocated_tile_cols = tile_cols;
  enc_row_mt->allocated_tile_rows = tile_rows;
  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
      const int tile_index = tile_row * tile_cols + tile_col;
      TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
      av1_zero(this_tile->row_mt_sync);
      this_tile->row_ctx = NULL;
    }
  }
}

// Initializes per-tile state (tile info, token pointers, CDF-update policy)
// for every tile of the current frame.
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  int tile_col, tile_row;
  TokenInfo *const token_info = &cpi->token_info;
  TokenExtra *pre_tok = token_info->tile_tok[0][0];
  TokenList *tplist = token_info->tplist[0][0];
  unsigned int tile_tok = 0;
  int tplist_count = 0;

  if (!is_stat_generation_stage(cpi) &&
      cm->features.allow_screen_content_tools) {
    // Number of tokens for which token info needs to be allocated.
    unsigned int tokens_required =
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
                        MAX_SB_SIZE_LOG2, num_planes);
    // Allocate/reallocate memory for token related info if the number of tokens
    // required is more than the number of tokens already allocated. This could
    // occur in case of the following:
    // 1) If the memory is not yet allocated
    // 2) If the frame dimensions have changed
    const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
    if (realloc_tokens) {
      free_token_info(token_info);
      alloc_token_info(cm, token_info, tokens_required);
      pre_tok = token_info->tile_tok[0][0];
      tplist = token_info->tplist[0][0];
    }
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &tile_data->tile_info;
      av1_tile_init(tile_info, cm, tile_row, tile_col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      if (is_token_info_allocated(token_info)) {
        // Carve this tile's token range out of the shared token buffer.
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
        pre_tok = token_info->tile_tok[tile_row][tile_col];
        tile_tok = allocated_tokens(
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
        tplist = token_info->tplist[tile_row][tile_col];
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
      }
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
                                    !cm->features.disable_cdf_update &&
                                    !delay_wait_for_top_right_sb(cpi);
      tile_data->tctx = *cm->fc;
    }
  }
}

// Populate the start palette token info prior to encoding an SB row.
static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
                                   int tile_row, int tile_col, int mi_row,
                                   TokenExtra **tp) {
  const TokenInfo *token_info = &cpi->token_info;
  if (!is_token_info_allocated(token_info)) return;

  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
  const int sb_row_in_tile =
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;

  get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
  assert(tplist != NULL);
  tplist[sb_row_in_tile].start = *tp;
}

// Populate the token count after encoding an SB row.
static inline void populate_token_count(AV1_COMP *cpi,
                                        const TileInfo *tile_info, int tile_row,
                                        int tile_col, int mi_row,
                                        TokenExtra *tok) {
  const TokenInfo *token_info = &cpi->token_info;
  if (!is_token_info_allocated(token_info)) return;

  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TokenList *const tplist = token_info->tplist[tile_row][tile_col];
  const int sb_row_in_tile =
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
  const int tile_mb_cols =
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
  const int num_mb_rows_in_sb =
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
  tplist[sb_row_in_tile].count =
      (unsigned int)(tok - tplist[sb_row_in_tile].start);

  // Sanity check: the tokens written must fit in the row's allocation.
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
                         num_planes));
1491 1492 (void)num_planes; 1493 (void)tile_mb_cols; 1494 (void)num_mb_rows_in_sb; 1495 } 1496 1497 /*!\brief Encode a superblock row 1498 * 1499 * \ingroup partition_search 1500 */ 1501 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row, 1502 int tile_col, int mi_row) { 1503 AV1_COMMON *const cm = &cpi->common; 1504 const int tile_cols = cm->tiles.cols; 1505 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; 1506 const TileInfo *const tile_info = &this_tile->tile_info; 1507 TokenExtra *tok = NULL; 1508 1509 get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok); 1510 1511 encode_sb_row(cpi, td, this_tile, mi_row, &tok); 1512 1513 populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok); 1514 } 1515 1516 /*!\brief Encode a tile 1517 * 1518 * \ingroup partition_search 1519 */ 1520 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, 1521 int tile_col) { 1522 AV1_COMMON *const cm = &cpi->common; 1523 TileDataEnc *const this_tile = 1524 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col]; 1525 const TileInfo *const tile_info = &this_tile->tile_info; 1526 1527 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile); 1528 1529 av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start, 1530 tile_info->mi_col_end, tile_row); 1531 av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row, 1532 &td->mb.e_mbd); 1533 1534 #if !CONFIG_REALTIME_ONLY 1535 if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra) 1536 cfl_init(&td->mb.e_mbd.cfl, cm->seq_params); 1537 #endif 1538 1539 if (td->mb.txfm_search_info.mb_rd_record != NULL) { 1540 av1_crc32c_calculator_init( 1541 &td->mb.txfm_search_info.mb_rd_record->crc_calculator); 1542 } 1543 1544 for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; 1545 mi_row += cm->seq_params->mib_size) { 1546 av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); 1547 } 1548 this_tile->abs_sum_level = 
td->abs_sum_level; 1549 } 1550 1551 /*!\brief Break one frame into tiles and encode the tiles 1552 * 1553 * \ingroup partition_search 1554 * 1555 * \param[in] cpi Top-level encoder structure 1556 */ 1557 static inline void encode_tiles(AV1_COMP *cpi) { 1558 AV1_COMMON *const cm = &cpi->common; 1559 const int tile_cols = cm->tiles.cols; 1560 const int tile_rows = cm->tiles.rows; 1561 int tile_col, tile_row; 1562 1563 MACROBLOCK *const mb = &cpi->td.mb; 1564 assert(IMPLIES(cpi->tile_data == NULL, 1565 cpi->allocated_tiles < tile_cols * tile_rows)); 1566 if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi); 1567 1568 av1_init_tile_data(cpi); 1569 av1_alloc_mb_data(cpi, mb); 1570 1571 for (tile_row = 0; tile_row < tile_rows; ++tile_row) { 1572 for (tile_col = 0; tile_col < tile_cols; ++tile_col) { 1573 TileDataEnc *const this_tile = 1574 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col]; 1575 cpi->td.intrabc_used = 0; 1576 cpi->td.deltaq_used = 0; 1577 cpi->td.abs_sum_level = 0; 1578 cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0; 1579 cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0; 1580 cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; 1581 cpi->td.mb.tile_pb_ctx = &this_tile->tctx; 1582 av1_init_rtc_counters(&cpi->td.mb); 1583 cpi->td.mb.palette_pixels = 0; 1584 av1_encode_tile(cpi, &cpi->td, tile_row, tile_col); 1585 if (!frame_is_intra_only(&cpi->common)) 1586 av1_accumulate_rtc_counters(cpi, &cpi->td.mb); 1587 cpi->palette_pixel_num += cpi->td.mb.palette_pixels; 1588 cpi->intrabc_used |= cpi->td.intrabc_used; 1589 cpi->deltaq_used |= cpi->td.deltaq_used; 1590 } 1591 } 1592 1593 av1_dealloc_mb_data(mb, av1_num_planes(cm)); 1594 } 1595 1596 // Set the relative distance of a reference frame w.r.t. 
current frame 1597 static inline void set_rel_frame_dist( 1598 const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info, 1599 const int ref_frame_flags) { 1600 MV_REFERENCE_FRAME ref_frame; 1601 int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX; 1602 ref_frame_dist_info->nearest_past_ref = NONE_FRAME; 1603 ref_frame_dist_info->nearest_future_ref = NONE_FRAME; 1604 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { 1605 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0; 1606 if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) { 1607 int dist = av1_encoder_get_relative_dist( 1608 cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME], 1609 cm->current_frame.display_order_hint); 1610 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist; 1611 // Get the nearest ref_frame in the past 1612 if (abs(dist) < min_past_dist && dist < 0) { 1613 ref_frame_dist_info->nearest_past_ref = ref_frame; 1614 min_past_dist = abs(dist); 1615 } 1616 // Get the nearest ref_frame in the future 1617 if (dist < min_future_dist && dist > 0) { 1618 ref_frame_dist_info->nearest_future_ref = ref_frame; 1619 min_future_dist = dist; 1620 } 1621 } 1622 } 1623 } 1624 1625 static inline int refs_are_one_sided(const AV1_COMMON *cm) { 1626 assert(!frame_is_intra_only(cm)); 1627 1628 int one_sided_refs = 1; 1629 const int cur_display_order_hint = cm->current_frame.display_order_hint; 1630 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) { 1631 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref); 1632 if (buf == NULL) continue; 1633 if (av1_encoder_get_relative_dist(buf->display_order_hint, 1634 cur_display_order_hint) > 0) { 1635 one_sided_refs = 0; // bwd reference 1636 break; 1637 } 1638 } 1639 return one_sided_refs; 1640 } 1641 1642 static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm, 1643 int ref_order_hint[2]) { 1644 const SkipModeInfo *const skip_mode_info = 
      &cm->current_frame.skip_mode_info;
  ref_order_hint[0] = ref_order_hint[1] = 0;
  if (!skip_mode_info->skip_mode_allowed) return;

  const RefCntBuffer *const buf_0 =
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
  const RefCntBuffer *const buf_1 =
      get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
  assert(buf_0 != NULL && buf_1 != NULL);

  ref_order_hint[0] = buf_0->order_hint;
  ref_order_hint[1] = buf_1->order_hint;
}

// Decides whether skip mode may be signaled for the current frame.
// Returns 1 only when skip mode is allowed by the bitstream rules AND the
// encoder-side heuristics below do not veto it.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame are different by more than 1 frame.
  const int cur_offset = (int)cm->current_frame.order_hint;
  int ref_offset[2];
  get_skip_mode_ref_offsets(cm, ref_offset);
  // NOTE(review): only cur_to_ref1 is wrapped in abs() here; presumably ref0
  // is the past reference (positive distance) and ref1 the future one — the
  // asymmetry appears intentional, but confirm against the skip-mode setup.
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                            cur_offset, ref_offset[0]);
  const int cur_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  // Both skip-mode references must actually be enabled for this frame.
  const int ref_frame[2] = {
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
  };
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
      !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
    return 0;

  return 1;
}

// Default interpolation-filter skip flags: with a single plane the chroma
// evaluation is skipped differently than in the multi-plane case.
static inline void set_default_interp_skip_flags(
    const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
  const int num_planes = av1_num_planes(cm);
  interp_search_flags->default_interp_skip_flags =
      (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
                        : INTERP_SKIP_LUMA_SKIP_CHROMA;
}

// Builds cpi->prune_ref_frame_mask: a bit set for a compound reference index
// means that reference combination is skipped at frame level.
static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    // Disable all compound references
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
    AV1_COMMON *const cm = &cpi->common;
    const int cur_frame_display_order_hint =
        cm->current_frame.display_order_hint;
    unsigned int *ref_display_order_hint =
        cm->cur_frame->ref_display_order_hint;
    const int arf2_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);
    const int bwd_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);

    // Walk all compound reference combinations.
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
      MV_REFERENCE_FRAME rf[2];
      av1_set_ref_frame(rf, ref_idx);
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
          !(cpi->ref_frame_flags &
av1_ref_frame_flag_list[rf[1]])) {
        continue;
      }

      if (!cpi->all_one_sided_refs) {
        int ref_dist[2];
        for (int i = 0; i < 2; ++i) {
          ref_dist[i] = av1_encoder_get_relative_dist(
              ref_display_order_hint[rf[i] - LAST_FRAME],
              cur_frame_display_order_hint);
        }

        // One-sided compound is used only when all reference frames are
        // one-sided.
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }

      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
          // reference to the current frame than ALTREF2_FRAME
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }
    }
  }
}

// Returns nonzero when objective delta-q mode should be used for this frame.
// In non-realtime builds this evaluates the per-superblock delta-q RD cost
// over the whole frame and enables the mode only when the net RD cost change
// is negative (a win); realtime-only builds always allow it.
static int allow_deltaq_mode(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
  AV1_COMMON *const cm = &cpi->common;
  BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  int sbs_wide = mi_size_wide[sb_size];
  int sbs_high = mi_size_high[sb_size];

  int64_t delta_rdcost = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
      int64_t this_delta_rdcost = 0;
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
                                     mi_row, mi_col);
      delta_rdcost += this_delta_rdcost;
    }
  }
  return delta_rdcost < 0;
#else
  (void)cpi;
  return 1;
#endif  // !CONFIG_REALTIME_ONLY
}

#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4

// Populates block level thresholds for force zeromv-skip decision
static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
  if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;

  // Threshold for forcing zeromv-skip decision is as below:
  // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
  // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
  // allowing slightly higher error for smaller blocks.
  //
  // Per Pixel Threshold of 64x64 block        Area of 64x64 block      1   1
  // ------------------------------------ = sqrt(---------------------) = sqrt(-) = -
  // Per Pixel Threshold of 128x128 block      Area of 128x128 block    4   2
  //
  // Thus, per pixel thresholds for blocks of size 32x32, 16x16,... can be
  // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
  // small blocks, the same is clipped to 4.
  const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
  const int num_128x128_pix =
      block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];

  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
    const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];

    // Calculate the threshold for zeromv-skip decision based on area of the
    // partition (scaled by sqrt of the area ratio, rounded to nearest).
    unsigned int thresh_exit_part_blk =
        (unsigned int)(thresh_exit_128x128_part *
                           sqrt((double)num_block_pix / num_128x128_pix) +
                       0.5);
    // Clamp the per-pixel error allowance for small blocks.
    thresh_exit_part_blk = AOMMIN(
        thresh_exit_part_blk,
        (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
    cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
  }
}

// Frees both ping-pong block-hash buffers (aom_free accepts NULL).
static void free_block_hash_buffers(uint32_t *block_hash_values[2]) {
  for (int j = 0; j < 2; ++j) {
    aom_free(block_hash_values[j]);
  }
}

/*!\brief Determines delta_q_res value for Variance Boost modulation.
1821 */ 1822 static int aom_get_variance_boost_delta_q_res(int qindex) { 1823 // Signaling delta_q changes across superblocks comes with inherent syntax 1824 // element overhead, which adds up to total payload size. This overhead 1825 // becomes proportionally bigger the higher the base qindex (i.e. lower 1826 // quality, smaller file size), so a balance needs to be struck. 1827 // - Smaller delta_q_res: more granular delta_q control, more bits spent 1828 // signaling deltas. 1829 // - Larger delta_q_res: coarser delta_q control, less bits spent signaling 1830 // deltas. 1831 // 1832 // At the same time, SB qindex fluctuations become larger the higher 1833 // the base qindex (between lowest and highest-variance regions): 1834 // - For QP 5: up to 8 qindexes 1835 // - For QP 60: up to 52 qindexes 1836 // 1837 // With these factors in mind, it was found that the best strategy that 1838 // maximizes quality per bitrate is by having very finely-grained delta_q 1839 // values for the lowest picture qindexes (to preserve tiny qindex SB deltas), 1840 // and progressively making them coarser as base qindex increases (to reduce 1841 // total signaling overhead). 
1842 int delta_q_res = 1; 1843 1844 if (qindex >= 160) { 1845 delta_q_res = 8; 1846 } else if (qindex >= 120) { 1847 delta_q_res = 4; 1848 } else if (qindex >= 80) { 1849 delta_q_res = 2; 1850 } else { 1851 delta_q_res = 1; 1852 } 1853 1854 return delta_q_res; 1855 } 1856 1857 #if !CONFIG_REALTIME_ONLY 1858 static float get_thresh_based_on_q(int qindex, int speed) { 1859 const float min_threshold_arr[2] = { 0.06f, 0.09f }; 1860 const float max_threshold_arr[2] = { 0.10f, 0.13f }; 1861 1862 const float min_thresh = min_threshold_arr[speed >= 3]; 1863 const float max_thresh = max_threshold_arr[speed >= 3]; 1864 const float thresh = min_thresh + (max_thresh - min_thresh) * 1865 ((float)MAXQ - (float)qindex) / 1866 (float)(MAXQ - MINQ); 1867 return thresh; 1868 } 1869 1870 static int get_mv_err(MV cur_mv, MV ref_mv) { 1871 const MV diff = { cur_mv.row - ref_mv.row, cur_mv.col - ref_mv.col }; 1872 const MV abs_diff = { abs(diff.row), abs(diff.col) }; 1873 const int mv_err = (abs_diff.row + abs_diff.col); 1874 return mv_err; 1875 } 1876 1877 static void check_mv_err_and_update(MV cur_mv, MV ref_mv, int *best_mv_err) { 1878 const int mv_err = get_mv_err(cur_mv, ref_mv); 1879 *best_mv_err = AOMMIN(mv_err, *best_mv_err); 1880 } 1881 1882 static int is_inside_frame_border(int mi_row, int mi_col, int row_offset, 1883 int col_offset, int num_mi_rows, 1884 int num_mi_cols) { 1885 if (mi_row + row_offset < 0 || mi_row + row_offset >= num_mi_rows || 1886 mi_col + col_offset < 0 || mi_col + col_offset >= num_mi_cols) 1887 return 0; 1888 1889 return 1; 1890 } 1891 1892 // Compute the minimum MV error between current MV and spatial MV predictors. 
static int get_spatial_mvpred_err(AV1_COMMON *cm, TplParams *const tpl_data,
                                  int tpl_idx, int mi_row, int mi_col,
                                  int ref_idx, int_mv cur_mv, int allow_hp,
                                  int is_integer) {
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  int mv_err = INT32_MAX;
  const int step = 1 << block_mis_log2;
  // Offsets (in mi units) of the neighboring TPL blocks used as spatial
  // MV predictor candidates.
  const int mv_pred_pos_in_mis[6][2] = {
    { -step, 0 },     { 0, -step },     { -step, step },
    { -step, -step }, { -2 * step, 0 }, { 0, -2 * step },
  };

  for (int i = 0; i < 6; i++) {
    int row_offset = mv_pred_pos_in_mis[i][0];
    int col_offset = mv_pred_pos_in_mis[i][1];
    // Skip candidates that fall outside the frame.
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
                                tpl_frame->mi_rows, tpl_frame->mi_cols)) {
      continue;
    }

    const TplDepStats *tpl_stats =
        &tpl_ptr[av1_tpl_ptr_pos(mi_row + row_offset, mi_col + col_offset,
                                 tpl_frame->stride, block_mis_log2)];
    int_mv this_refmv = tpl_stats->mv[ref_idx];
    // Match the precision of the candidate to that of cur_mv before
    // measuring the error.
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
  }

  // Check MV error w.r.t. Global MV / Zero MV
  int_mv gm_mv = { 0 };
  if (cm->global_motion[ref_idx + LAST_FRAME].wmtype > TRANSLATION) {
    const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
    gm_mv = gm_get_motion_vector(&cm->global_motion[ref_idx + LAST_FRAME],
                                 allow_hp, bsize, mi_col, mi_row, is_integer);
  }
  // The zero/global MV candidate is always in bounds, so mv_err is never
  // left at INT32_MAX.
  check_mv_err_and_update(cur_mv.as_mv, gm_mv.as_mv, &mv_err);

  return mv_err;
}

// Compute the minimum MV error between current MV and temporal MV predictors.
static int get_temporal_mvpred_err(AV1_COMMON *cm, int mi_row, int mi_col,
                                   int num_mi_rows, int num_mi_cols,
                                   int ref_idx, int_mv cur_mv, int allow_hp,
                                   int is_integer) {
  const RefCntBuffer *ref_buf = get_ref_frame_buf(cm, ref_idx + LAST_FRAME);
  // No reference buffer: report "no usable predictor".
  if (ref_buf == NULL) return INT32_MAX;
  int cur_to_ref_dist =
      get_relative_dist(&cm->seq_params->order_hint_info,
                        cm->cur_frame->order_hint, ref_buf->order_hint);

  int mv_err = INT32_MAX;
  // Offsets (in mi units) of the motion-field entries used as temporal MV
  // predictor candidates.
  const int mv_pred_pos_in_mis[7][2] = {
    { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 }, { 4, -2 }, { 4, 4 }, { 2, 4 },
  };

  for (int i = 0; i < 7; i++) {
    int row_offset = mv_pred_pos_in_mis[i][0];
    int col_offset = mv_pred_pos_in_mis[i][1];
    if (!is_inside_frame_border(mi_row, mi_col, row_offset, col_offset,
                                num_mi_rows, num_mi_cols)) {
      continue;
    }
    // tpl_mvs is stored at half mi resolution, hence the >> 1 indexing.
    const TPL_MV_REF *ref_mvs =
        cm->tpl_mvs +
        ((mi_row + row_offset) >> 1) * (cm->mi_params.mi_stride >> 1) +
        ((mi_col + col_offset) >> 1);
    if (ref_mvs->mfmv0.as_int == INVALID_MV) continue;

    // Project the stored motion-field MV to the current-to-reference
    // temporal distance before comparing.
    int_mv this_refmv;
    av1_get_mv_projection(&this_refmv.as_mv, ref_mvs->mfmv0.as_mv,
                          cur_to_ref_dist, ref_mvs->ref_frame_offset);
    lower_mv_precision(&this_refmv.as_mv, allow_hp, is_integer);
    check_mv_err_and_update(cur_mv.as_mv, this_refmv.as_mv, &mv_err);
  }

  return mv_err;
}

// Determine whether to disable temporal MV prediction for the current frame
// based on TPL and motion field data. Temporal MV prediction is disabled if the
// reduction in MV error by including temporal MVs as MV predictors is small.
static void check_to_disable_ref_frame_mvs(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  // Only applies when ref_frame_mvs is currently on and the speed feature
  // requests this adaptive check (level 1).
  if (!cm->features.allow_ref_frame_mvs || cpi->sf.hl_sf.ref_frame_mvs_lvl != 1)
    return;

  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;

  // Derive the MV precision that the TPL search operated at, so stored TPL
  // MVs and predictors are compared at a consistent precision.
  const SUBPEL_FORCE_STOP tpl_subpel_precision =
      cpi->sf.tpl_sf.subpel_force_stop;
  const int allow_high_precision_mv = tpl_subpel_precision == EIGHTH_PEL &&
                                      cm->features.allow_high_precision_mv;
  const int force_integer_mv = tpl_subpel_precision == FULL_PEL ||
                               cm->features.cur_frame_force_integer_mv;

  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  TplDepStats *tpl_ptr = tpl_frame->tpl_stats_ptr;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
  const int step = 1 << block_mis_log2;

  // Frame totals: spatial-only predictor error vs best-of-spatial/temporal.
  uint64_t accum_spatial_mvpred_err = 0;
  uint64_t accum_best_err = 0;

  for (int mi_row = 0; mi_row < tpl_frame->mi_rows; mi_row += step) {
    for (int mi_col = 0; mi_col < tpl_frame->mi_cols; mi_col += step) {
      TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos(
          mi_row, mi_col, tpl_frame->stride, block_mis_log2)];
      const int cur_best_ref_idx = tpl_stats_ptr->ref_frame_index[0];
      if (cur_best_ref_idx == NONE_FRAME) continue;

      int_mv cur_mv = tpl_stats_ptr->mv[cur_best_ref_idx];
      lower_mv_precision(&cur_mv.as_mv, allow_high_precision_mv,
                         force_integer_mv);

      const int cur_spatial_mvpred_err = get_spatial_mvpred_err(
          cm, tpl_data, tpl_idx, mi_row, mi_col, cur_best_ref_idx, cur_mv,
          allow_high_precision_mv, force_integer_mv);

      const int cur_temporal_mvpred_err = get_temporal_mvpred_err(
          cm, mi_row, mi_col, tpl_frame->mi_rows, tpl_frame->mi_cols,
          cur_best_ref_idx, cur_mv, allow_high_precision_mv, force_integer_mv);

      const int cur_best_err =
          AOMMIN(cur_spatial_mvpred_err, cur_temporal_mvpred_err);
      accum_spatial_mvpred_err += cur_spatial_mvpred_err;
      accum_best_err += cur_best_err;
    }
  }

  const float threshold =
      get_thresh_based_on_q(cm->quant_params.base_qindex, cpi->oxcf.speed);
  const float mv_err_reduction =
      (float)(accum_spatial_mvpred_err - accum_best_err);

  // Disable temporal MV prediction when the error reduction it brings is
  // below the qindex/speed-dependent fraction of the spatial-only error.
  if (mv_err_reduction <= threshold * accum_spatial_mvpred_err)
    cm->features.allow_ref_frame_mvs = 0;
}
#endif  // !CONFIG_REALTIME_ONLY

/*!\brief Encoder setup(only for the current frame), encoding, and
 * reconstruction for a single frame
 *
 * \ingroup high_level_algo
 */
static inline void encode_frame_internal(AV1_COMP *cpi) {
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  AV1_COMMON *const cm = &cpi->common;
  CommonModeInfoParams *const mi_params = &cm->mi_params;
  FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
#if CONFIG_FPMT_TEST
  FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
  FrameProbInfo *const temp_frame_probs_simulation =
      &cpi->ppi->temp_frame_probs_simulation;
#endif
  FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
  IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
  int i;

  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
    mi_params->setup_mi(mi_params);
  }

  set_mi_offsets(mi_params, xd, 0, 0);

  // Clear per-frame counters before encoding.
  av1_zero(*td->counts);
  av1_zero(rdc->tx_type_used);
  av1_zero(rdc->obmc_used);
  av1_zero(rdc->warped_used);
  av1_zero(rdc->seg_tmp_pred_cost);

  //
Reset the flag. 2077 cpi->intrabc_used = 0; 2078 // Need to disable intrabc when superres is selected 2079 if (av1_superres_scaled(cm)) { 2080 features->allow_intrabc = 0; 2081 } 2082 2083 features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc); 2084 2085 if (features->allow_warped_motion && 2086 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) { 2087 const FRAME_UPDATE_TYPE update_type = 2088 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index); 2089 int warped_probability = 2090 #if CONFIG_FPMT_TEST 2091 cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE 2092 ? temp_frame_probs->warped_probs[update_type] 2093 : 2094 #endif // CONFIG_FPMT_TEST 2095 frame_probs->warped_probs[update_type]; 2096 if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh) 2097 features->allow_warped_motion = 0; 2098 } 2099 2100 int hash_table_created = 0; 2101 if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) && 2102 !cpi->sf.rt_sf.use_nonrd_pick_mode) { 2103 // TODO(any): move this outside of the recoding loop to avoid recalculating 2104 // the hash table. 
2105 // add to hash table 2106 const int pic_width = cpi->source->y_crop_width; 2107 const int pic_height = cpi->source->y_crop_height; 2108 uint32_t *block_hash_values[2] = { NULL }; // two buffers used ping-pong 2109 bool error = false; 2110 2111 for (int j = 0; j < 2; ++j) { 2112 block_hash_values[j] = (uint32_t *)aom_malloc( 2113 sizeof(*block_hash_values[j]) * pic_width * pic_height); 2114 if (!block_hash_values[j]) { 2115 error = true; 2116 break; 2117 } 2118 } 2119 2120 av1_hash_table_init(intrabc_hash_info); 2121 if (error || 2122 !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) { 2123 free_block_hash_buffers(block_hash_values); 2124 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, 2125 "Error allocating intrabc_hash_table and buffers"); 2126 } 2127 hash_table_created = 1; 2128 av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0]); 2129 // Hash data generated for screen contents is used for intraBC ME 2130 const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize]; 2131 int max_sb_size = (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)); 2132 2133 if (cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks) { 2134 max_sb_size = AOMMIN(8, max_sb_size); 2135 } 2136 2137 int src_idx = 0; 2138 for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) { 2139 const int dst_idx = !src_idx; 2140 av1_generate_block_hash_value(intrabc_hash_info, cpi->source, size, 2141 block_hash_values[src_idx], 2142 block_hash_values[dst_idx]); 2143 if (size >= min_alloc_size && 2144 !av1_add_to_hash_map_by_row_with_precal_data( 2145 &intrabc_hash_info->intrabc_hash_table, 2146 block_hash_values[dst_idx], pic_width, pic_height, size)) { 2147 error = true; 2148 break; 2149 } 2150 } 2151 2152 free_block_hash_buffers(block_hash_values); 2153 2154 if (error) { 2155 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, 2156 "Error adding data to intrabc_hash_table"); 2157 } 2158 } 2159 2160 const CommonQuantParams *quant_params = 
&cm->quant_params; 2161 for (i = 0; i < MAX_SEGMENTS; ++i) { 2162 const int qindex = 2163 cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex) 2164 : quant_params->base_qindex; 2165 xd->lossless[i] = 2166 qindex == 0 && quant_params->y_dc_delta_q == 0 && 2167 quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 && 2168 quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0; 2169 if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1; 2170 xd->qindex[i] = qindex; 2171 if (xd->lossless[i]) { 2172 cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT; 2173 } else { 2174 cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients; 2175 } 2176 } 2177 features->coded_lossless = is_coded_lossless(cm, xd); 2178 features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm); 2179 2180 // Fix delta q resolution for the moment 2181 2182 cm->delta_q_info.delta_q_res = 0; 2183 if (cpi->use_ducky_encode) { 2184 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE; 2185 } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ && 2186 !cpi->roi.enabled) { 2187 if (deltaq_mode == DELTA_Q_OBJECTIVE) 2188 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE; 2189 else if (deltaq_mode == DELTA_Q_PERCEPTUAL) 2190 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL; 2191 else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI) 2192 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL; 2193 else if (deltaq_mode == DELTA_Q_USER_RATING_BASED) 2194 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL; 2195 else if (deltaq_mode == DELTA_Q_HDR) 2196 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL; 2197 else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST) 2198 cm->delta_q_info.delta_q_res = 2199 aom_get_variance_boost_delta_q_res(quant_params->base_qindex); 2200 // Set delta_q_present_flag before it is used for the first time 2201 cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES; 2202 
cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q; 2203 2204 // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q 2205 // is used for ineligible frames. That effectively will turn off row_mt 2206 // usage. Note objective delta_q and tpl eligible frames are only altref 2207 // frames currently. 2208 const GF_GROUP *gf_group = &cpi->ppi->gf_group; 2209 if (cm->delta_q_info.delta_q_present_flag) { 2210 if (deltaq_mode == DELTA_Q_OBJECTIVE && 2211 gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE) 2212 cm->delta_q_info.delta_q_present_flag = 0; 2213 2214 if (deltaq_mode == DELTA_Q_OBJECTIVE && 2215 cm->delta_q_info.delta_q_present_flag) { 2216 cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi); 2217 } 2218 } 2219 2220 // Reset delta_q_used flag 2221 cpi->deltaq_used = 0; 2222 2223 cm->delta_q_info.delta_lf_present_flag = 2224 cm->delta_q_info.delta_q_present_flag && 2225 oxcf->tool_cfg.enable_deltalf_mode; 2226 cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI; 2227 2228 // update delta_q_present_flag and delta_lf_present_flag based on 2229 // base_qindex 2230 cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0; 2231 cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0; 2232 } else if (cpi->cyclic_refresh->apply_cyclic_refresh || 2233 cpi->svc.number_temporal_layers == 1) { 2234 cpi->cyclic_refresh->actual_num_seg1_blocks = 0; 2235 cpi->cyclic_refresh->actual_num_seg2_blocks = 0; 2236 } 2237 cpi->rc.cnt_zeromv = 0; 2238 2239 av1_frame_init_quantizer(cpi); 2240 init_encode_frame_mb_context(cpi); 2241 set_default_interp_skip_flags(cm, &cpi->interp_search_flags); 2242 2243 if (cm->prev_frame && cm->prev_frame->seg.enabled && 2244 cpi->svc.number_spatial_layers == 1) 2245 cm->last_frame_seg_map = cm->prev_frame->seg_map; 2246 else 2247 cm->last_frame_seg_map = NULL; 2248 if (features->allow_intrabc || features->coded_lossless) { 2249 
av1_set_default_ref_deltas(cm->lf.ref_deltas); 2250 av1_set_default_mode_deltas(cm->lf.mode_deltas); 2251 } else if (cm->prev_frame) { 2252 memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES); 2253 memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS); 2254 } 2255 memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES); 2256 memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); 2257 2258 cpi->all_one_sided_refs = 2259 frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm); 2260 2261 cpi->prune_ref_frame_mask = 0; 2262 // Figure out which ref frames can be skipped at frame level. 2263 setup_prune_ref_frame_mask(cpi); 2264 2265 x->txfm_search_info.txb_split_count = 0; 2266 #if CONFIG_SPEED_STATS 2267 x->txfm_search_info.tx_search_count = 0; 2268 #endif // CONFIG_SPEED_STATS 2269 2270 #if !CONFIG_REALTIME_ONLY 2271 #if CONFIG_COLLECT_COMPONENT_TIMING 2272 start_timing(cpi, av1_compute_global_motion_time); 2273 #endif 2274 av1_compute_global_motion_facade(cpi); 2275 #if CONFIG_COLLECT_COMPONENT_TIMING 2276 end_timing(cpi, av1_compute_global_motion_time); 2277 #endif 2278 #endif // !CONFIG_REALTIME_ONLY 2279 2280 #if CONFIG_COLLECT_COMPONENT_TIMING 2281 start_timing(cpi, av1_setup_motion_field_time); 2282 #endif 2283 av1_calculate_ref_frame_side(cm); 2284 2285 features->allow_ref_frame_mvs &= !(cpi->sf.hl_sf.ref_frame_mvs_lvl == 2); 2286 if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm); 2287 #if !CONFIG_REALTIME_ONLY 2288 check_to_disable_ref_frame_mvs(cpi); 2289 #endif // !CONFIG_REALTIME_ONLY 2290 2291 #if CONFIG_COLLECT_COMPONENT_TIMING 2292 end_timing(cpi, av1_setup_motion_field_time); 2293 #endif 2294 2295 cm->current_frame.skip_mode_info.skip_mode_flag = 2296 check_skip_mode_enabled(cpi); 2297 2298 // Initialization of skip mode cost depends on the value of 2299 // 'skip_mode_flag'. 
This initialization happens in the function 2300 // av1_fill_mode_rates(), which is in turn called in 2301 // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts() 2302 // has to be called after 'skip_mode_flag' is initialized. 2303 av1_initialize_rd_consts(cpi); 2304 av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex); 2305 populate_thresh_to_force_zeromv_skip(cpi); 2306 2307 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy; 2308 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy; 2309 mt_info->row_mt_enabled = 0; 2310 mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS], 2311 cm->tiles.cols * cm->tiles.rows) > 1; 2312 2313 if (oxcf->row_mt && (mt_info->num_workers > 1)) { 2314 mt_info->row_mt_enabled = 1; 2315 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read; 2316 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write; 2317 av1_encode_tiles_row_mt(cpi); 2318 } else { 2319 if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) { 2320 av1_encode_tiles_mt(cpi); 2321 } else { 2322 // Preallocate the pc_tree for realtime coding to reduce the cost of 2323 // memory allocation. 2324 const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode; 2325 if (use_nonrd_mode) { 2326 td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size); 2327 if (!td->pc_root) 2328 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 2329 "Failed to allocate PC_TREE"); 2330 } else { 2331 td->pc_root = NULL; 2332 } 2333 2334 encode_tiles(cpi); 2335 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0, 2336 cpi->sf.part_sf.partition_search_type); 2337 td->pc_root = NULL; 2338 } 2339 } 2340 2341 // If intrabc is allowed but never selected, reset the allow_intrabc flag. 
2342 if (features->allow_intrabc && !cpi->intrabc_used) { 2343 features->allow_intrabc = 0; 2344 } 2345 if (features->allow_intrabc) { 2346 cm->delta_q_info.delta_lf_present_flag = 0; 2347 } 2348 2349 if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) { 2350 cm->delta_q_info.delta_q_present_flag = 0; 2351 } 2352 2353 // Set the transform size appropriately before bitstream creation 2354 const MODE_EVAL_TYPE eval_type = 2355 cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch 2356 ? WINNER_MODE_EVAL 2357 : DEFAULT_EVAL; 2358 const TX_SIZE_SEARCH_METHOD tx_search_type = 2359 cpi->winner_mode_params.tx_size_search_methods[eval_type]; 2360 assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL); 2361 features->tx_mode = select_tx_mode(cm, tx_search_type); 2362 2363 // Retain the frame level probability update conditions for parallel frames. 2364 // These conditions will be consumed during postencode stage to update the 2365 // probability. 2366 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) { 2367 cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] = 2368 cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats; 2369 cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] = 2370 (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 && 2371 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX); 2372 cpi->do_update_frame_probs_warp[cpi->num_frame_recode] = 2373 (features->allow_warped_motion && 2374 cpi->sf.inter_sf.prune_warped_prob_thresh > 0); 2375 cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] = 2376 (cm->current_frame.frame_type != KEY_FRAME && 2377 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 && 2378 features->interp_filter == SWITCHABLE); 2379 } 2380 2381 if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats || 2382 ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 2383 INT_MAX) && 2384 (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) { 2385 const 
FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    // Fold this frame's per-tx-size transform-type usage counts into the
    // frame-level tx_type probability table for this update type. These
    // probabilities are used to prune tx-type search on subsequent frames.
    for (i = 0; i < TX_SIZES_ALL; i++) {
      int sum = 0;
      int j;
      // Probability mass not yet assigned; any remainder is folded into
      // index 0 so the row always sums to MAX_TX_TYPE_PROB.
      int left = MAX_TX_TYPE_PROB;

      for (j = 0; j < TX_TYPES; j++)
        sum += cpi->td.rd_counts.tx_type_used[i][j];

      for (j = TX_TYPES - 1; j >= 0; j--) {
        int update_txtype_frameprobs = 1;
        // Relative frequency of tx type j scaled to MAX_TX_TYPE_PROB; with no
        // samples, all probability mass goes to type 0.
        const int new_prob =
            sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
                        cpi->td.rd_counts.tx_type_used[i][j] / sum)
                : (j ? 0 : MAX_TX_TYPE_PROB);
#if CONFIG_FPMT_TEST
        // Frame-parallel simulation path (unit tests): age the simulated
        // tables instead of frame_probs.
        if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
          if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
              0) {
            int prob =
                (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
                 new_prob) >>
                1;
            left -= prob;
            if (j == 0) prob += left;
            temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
                prob;
            // Copy temp_frame_probs_simulation to temp_frame_probs
            for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
                 update_type_idx++) {
              temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
                  temp_frame_probs_simulation
                      ->tx_type_probs[update_type_idx][i][j];
            }
          }
          update_txtype_frameprobs = 0;
        }
#endif  // CONFIG_FPMT_TEST
        // Track the frame probabilities of parallel encode frames to update
        // during postencode stage.
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
          update_txtype_frameprobs = 0;
          cpi->frame_new_probs[cpi->num_frame_recode]
              .tx_type_probs[update_type][i][j] = new_prob;
        }
        if (update_txtype_frameprobs) {
          // Running average of the previous table entry and this frame's
          // estimate.
          int prob =
              (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
          left -= prob;
          if (j == 0) prob += left;
          frame_probs->tx_type_probs[update_type][i][j] = prob;
        }
      }
    }
  }

  // Enable temporal prediction of the segment map only when it measured
  // cheaper than direct coding during encoding (cost[0] = no temporal pred).
  if (cm->seg.enabled) {
    cm->seg.temporal_update = 1;
    if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
      cm->seg.temporal_update = 0;
  }

  // Age the per-block-size OBMC usage probabilities (used to prune OBMC
  // evaluation) when OBMC pruning is enabled.
  if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
      cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);

    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
      int sum = 0;
      int update_obmc_frameprobs = 1;
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];

      // Fraction of blocks of this size that selected OBMC, scaled to 128.
      const int new_prob =
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
#if CONFIG_FPMT_TEST
      // Frame-parallel simulation path (unit tests).
      if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
        if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
          temp_frame_probs_simulation->obmc_probs[update_type][i] =
              (temp_frame_probs_simulation->obmc_probs[update_type][i] +
               new_prob) >>
              1;
          // Copy temp_frame_probs_simulation to temp_frame_probs
          for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
               update_type_idx++) {
            temp_frame_probs->obmc_probs[update_type_idx][i] =
                temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
          }
        }
        update_obmc_frameprobs = 0;
      }
#endif  // CONFIG_FPMT_TEST
      // Track the frame probabilities of parallel encode frames to update
      // during postencode stage.
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
        update_obmc_frameprobs = 0;
        cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
            new_prob;
      }
      if (update_obmc_frameprobs) {
        frame_probs->obmc_probs[update_type][i] =
            (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
      }
    }
  }

  // Age the warped-motion usage probability when warped motion is allowed
  // and warped-motion pruning is enabled.
  if (features->allow_warped_motion &&
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int update_warp_frameprobs = 1;
    int sum = 0;
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
    // Fraction of blocks that selected warped motion, scaled to 128.
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
#if CONFIG_FPMT_TEST
    // Frame-parallel simulation path (unit tests).
    if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
      if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
        temp_frame_probs_simulation->warped_probs[update_type] =
            (temp_frame_probs_simulation->warped_probs[update_type] +
             new_prob) >>
            1;
        // Copy temp_frame_probs_simulation to temp_frame_probs
        for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
             update_type_idx++) {
          temp_frame_probs->warped_probs[update_type_idx] =
              temp_frame_probs_simulation->warped_probs[update_type_idx];
        }
      }
      update_warp_frameprobs = 0;
    }
#endif  // CONFIG_FPMT_TEST
    // Track the frame probabilities of parallel encode frames to update
    // during postencode stage.
    if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
      update_warp_frameprobs = 0;
      cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
          new_prob;
    }
    if (update_warp_frameprobs) {
      frame_probs->warped_probs[update_type] =
          (frame_probs->warped_probs[update_type] + new_prob) >> 1;
    }
  }

  // Age the switchable interpolation-filter probabilities used by the
  // adaptive interp-filter search on inter frames.
  if (cm->current_frame.frame_type != KEY_FRAME &&
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
      features->interp_filter == SWITCHABLE) {
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      int sum = 0;
      int j;
      // Unassigned probability mass; folded into index 0 so each context's
      // probabilities sum to 1536.
      int left = 1536;

      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
        sum += cpi->td.counts->switchable_interp[i][j];
      }

      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
        int update_interpfilter_frameprobs = 1;
        // Relative frequency of filter j scaled to 1536; with no samples all
        // probability mass goes to filter 0.
        const int new_prob =
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
                : (j ?
0 : 1536); 2549 #if CONFIG_FPMT_TEST 2550 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) { 2551 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 2552 0) { 2553 int prob = (temp_frame_probs_simulation 2554 ->switchable_interp_probs[update_type][i][j] + 2555 new_prob) >> 2556 1; 2557 left -= prob; 2558 if (j == 0) prob += left; 2559 temp_frame_probs_simulation 2560 ->switchable_interp_probs[update_type][i][j] = prob; 2561 // Copy temp_frame_probs_simulation to temp_frame_probs 2562 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES; 2563 update_type_idx++) { 2564 temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] = 2565 temp_frame_probs_simulation 2566 ->switchable_interp_probs[update_type_idx][i][j]; 2567 } 2568 } 2569 update_interpfilter_frameprobs = 0; 2570 } 2571 #endif // CONFIG_FPMT_TEST 2572 // Track the frame probabilities of parallel encode frames to update 2573 // during postencode stage. 2574 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) { 2575 update_interpfilter_frameprobs = 0; 2576 cpi->frame_new_probs[cpi->num_frame_recode] 2577 .switchable_interp_probs[update_type][i][j] = new_prob; 2578 } 2579 if (update_interpfilter_frameprobs) { 2580 int prob = (frame_probs->switchable_interp_probs[update_type][i][j] + 2581 new_prob) >> 2582 1; 2583 left -= prob; 2584 if (j == 0) prob += left; 2585 frame_probs->switchable_interp_probs[update_type][i][j] = prob; 2586 } 2587 } 2588 } 2589 } 2590 if (hash_table_created) { 2591 av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table); 2592 } 2593 } 2594 2595 /*!\brief Setup reference frame buffers and encode a frame 2596 * 2597 * \ingroup high_level_algo 2598 * \callgraph 2599 * \callergraph 2600 * 2601 * \param[in] cpi Top-level encoder structure 2602 */ 2603 void av1_encode_frame(AV1_COMP *cpi) { 2604 AV1_COMMON *const cm = &cpi->common; 2605 CurrentFrame *const current_frame = &cm->current_frame; 2606 FeatureFlags *const 
features = &cm->features; 2607 RD_COUNTS *const rdc = &cpi->td.rd_counts; 2608 const AV1EncoderConfig *const oxcf = &cpi->oxcf; 2609 // Indicates whether or not to use a default reduced set for ext-tx 2610 // rather than the potential full set of 16 transforms 2611 features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set; 2612 2613 // Make sure segment_id is no larger than last_active_segid. 2614 if (cm->seg.enabled && cm->seg.update_map) { 2615 const int mi_rows = cm->mi_params.mi_rows; 2616 const int mi_cols = cm->mi_params.mi_cols; 2617 const int last_active_segid = cm->seg.last_active_segid; 2618 uint8_t *map = cpi->enc_seg.map; 2619 for (int mi_row = 0; mi_row < mi_rows; ++mi_row) { 2620 for (int mi_col = 0; mi_col < mi_cols; ++mi_col) { 2621 map[mi_col] = AOMMIN(map[mi_col], last_active_segid); 2622 } 2623 map += mi_cols; 2624 } 2625 } 2626 2627 av1_setup_frame_buf_refs(cm); 2628 enforce_max_ref_frames(cpi, &cpi->ref_frame_flags, 2629 cm->cur_frame->ref_display_order_hint, 2630 cm->current_frame.display_order_hint); 2631 set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info, 2632 cpi->ref_frame_flags); 2633 av1_setup_frame_sign_bias(cm); 2634 2635 // If global motion is enabled, then every buffer which is used as either 2636 // a source or a ref frame should have an image pyramid allocated. 
2637 // Check here so that issues can be caught early in debug mode 2638 #if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY 2639 if (cpi->alloc_pyramid) { 2640 assert(cpi->source->y_pyramid); 2641 for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { 2642 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame); 2643 if (buf != NULL) { 2644 assert(buf->buf.y_pyramid); 2645 } 2646 } 2647 } 2648 #endif // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY 2649 2650 #if CONFIG_MISMATCH_DEBUG 2651 mismatch_reset_frame(av1_num_planes(cm)); 2652 #endif 2653 2654 rdc->newmv_or_intra_blocks = 0; 2655 cpi->palette_pixel_num = 0; 2656 2657 if (cpi->sf.hl_sf.frame_parameter_update || 2658 cpi->sf.rt_sf.use_comp_ref_nonrd) { 2659 if (frame_is_intra_only(cm)) 2660 current_frame->reference_mode = SINGLE_REFERENCE; 2661 else 2662 current_frame->reference_mode = REFERENCE_MODE_SELECT; 2663 2664 features->interp_filter = SWITCHABLE; 2665 if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR; 2666 2667 features->switchable_motion_mode = is_switchable_motion_mode_allowed( 2668 features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc); 2669 2670 rdc->compound_ref_used_flag = 0; 2671 rdc->skip_mode_used_flag = 0; 2672 2673 encode_frame_internal(cpi); 2674 2675 if (current_frame->reference_mode == REFERENCE_MODE_SELECT) { 2676 // Use a flag that includes 4x4 blocks 2677 if (rdc->compound_ref_used_flag == 0) { 2678 current_frame->reference_mode = SINGLE_REFERENCE; 2679 #if CONFIG_ENTROPY_STATS 2680 av1_zero(cpi->td.counts->comp_inter); 2681 #endif // CONFIG_ENTROPY_STATS 2682 } 2683 } 2684 // Re-check on the skip mode status as reference mode may have been 2685 // changed. 
2686 SkipModeInfo *const skip_mode_info = ¤t_frame->skip_mode_info; 2687 if (frame_is_intra_only(cm) || 2688 current_frame->reference_mode == SINGLE_REFERENCE) { 2689 skip_mode_info->skip_mode_allowed = 0; 2690 skip_mode_info->skip_mode_flag = 0; 2691 } 2692 if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0) 2693 skip_mode_info->skip_mode_flag = 0; 2694 2695 if (!cm->tiles.large_scale) { 2696 if (features->tx_mode == TX_MODE_SELECT && 2697 cpi->td.mb.txfm_search_info.txb_split_count == 0) 2698 features->tx_mode = TX_MODE_LARGEST; 2699 } 2700 } else { 2701 // This is needed if real-time speed setting is changed on the fly 2702 // from one using compound prediction to one using single reference. 2703 if (current_frame->reference_mode == REFERENCE_MODE_SELECT) 2704 current_frame->reference_mode = SINGLE_REFERENCE; 2705 encode_frame_internal(cpi); 2706 } 2707 }