allintra_vis.c (43471B)
1 /* 2 * Copyright (c) 2021, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <assert.h> 13 14 #include "config/aom_config.h" 15 16 #include "aom_util/aom_pthread.h" 17 18 #if CONFIG_TFLITE 19 #include "tensorflow/lite/c/c_api.h" 20 #include "av1/encoder/deltaq4_model.c" 21 #endif 22 23 #include "av1/common/common_data.h" 24 #include "av1/common/enums.h" 25 #include "av1/common/idct.h" 26 #include "av1/common/reconinter.h" 27 #include "av1/encoder/allintra_vis.h" 28 #include "av1/encoder/aq_variance.h" 29 #include "av1/encoder/encoder.h" 30 #include "av1/encoder/ethread.h" 31 #include "av1/encoder/hybrid_fwd_txfm.h" 32 #include "av1/encoder/model_rd.h" 33 #include "av1/encoder/rdopt_utils.h" 34 35 #define MB_WIENER_PRED_BLOCK_SIZE BLOCK_128X128 36 #define MB_WIENER_PRED_BUF_STRIDE 128 37 38 // Maximum delta-q range allowed for Variance Boost after scaling 39 #define VAR_BOOST_MAX_DELTAQ_RANGE 80 40 // Maximum quantization step boost allowed for Variance Boost 41 #define VAR_BOOST_MAX_BOOST 8.0 42 43 void av1_alloc_mb_wiener_var_pred_buf(AV1_COMMON *cm, ThreadData *td) { 44 const int is_high_bitdepth = is_cur_buf_hbd(&td->mb.e_mbd); 45 assert(MB_WIENER_PRED_BLOCK_SIZE < BLOCK_SIZES_ALL); 46 const int buf_width = block_size_wide[MB_WIENER_PRED_BLOCK_SIZE]; 47 const int buf_height = block_size_high[MB_WIENER_PRED_BLOCK_SIZE]; 48 assert(buf_width == MB_WIENER_PRED_BUF_STRIDE); 49 const size_t buf_size = 50 (buf_width * buf_height * sizeof(*td->wiener_tmp_pred_buf)) 51 << is_high_bitdepth; 52 CHECK_MEM_ERROR(cm, td->wiener_tmp_pred_buf, aom_memalign(32, buf_size)); 53 } 54 55 void av1_dealloc_mb_wiener_var_pred_buf(ThreadData *td) { 56 aom_free(td->wiener_tmp_pred_buf); 57 td->wiener_tmp_pred_buf = NULL; 58 } 59 60 void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) { 61 AV1_COMMON *cm = &cpi->common; 62 63 // This block size is also used to determine number of workers in 64 // multi-threading. If it is changed, one needs to change it accordingly in 65 // "compute_num_ai_workers()". 66 cpi->weber_bsize = BLOCK_8X8; 67 68 if (cpi->oxcf.enable_rate_guide_deltaq) { 69 if (cpi->mb_weber_stats && cpi->prep_rate_estimates && 70 cpi->ext_rate_distribution) 71 return; 72 } else { 73 if (cpi->mb_weber_stats) return; 74 } 75 76 CHECK_MEM_ERROR(cm, cpi->mb_weber_stats, 77 aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols, 78 sizeof(*cpi->mb_weber_stats))); 79 80 if (cpi->oxcf.enable_rate_guide_deltaq) { 81 CHECK_MEM_ERROR( 82 cm, cpi->prep_rate_estimates, 83 aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols, 84 sizeof(*cpi->prep_rate_estimates))); 85 86 CHECK_MEM_ERROR( 87 cm, cpi->ext_rate_distribution, 88 aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols, 89 sizeof(*cpi->ext_rate_distribution))); 90 } 91 } 92 93 static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, 94 int mi_col) { 95 AV1_COMMON *const cm = &cpi->common; 96 const int mi_wide = mi_size_wide[bsize]; 97 const int mi_high = mi_size_high[bsize]; 98 99 const int mi_step = mi_size_wide[cpi->weber_bsize]; 100 int mb_stride = cpi->frame_info.mi_cols; 101 int mb_count = 0; 102 int64_t satd = 0; 103 104 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { 105 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { 106 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) 107 continue; 108 109 satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)] 110 .satd; 111 ++mb_count; 112 } 113 } 114 115 if (mb_count) satd = (int)(satd / mb_count); 116 satd = AOMMAX(1, satd); 117 118 return (int)satd; 119 } 120 121 static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, 122 int mi_col) { 123 AV1_COMMON *const cm = &cpi->common; 124 const int mi_wide = mi_size_wide[bsize]; 125 const int mi_high = mi_size_high[bsize]; 126 127 const int mi_step = mi_size_wide[cpi->weber_bsize]; 128 int mb_stride = cpi->frame_info.mi_cols; 129 int mb_count = 0; 130 int64_t distortion = 0; 131 132 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { 133 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { 134 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) 135 continue; 136 137 distortion += 138 cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)] 139 .distortion; 140 ++mb_count; 141 } 142 } 143 144 if (mb_count) distortion = (int)(distortion / mb_count); 145 distortion = AOMMAX(1, distortion); 146 147 return (int)distortion; 148 } 149 150 static double get_max_scale(const AV1_COMP *const cpi, BLOCK_SIZE bsize, 151 int mi_row, int mi_col) { 152 const AV1_COMMON *const cm = &cpi->common; 153 const int mi_wide = mi_size_wide[bsize]; 154 const int mi_high = mi_size_high[bsize]; 155 const int mi_step = mi_size_wide[cpi->weber_bsize]; 156 int mb_stride = cpi->frame_info.mi_cols; 157 double min_max_scale = 10.0; 158 159 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { 160 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { 161 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) 162 continue; 163 const WeberStats *weber_stats = 164 &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]; 165 if (weber_stats->max_scale < 1.0) continue; 166 if (weber_stats->max_scale < min_max_scale) 167 min_max_scale = weber_stats->max_scale; 168 } 169 } 170 return min_max_scale; 171 } 172 173 static int get_window_wiener_var(const AV1_COMP *const cpi, BLOCK_SIZE bsize, 174 int mi_row, int mi_col) { 175 const AV1_COMMON *const cm = &cpi->common; 176 const int mi_wide = mi_size_wide[bsize]; 177 const int mi_high = mi_size_high[bsize]; 178 179 const int mi_step = mi_size_wide[cpi->weber_bsize]; 180 int sb_wiener_var = 0; 181 int mb_stride = cpi->frame_info.mi_cols; 182 int mb_count = 0; 183 double base_num = 1; 184 double base_den = 1; 185 double base_reg = 1; 186 187 for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { 188 for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { 189 if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) 190 continue; 191 192 const WeberStats *weber_stats = 193 &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]; 194 195 base_num += ((double)weber_stats->distortion) * 196 sqrt((double)weber_stats->src_variance) * 197 weber_stats->rec_pix_max; 198 199 base_den += fabs( 200 weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) - 201 weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance)); 202 203 base_reg += sqrt((double)weber_stats->distortion) * 204 sqrt((double)weber_stats->src_pix_max) * 0.1; 205 ++mb_count; 206 } 207 } 208 209 sb_wiener_var = 210 (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count); 211 sb_wiener_var = AOMMAX(1, sb_wiener_var); 212 213 return (int)sb_wiener_var; 214 } 215 216 static int get_var_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize, 217 int mi_row, int mi_col) { 218 const AV1_COMMON *const cm = &cpi->common; 219 const int mi_wide = mi_size_wide[bsize]; 220 const int mi_high = mi_size_high[bsize]; 221 222 int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col); 223 224 if (mi_row >= (mi_high / 2)) { 225 sb_wiener_var = 226 AOMMIN(sb_wiener_var, 227 get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col)); 228 } 229 if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) { 230 sb_wiener_var = 231 AOMMIN(sb_wiener_var, 232 get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col)); 233 } 234 if (mi_col >= (mi_wide / 2)) { 235 sb_wiener_var = 236 AOMMIN(sb_wiener_var, 237 get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2)); 238 } 239 if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) { 240 sb_wiener_var = 241 AOMMIN(sb_wiener_var, 242 get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2)); 243 } 244 245 return sb_wiener_var; 246 } 247 248 static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) { 249 const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; 250 251 assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob); 252 int rate_cost = 1; 253 254 for (int idx = 0; idx < eob; ++idx) { 255 int abs_level = abs(qcoeff[scan_order->scan[idx]]); 256 rate_cost += (int)(log1p(abs_level) / log(2.0)) + 1 + (abs_level > 0); 257 } 258 259 return (rate_cost << AV1_PROB_COST_SHIFT); 260 } 261 262 void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x, 263 MACROBLOCKD *xd, const int mi_row, 264 int16_t *src_diff, tran_low_t *coeff, 265 tran_low_t *qcoeff, tran_low_t *dqcoeff, 266 double *sum_rec_distortion, 267 double *sum_est_rate, uint8_t *pred_buffer) { 268 AV1_COMMON *const cm = &cpi->common; 269 uint8_t *buffer = cpi->source->y_buffer; 270 int buf_stride = cpi->source->y_stride; 271 MB_MODE_INFO mbmi; 272 memset(&mbmi, 0, sizeof(mbmi)); 273 MB_MODE_INFO *mbmi_ptr = &mbmi; 274 xd->mi = &mbmi_ptr; 275 const BLOCK_SIZE bsize = cpi->weber_bsize; 276 const TX_SIZE tx_size = max_txsize_lookup[bsize]; 277 const int block_size = tx_size_wide[tx_size]; 278 const int coeff_count = block_size * block_size; 279 const int mb_step = mi_size_wide[bsize]; 280 const BitDepthInfo bd_info = get_bit_depth_info(xd); 281 const MultiThreadInfo *const mt_info = &cpi->mt_info; 282 const AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt; 283 AV1EncRowMultiThreadSync *const intra_row_mt_sync = 284 &cpi->ppi->intra_row_mt_sync; 285 const int mi_cols = cm->mi_params.mi_cols; 286 const int mt_thread_id = mi_row / mb_step; 287 // TODO(chengchen): test different unit step size 288 const int mt_unit_step = mi_size_wide[MB_WIENER_MT_UNIT_SIZE]; 289 const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step; 290 int mt_unit_col = 0; 291 const int is_high_bitdepth = is_cur_buf_hbd(xd); 292 293 uint8_t *dst_buffer = pred_buffer; 294 const int dst_buffer_stride = MB_WIENER_PRED_BUF_STRIDE; 295 296 if (is_high_bitdepth) { 297 uint16_t *pred_buffer_16 = (uint16_t *)pred_buffer; 298 dst_buffer = CONVERT_TO_BYTEPTR(pred_buffer_16); 299 } 300 301 for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) { 302 if (mi_col % mt_unit_step == 0) { 303 intra_mt->intra_sync_read_ptr(intra_row_mt_sync, mt_thread_id, 304 mt_unit_col); 305 #if CONFIG_MULTITHREAD 306 const int num_workers = 307 AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers); 308 if (num_workers > 1) { 309 const AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt; 310 pthread_mutex_lock(enc_row_mt->mutex_); 311 const bool exit = enc_row_mt->mb_wiener_mt_exit; 312 pthread_mutex_unlock(enc_row_mt->mutex_); 313 // Stop further processing in case any worker has encountered an error. 314 if (exit) break; 315 } 316 #endif 317 } 318 319 PREDICTION_MODE best_mode = DC_PRED; 320 int best_intra_cost = INT_MAX; 321 const int mi_width = mi_size_wide[bsize]; 322 const int mi_height = mi_size_high[bsize]; 323 set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd, 324 mi_row, mi_col); 325 set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width, 326 AOMMIN(mi_row + mi_height, cm->mi_params.mi_rows), 327 AOMMIN(mi_col + mi_width, cm->mi_params.mi_cols)); 328 set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], 329 av1_num_planes(cm)); 330 xd->mi[0]->bsize = bsize; 331 xd->mi[0]->motion_mode = SIMPLE_TRANSLATION; 332 // Set above and left mbmi to NULL as they are not available in the 333 // preprocessing stage. 334 // They are used to detemine intra edge filter types in intra prediction. 335 if (xd->up_available) { 336 xd->above_mbmi = NULL; 337 } 338 if (xd->left_available) { 339 xd->left_mbmi = NULL; 340 } 341 uint8_t *mb_buffer = 342 buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE; 343 for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END; 344 ++mode) { 345 // TODO(chengchen): Here we use src instead of reconstructed frame as 346 // the intra predictor to make single and multithread version match. 347 // Ideally we want to use the reconstructed. 348 av1_predict_intra_block( 349 xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter, 350 block_size, block_size, tx_size, mode, 0, 0, FILTER_INTRA_MODES, 351 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0); 352 av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size, 353 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride); 354 av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff); 355 int intra_cost = aom_satd(coeff, coeff_count); 356 if (intra_cost < best_intra_cost) { 357 best_intra_cost = intra_cost; 358 best_mode = mode; 359 } 360 } 361 362 av1_predict_intra_block( 363 xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter, 364 block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES, 365 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0); 366 av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size, 367 mb_buffer, buf_stride, dst_buffer, dst_buffer_stride); 368 av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff); 369 370 const struct macroblock_plane *const p = &x->plane[0]; 371 uint16_t eob; 372 const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; 373 QUANT_PARAM quant_param; 374 int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; 375 av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param); 376 #if CONFIG_AV1_HIGHBITDEPTH 377 if (is_cur_buf_hbd(xd)) { 378 av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, 379 scan_order, &quant_param); 380 } else { 381 av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, 382 scan_order, &quant_param); 383 } 384 #else 385 av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order, 386 &quant_param); 387 #endif // CONFIG_AV1_HIGHBITDEPTH 388 389 if (cpi->oxcf.enable_rate_guide_deltaq) { 390 const int rate_cost = rate_estimator(qcoeff, eob, tx_size); 391 cpi->prep_rate_estimates[(mi_row / mb_step) * cpi->frame_info.mi_cols + 392 (mi_col / mb_step)] = rate_cost; 393 } 394 395 av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer, 396 dst_buffer_stride, eob, 0); 397 WeberStats *weber_stats = 398 &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols + 399 (mi_col / mb_step)]; 400 401 weber_stats->rec_pix_max = 1; 402 weber_stats->rec_variance = 0; 403 weber_stats->src_pix_max = 1; 404 weber_stats->src_variance = 0; 405 weber_stats->distortion = 0; 406 407 int64_t src_mean = 0; 408 int64_t rec_mean = 0; 409 int64_t dist_mean = 0; 410 411 for (int pix_row = 0; pix_row < block_size; ++pix_row) { 412 for (int pix_col = 0; pix_col < block_size; ++pix_col) { 413 int src_pix, rec_pix; 414 #if CONFIG_AV1_HIGHBITDEPTH 415 if (is_cur_buf_hbd(xd)) { 416 uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer); 417 uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer); 418 src_pix = src[pix_row * buf_stride + pix_col]; 419 rec_pix = rec[pix_row * dst_buffer_stride + pix_col]; 420 } else { 421 src_pix = mb_buffer[pix_row * buf_stride + pix_col]; 422 rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col]; 423 } 424 #else 425 src_pix = mb_buffer[pix_row * buf_stride + pix_col]; 426 rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col]; 427 #endif 428 src_mean += src_pix; 429 rec_mean += rec_pix; 430 dist_mean += src_pix - rec_pix; 431 weber_stats->src_variance += src_pix * src_pix; 432 weber_stats->rec_variance += rec_pix * rec_pix; 433 weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix); 434 weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix); 435 weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix); 436 } 437 } 438 439 if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) { 440 *sum_rec_distortion += weber_stats->distortion; 441 int est_block_rate = 0; 442 int64_t est_block_dist = 0; 443 model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion, 444 pix_num, &est_block_rate, 445 &est_block_dist); 446 *sum_est_rate += est_block_rate; 447 } 448 449 weber_stats->src_variance -= (src_mean * src_mean) / pix_num; 450 weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num; 451 weber_stats->distortion -= (dist_mean * dist_mean) / pix_num; 452 weber_stats->satd = best_intra_cost; 453 454 qcoeff[0] = 0; 455 int max_scale = 0; 456 for (int idx = 1; idx < coeff_count; ++idx) { 457 const int abs_qcoeff = abs(qcoeff[idx]); 458 max_scale = AOMMAX(max_scale, abs_qcoeff); 459 } 460 weber_stats->max_scale = max_scale; 461 462 if ((mi_col + mb_step) % mt_unit_step == 0 || 463 (mi_col + mb_step) >= mi_cols) { 464 intra_mt->intra_sync_write_ptr(intra_row_mt_sync, mt_thread_id, 465 mt_unit_col, mt_unit_cols); 466 ++mt_unit_col; 467 } 468 } 469 // Set the pointer to null since mbmi is only allocated inside this function. 470 xd->mi = NULL; 471 } 472 473 static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion, 474 double *sum_est_rate) { 475 MACROBLOCK *x = &cpi->td.mb; 476 MACROBLOCKD *xd = &x->e_mbd; 477 const BLOCK_SIZE bsize = cpi->weber_bsize; 478 const int mb_step = mi_size_wide[bsize]; 479 DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]); 480 DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]); 481 DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]); 482 DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]); 483 for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) { 484 av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff, 485 dqcoeff, sum_rec_distortion, sum_est_rate, 486 cpi->td.wiener_tmp_pred_buf); 487 } 488 } 489 490 static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi, 491 const BLOCK_SIZE norm_block_size) { 492 const AV1_COMMON *const cm = &cpi->common; 493 int64_t norm_factor = 1; 494 assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128); 495 const int norm_step = mi_size_wide[norm_block_size]; 496 double sb_wiener_log = 0; 497 double sb_count = 0; 498 for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) { 499 for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) { 500 const int sb_wiener_var = 501 get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col); 502 const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col); 503 const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col); 504 const double scaled_satd = (double)satd / sqrt((double)sse); 505 sb_wiener_log += scaled_satd * log(sb_wiener_var); 506 sb_count += scaled_satd; 507 } 508 } 509 if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count)); 510 norm_factor = AOMMAX(1, norm_factor); 511 512 return norm_factor; 513 } 514 515 static void automatic_intra_tools_off(AV1_COMP *cpi, 516 const double sum_rec_distortion, 517 const double sum_est_rate) { 518 if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return; 519 520 // Thresholds 521 const int high_quality_qindex = 128; 522 const double high_quality_bpp = 2.0; 523 const double high_quality_dist_per_pix = 4.0; 524 525 AV1_COMMON *const cm = &cpi->common; 526 const int qindex = cm->quant_params.base_qindex; 527 const double dist_per_pix = 528 (double)sum_rec_distortion / (cm->width * cm->height); 529 // The estimate bpp is not accurate, an empirical constant 100 is divided. 530 const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100); 531 532 if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp && 533 dist_per_pix < high_quality_dist_per_pix) { 534 cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0; 535 cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0; 536 cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0; 537 cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0; 538 } 539 } 540 541 static void ext_rate_guided_quantization(AV1_COMP *cpi) { 542 // Calculation uses 8x8. 543 const int mb_step = mi_size_wide[cpi->weber_bsize]; 544 // Accumulate to 16x16, step size is in the unit of mi. 545 const int block_step = 4; 546 547 const char *filename = cpi->oxcf.rate_distribution_info; 548 FILE *pfile = fopen(filename, "r"); 549 if (pfile == NULL) { 550 assert(pfile != NULL); 551 return; 552 } 553 554 double ext_rate_sum = 0.0; 555 for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) { 556 for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) { 557 float val; 558 const int fields_converted = fscanf(pfile, "%f", &val); 559 if (fields_converted != 1) { 560 assert(fields_converted == 1); 561 fclose(pfile); 562 return; 563 } 564 ext_rate_sum += val; 565 cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols + 566 (col / mb_step)] = val; 567 } 568 } 569 fclose(pfile); 570 571 int uniform_rate_sum = 0; 572 for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) { 573 for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) { 574 int rate_sum = 0; 575 for (int r = 0; r < block_step; r += mb_step) { 576 for (int c = 0; c < block_step; c += mb_step) { 577 const int mi_row = row + r; 578 const int mi_col = col + c; 579 rate_sum += cpi->prep_rate_estimates[(mi_row / mb_step) * 580 cpi->frame_info.mi_cols + 581 (mi_col / mb_step)]; 582 } 583 } 584 uniform_rate_sum += rate_sum; 585 } 586 } 587 588 const double scale = uniform_rate_sum / ext_rate_sum; 589 cpi->ext_rate_scale = scale; 590 } 591 592 void av1_set_mb_wiener_variance(AV1_COMP *cpi) { 593 AV1_COMMON *const cm = &cpi->common; 594 const SequenceHeader *const seq_params = cm->seq_params; 595 if (aom_realloc_frame_buffer( 596 &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x, 597 seq_params->subsampling_y, seq_params->use_highbitdepth, 598 cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL, 599 NULL, cpi->alloc_pyramid, 0)) 600 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, 601 "Failed to allocate frame buffer"); 602 av1_alloc_mb_wiener_var_pred_buf(&cpi->common, &cpi->td); 603 cpi->norm_wiener_variance = 0; 604 605 MACROBLOCK *x = &cpi->td.mb; 606 MACROBLOCKD *xd = &x->e_mbd; 607 // xd->mi needs to be setup since it is used in av1_frame_init_quantizer. 608 MB_MODE_INFO mbmi; 609 memset(&mbmi, 0, sizeof(mbmi)); 610 MB_MODE_INFO *mbmi_ptr = &mbmi; 611 xd->mi = &mbmi_ptr; 612 cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level; 613 av1_frame_init_quantizer(cpi); 614 615 double sum_rec_distortion = 0.0; 616 double sum_est_rate = 0.0; 617 618 MultiThreadInfo *const mt_info = &cpi->mt_info; 619 const int num_workers = 620 AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers); 621 AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt; 622 intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read_dummy; 623 intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write_dummy; 624 // Calculate differential contrast for each block for the entire image. 625 // TODO(chengchen): properly accumulate the distortion and rate in 626 // av1_calc_mb_wiener_var_mt(). Until then, call calc_mb_wiener_var() if 627 // auto_intra_tools_off is true. 628 if (num_workers > 1 && !cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) { 629 intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read; 630 intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write; 631 av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion, 632 &sum_est_rate); 633 } else { 634 calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate); 635 } 636 637 // Determine whether to turn off several intra coding tools. 638 automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate); 639 640 // Read external rate distribution and use it to guide delta quantization 641 if (cpi->oxcf.enable_rate_guide_deltaq) ext_rate_guided_quantization(cpi); 642 643 const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size; 644 cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size); 645 const int norm_step = mi_size_wide[norm_block_size]; 646 647 double sb_wiener_log = 0; 648 double sb_count = 0; 649 for (int its_cnt = 0; its_cnt < 2; ++its_cnt) { 650 sb_wiener_log = 0; 651 sb_count = 0; 652 for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) { 653 for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; 654 mi_col += norm_step) { 655 int sb_wiener_var = 656 get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col); 657 658 double beta = (double)cpi->norm_wiener_variance / sb_wiener_var; 659 double min_max_scale = AOMMAX( 660 1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col)); 661 662 beta = AOMMIN(beta, 4); 663 beta = AOMMAX(beta, 0.25); 664 665 if (beta < 1 / min_max_scale) continue; 666 667 sb_wiener_var = (int)(cpi->norm_wiener_variance / beta); 668 669 int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col); 670 int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col); 671 double scaled_satd = (double)satd / sqrt((double)sse); 672 sb_wiener_log += scaled_satd * log(sb_wiener_var); 673 sb_count += scaled_satd; 674 } 675 } 676 677 if (sb_count > 0) 678 cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count)); 679 cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance); 680 } 681 682 // Set the pointer to null since mbmi is only allocated inside this function. 683 xd->mi = NULL; 684 aom_free_frame_buffer(&cm->cur_frame->buf); 685 av1_dealloc_mb_wiener_var_pred_buf(&cpi->td); 686 } 687 688 static int get_rate_guided_quantizer(const AV1_COMP *const cpi, 689 BLOCK_SIZE bsize, int mi_row, int mi_col) { 690 // Calculation uses 8x8. 691 const int mb_step = mi_size_wide[cpi->weber_bsize]; 692 // Accumulate to 16x16 693 const int block_step = mi_size_wide[BLOCK_16X16]; 694 double sb_rate_hific = 0.0; 695 double sb_rate_uniform = 0.0; 696 for (int row = mi_row; row < mi_row + mi_size_wide[bsize]; 697 row += block_step) { 698 for (int col = mi_col; col < mi_col + mi_size_high[bsize]; 699 col += block_step) { 700 sb_rate_hific += 701 cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols + 702 (col / mb_step)]; 703 704 for (int r = 0; r < block_step; r += mb_step) { 705 for (int c = 0; c < block_step; c += mb_step) { 706 const int this_row = row + r; 707 const int this_col = col + c; 708 sb_rate_uniform += 709 cpi->prep_rate_estimates[(this_row / mb_step) * 710 cpi->frame_info.mi_cols + 711 (this_col / mb_step)]; 712 } 713 } 714 } 715 } 716 sb_rate_hific *= cpi->ext_rate_scale; 717 718 const double weight = 1.0; 719 const double rate_diff = 720 weight * (sb_rate_hific - sb_rate_uniform) / sb_rate_uniform; 721 double scale = pow(2, rate_diff); 722 723 scale = scale * scale; 724 double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col)); 725 scale = 1.0 / AOMMIN(1.0 / scale, min_max_scale); 726 727 const AV1_COMMON *const cm = &cpi->common; 728 const int base_qindex = cm->quant_params.base_qindex; 729 int offset = 730 av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, scale); 731 const DeltaQInfo *const delta_q_info = &cm->delta_q_info; 732 const int max_offset = delta_q_info->delta_q_res * 10; 733 offset = AOMMIN(offset, max_offset - 1); 734 offset = AOMMAX(offset, -max_offset + 1); 735 int qindex = cm->quant_params.base_qindex + offset; 736 qindex = AOMMIN(qindex, MAXQ); 737 qindex = AOMMAX(qindex, MINQ); 738 if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1); 739 740 return qindex; 741 } 742 743 int av1_get_sbq_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize, 744 int mi_row, int mi_col) { 745 if (cpi->oxcf.enable_rate_guide_deltaq) { 746 return get_rate_guided_quantizer(cpi, bsize, mi_row, mi_col); 747 } 748 749 const AV1_COMMON *const cm = &cpi->common; 750 const int base_qindex = cm->quant_params.base_qindex; 751 int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col); 752 int offset = 0; 753 double beta = (double)cpi->norm_wiener_variance / sb_wiener_var; 754 double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col)); 755 beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale); 756 757 // Cap beta such that the delta q value is not much far away from the base q. 758 beta = AOMMIN(beta, 4); 759 beta = AOMMAX(beta, 0.25); 760 offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta); 761 const DeltaQInfo *const delta_q_info = &cm->delta_q_info; 762 offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1); 763 offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1); 764 int qindex = cm->quant_params.base_qindex + offset; 765 qindex = AOMMIN(qindex, MAXQ); 766 qindex = AOMMAX(qindex, MINQ); 767 if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1); 768 769 return qindex; 770 } 771 772 void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) { 773 AV1_COMMON *cm = &cpi->common; 774 775 if (cpi->mb_delta_q) return; 776 777 CHECK_MEM_ERROR(cm, cpi->mb_delta_q, 778 aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols, 779 sizeof(*cpi->mb_delta_q))); 780 } 781 782 #if CONFIG_TFLITE 783 static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows, 784 int bit_depth, uint8_t *y_buffer, int y_stride, 785 float *predicts0, float *predicts1) { 786 // Create the model and interpreter options. 787 TfLiteModel *model = 788 TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize); 789 if (model == NULL) return 1; 790 791 TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate(); 792 TfLiteInterpreterOptionsSetNumThreads(options, 2); 793 if (options == NULL) { 794 TfLiteModelDelete(model); 795 return 1; 796 } 797 798 // Create the interpreter. 799 TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options); 800 if (interpreter == NULL) { 801 TfLiteInterpreterOptionsDelete(options); 802 TfLiteModelDelete(model); 803 return 1; 804 } 805 806 // Allocate tensors and populate the input tensor data. 807 TfLiteInterpreterAllocateTensors(interpreter); 808 TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0); 809 if (input_tensor == NULL) { 810 TfLiteInterpreterDelete(interpreter); 811 TfLiteInterpreterOptionsDelete(options); 812 TfLiteModelDelete(model); 813 return 1; 814 } 815 816 size_t input_size = TfLiteTensorByteSize(input_tensor); 817 float *input_data = aom_calloc(input_size, 1); 818 if (input_data == NULL) { 819 TfLiteInterpreterDelete(interpreter); 820 TfLiteInterpreterOptionsDelete(options); 821 TfLiteModelDelete(model); 822 return 1; 823 } 824 825 const int num_mi_w = mi_size_wide[block_size]; 826 const int num_mi_h = mi_size_high[block_size]; 827 for (int row = 0; row < num_rows; ++row) { 828 for (int col = 0; col < num_cols; ++col) { 829 const int row_offset = (row * num_mi_h) << 2; 830 const int col_offset = (col * num_mi_w) << 2; 831 832 uint8_t *buf = y_buffer + row_offset * y_stride + col_offset; 833 int r = row_offset, pos = 0; 834 const float base = (float)((1 << bit_depth) - 1); 835 while (r < row_offset + (num_mi_h << 2)) { 836 for (int c = 0; c < (num_mi_w << 2); ++c) { 837 input_data[pos++] = bit_depth > 8 838 ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base 839 : (float)*(buf + c) / base; 840 } 841 buf += y_stride; 842 ++r; 843 } 844 TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size); 845 846 // Execute inference. 847 if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) { 848 TfLiteInterpreterDelete(interpreter); 849 TfLiteInterpreterOptionsDelete(options); 850 TfLiteModelDelete(model); 851 return 1; 852 } 853 854 // Extract the output tensor data. 855 const TfLiteTensor *output_tensor = 856 TfLiteInterpreterGetOutputTensor(interpreter, 0); 857 if (output_tensor == NULL) { 858 TfLiteInterpreterDelete(interpreter); 859 TfLiteInterpreterOptionsDelete(options); 860 TfLiteModelDelete(model); 861 return 1; 862 } 863 864 size_t output_size = TfLiteTensorByteSize(output_tensor); 865 float output_data[2]; 866 867 TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size); 868 predicts0[row * num_cols + col] = output_data[0]; 869 predicts1[row * num_cols + col] = output_data[1]; 870 } 871 } 872 873 // Dispose of the model and interpreter objects. 874 TfLiteInterpreterDelete(interpreter); 875 TfLiteInterpreterOptionsDelete(options); 876 TfLiteModelDelete(model); 877 aom_free(input_data); 878 return 0; 879 } 880 881 void av1_set_mb_ur_variance(AV1_COMP *cpi) { 882 const AV1_COMMON *cm = &cpi->common; 883 const CommonModeInfoParams *const mi_params = &cm->mi_params; 884 uint8_t *y_buffer = cpi->source->y_buffer; 885 const int y_stride = cpi->source->y_stride; 886 const int block_size = cpi->common.seq_params->sb_size; 887 const uint32_t bit_depth = cpi->td.mb.e_mbd.bd; 888 889 const int num_mi_w = mi_size_wide[block_size]; 890 const int num_mi_h = mi_size_high[block_size]; 891 const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w; 892 const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h; 893 894 // TODO(sdeng): fit a better model_1; disable it at this time. 895 float *mb_delta_q0, *mb_delta_q1, delta_q_avg0 = 0.0f; 896 CHECK_MEM_ERROR(cm, mb_delta_q0, 897 aom_calloc(num_rows * num_cols, sizeof(float))); 898 CHECK_MEM_ERROR(cm, mb_delta_q1, 899 aom_calloc(num_rows * num_cols, sizeof(float))); 900 901 if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer, 902 y_stride, mb_delta_q0, mb_delta_q1)) { 903 aom_internal_error(cm->error, AOM_CODEC_ERROR, 904 "Failed to call TFlite functions."); 905 } 906 907 // Loop through each SB block. 908 for (int row = 0; row < num_rows; ++row) { 909 for (int col = 0; col < num_cols; ++col) { 910 const int index = row * num_cols + col; 911 delta_q_avg0 += mb_delta_q0[index]; 912 } 913 } 914 915 delta_q_avg0 /= (float)(num_rows * num_cols); 916 917 float scaling_factor; 918 const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ; 919 if (cq_level < delta_q_avg0) { 920 scaling_factor = cq_level / delta_q_avg0; 921 } else { 922 scaling_factor = 1.0f - (cq_level - delta_q_avg0) / (1.0f - delta_q_avg0); 923 } 924 925 for (int row = 0; row < num_rows; ++row) { 926 for (int col = 0; col < num_cols; ++col) { 927 const int index = row * num_cols + col; 928 cpi->mb_delta_q[index] = 929 RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ * 930 scaling_factor * (mb_delta_q0[index] - delta_q_avg0)); 931 } 932 } 933 934 aom_free(mb_delta_q0); 935 aom_free(mb_delta_q1); 936 } 937 #else // !CONFIG_TFLITE 938 void av1_set_mb_ur_variance(AV1_COMP *cpi) { 939 const AV1_COMMON *cm = &cpi->common; 940 const CommonModeInfoParams *const mi_params = &cm->mi_params; 941 const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; 942 uint8_t *y_buffer = cpi->source->y_buffer; 943 const int y_stride = cpi->source->y_stride; 944 const int block_size = cpi->common.seq_params->sb_size; 945 946 const int num_mi_w = mi_size_wide[block_size]; 947 const int num_mi_h = mi_size_high[block_size]; 948 const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w; 949 const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h; 950 951 int *mb_delta_q[2]; 952 CHECK_MEM_ERROR(cm, mb_delta_q[0], 953 aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0]))); 954 CHECK_MEM_ERROR(cm, mb_delta_q[1], 955 aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1]))); 956 957 // Approximates the model change between current version (Spet 2021) and the 958 // baseline (July 2021). 959 const double model_change[] = { 3.0, 3.0 }; 960 // The following parameters are fitted from user labeled data. 961 const double a[] = { -24.50 * 4.0, -17.20 * 4.0 }; 962 const double b[] = { 0.004898, 0.003093 }; 963 const double c[] = { (29.932 + model_change[0]) * 4.0, 964 (42.100 + model_change[1]) * 4.0 }; 965 int delta_q_avg[2] = { 0, 0 }; 966 // Loop through each SB block. 967 for (int row = 0; row < num_rows; ++row) { 968 for (int col = 0; col < num_cols; ++col) { 969 double var = 0.0, num_of_var = 0.0; 970 const int index = row * num_cols + col; 971 972 // Loop through each 8x8 block. 973 for (int mi_row = row * num_mi_h; 974 mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h; 975 mi_row += 2) { 976 for (int mi_col = col * num_mi_w; 977 mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w; 978 mi_col += 2) { 979 struct buf_2d buf; 980 const int row_offset_y = mi_row << 2; 981 const int col_offset_y = mi_col << 2; 982 983 buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y; 984 buf.stride = y_stride; 985 986 unsigned int block_variance; 987 block_variance = av1_get_perpixel_variance_facade( 988 cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y); 989 990 block_variance = AOMMAX(block_variance, 1); 991 var += log((double)block_variance); 992 num_of_var += 1.0; 993 } 994 } 995 var = exp(var / num_of_var); 996 mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]); 997 mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]); 998 delta_q_avg[0] += mb_delta_q[0][index]; 999 delta_q_avg[1] += mb_delta_q[1][index]; 1000 } 1001 } 1002 1003 delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols)); 1004 delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols)); 1005 1006 int model_idx; 1007 double scaling_factor; 1008 const int cq_level = cpi->oxcf.rc_cfg.cq_level; 1009 if (cq_level < delta_q_avg[0]) { 1010 model_idx = 0; 1011 scaling_factor = (double)cq_level / delta_q_avg[0]; 1012 } else if (cq_level < delta_q_avg[1]) { 1013 model_idx = 2; 1014 scaling_factor = 1015 (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]); 1016 } else { 1017 model_idx = 1; 1018 scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]); 1019 } 1020 1021 const double new_delta_q_avg = 1022 delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]); 1023 for (int row = 0; row < num_rows; ++row) { 1024 for (int col = 0; col < num_cols; ++col) { 1025 const int index = row * num_cols + col; 1026 if (model_idx == 2) { 1027 const double delta_q = 1028 mb_delta_q[0][index] + 1029 scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]); 1030 cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength / 1031 100.0 * (delta_q - new_delta_q_avg)); 1032 } else { 1033 cpi->mb_delta_q[index] = RINT( 1034 (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor * 1035 (mb_delta_q[model_idx][index] - delta_q_avg[model_idx])); 1036 } 1037 } 1038 } 1039 1040 aom_free(mb_delta_q[0]); 1041 aom_free(mb_delta_q[1]); 1042 } 1043 #endif 1044 1045 int av1_get_sbq_user_rating_based(const AV1_COMP *const cpi, int mi_row, 1046 int mi_col) { 1047 const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size; 1048 const CommonModeInfoParams *const mi_params = &cpi->common.mi_params; 1049 const AV1_COMMON *const cm = &cpi->common; 1050 const int base_qindex = cm->quant_params.base_qindex; 1051 if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex; 1052 1053 const int num_mi_w = mi_size_wide[bsize]; 1054 const int num_mi_h = mi_size_high[bsize]; 1055 const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w; 1056 const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w); 1057 const int delta_q = cpi->mb_delta_q[index]; 1058 1059 int qindex = base_qindex + delta_q; 1060 qindex = AOMMIN(qindex, MAXQ); 1061 qindex = AOMMAX(qindex, MINQ + 1); 1062 1063 return qindex; 1064 } 1065 1066 #if !CONFIG_REALTIME_ONLY 1067 1068 // Variance Boost: a variance adaptive quantization implementation 1069 // SVT-AV1 appendix with an overview and a graphical, step-by-step explanation 1070 // of the implementation 1071 // https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Appendix-Variance-Boost.md 1072 int av1_get_sbq_variance_boost(const AV1_COMP *cpi, const MACROBLOCK *x) { 1073 const AV1_COMMON *cm = &cpi->common; 1074 const int base_qindex = cm->quant_params.base_qindex; 1075 const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth; 1076 1077 // Variance Boost only supports 64x64 SBs. 1078 assert(cm->seq_params->sb_size == BLOCK_64X64); 1079 1080 unsigned int variance = av1_get_variance_boost_block_variance(cpi, x); 1081 // Compute Variance Boost strength from the deltaq_strength value. 1082 double strength = (cpi->oxcf.q_cfg.deltaq_strength / 100.0) * 3.0; 1083 1084 // Clamp strength to a reasonable range. 1085 // deltaq_strength can go up to 1000%, which is too strong for the Variance 1086 // Boost scaling. Testing revealed strengths as high as 6 (200%) are still 1087 // reasonable for some specific scenarios. 1088 strength = fclamp(strength, 0.0, 6.0); 1089 1090 // Variance = 0 areas are either completely flat patches or have very fine 1091 // gradients. Boost these blocks as if they have a variance of 1. 1092 if (variance == 0) { 1093 variance = 1; 1094 } 1095 1096 // Compute a boost based on a fast-growing formula. 1097 // High and medium variance SBs essentially get no boost, while lower variance 1098 // SBs get increasingly stronger boosts. 1099 // Still picture curve, with variance crossover point at 1024. 1100 double qstep_ratio = 0.15 * strength * (-log2((double)variance) + 10.0) + 1.0; 1101 qstep_ratio = fclamp(qstep_ratio, 1.0, VAR_BOOST_MAX_BOOST); 1102 1103 double base_q = av1_convert_qindex_to_q(base_qindex, bit_depth); 1104 double target_q = base_q / qstep_ratio; 1105 int target_qindex = av1_convert_q_to_qindex(target_q, bit_depth); 1106 1107 // Determine the SB's delta_q boost by computing an (unscaled) delta_q from 1108 // the base and target q values, then scale that delta_q according to the 1109 // frame's base qindex. 1110 // The scaling coefficients were chosen empirically to maximize SSIMULACRA 2 1111 // scores, 10th percentile scores, and subjective quality. Boosts become 1112 // smaller (for a given variance) the lower the base qindex. 1113 int boost = (int)round((base_qindex + 544.0) * (base_qindex - target_qindex) / 1114 1279.0); 1115 boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost); 1116 1117 // Variance Boost was designed to always operate in the lossy domain, so MINQ 1118 // is excluded. 1119 int sb_qindex = AOMMAX(base_qindex - boost, MINQ + 1); 1120 1121 return sb_qindex; 1122 } 1123 #endif