tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

allintra_vis.c (43471B)


      1 /*
      2 * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 
     14 #include "config/aom_config.h"
     15 
     16 #include "aom_util/aom_pthread.h"
     17 
     18 #if CONFIG_TFLITE
     19 #include "tensorflow/lite/c/c_api.h"
     20 #include "av1/encoder/deltaq4_model.c"
     21 #endif
     22 
     23 #include "av1/common/common_data.h"
     24 #include "av1/common/enums.h"
     25 #include "av1/common/idct.h"
     26 #include "av1/common/reconinter.h"
     27 #include "av1/encoder/allintra_vis.h"
     28 #include "av1/encoder/aq_variance.h"
     29 #include "av1/encoder/encoder.h"
     30 #include "av1/encoder/ethread.h"
     31 #include "av1/encoder/hybrid_fwd_txfm.h"
     32 #include "av1/encoder/model_rd.h"
     33 #include "av1/encoder/rdopt_utils.h"
     34 
     35 #define MB_WIENER_PRED_BLOCK_SIZE BLOCK_128X128
     36 #define MB_WIENER_PRED_BUF_STRIDE 128
     37 
     38 // Maximum delta-q range allowed for Variance Boost after scaling
     39 #define VAR_BOOST_MAX_DELTAQ_RANGE 80
     40 // Maximum quantization step boost allowed for Variance Boost
     41 #define VAR_BOOST_MAX_BOOST 8.0
     42 
     43 void av1_alloc_mb_wiener_var_pred_buf(AV1_COMMON *cm, ThreadData *td) {
     44  const int is_high_bitdepth = is_cur_buf_hbd(&td->mb.e_mbd);
     45  assert(MB_WIENER_PRED_BLOCK_SIZE < BLOCK_SIZES_ALL);
     46  const int buf_width = block_size_wide[MB_WIENER_PRED_BLOCK_SIZE];
     47  const int buf_height = block_size_high[MB_WIENER_PRED_BLOCK_SIZE];
     48  assert(buf_width == MB_WIENER_PRED_BUF_STRIDE);
     49  const size_t buf_size =
     50      (buf_width * buf_height * sizeof(*td->wiener_tmp_pred_buf))
     51      << is_high_bitdepth;
     52  CHECK_MEM_ERROR(cm, td->wiener_tmp_pred_buf, aom_memalign(32, buf_size));
     53 }
     54 
// Frees the scratch prediction buffer allocated by
// av1_alloc_mb_wiener_var_pred_buf() and resets the pointer so a repeated
// dealloc (or a later alloc) starts from a known NULL state.
void av1_dealloc_mb_wiener_var_pred_buf(ThreadData *td) {
 aom_free(td->wiener_tmp_pred_buf);
 td->wiener_tmp_pred_buf = NULL;
}
     59 
     60 void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) {
     61  AV1_COMMON *cm = &cpi->common;
     62 
     63  // This block size is also used to determine number of workers in
     64  // multi-threading. If it is changed, one needs to change it accordingly in
     65  // "compute_num_ai_workers()".
     66  cpi->weber_bsize = BLOCK_8X8;
     67 
     68  if (cpi->oxcf.enable_rate_guide_deltaq) {
     69    if (cpi->mb_weber_stats && cpi->prep_rate_estimates &&
     70        cpi->ext_rate_distribution)
     71      return;
     72  } else {
     73    if (cpi->mb_weber_stats) return;
     74  }
     75 
     76  CHECK_MEM_ERROR(cm, cpi->mb_weber_stats,
     77                  aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
     78                             sizeof(*cpi->mb_weber_stats)));
     79 
     80  if (cpi->oxcf.enable_rate_guide_deltaq) {
     81    CHECK_MEM_ERROR(
     82        cm, cpi->prep_rate_estimates,
     83        aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
     84                   sizeof(*cpi->prep_rate_estimates)));
     85 
     86    CHECK_MEM_ERROR(
     87        cm, cpi->ext_rate_distribution,
     88        aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
     89                   sizeof(*cpi->ext_rate_distribution)));
     90  }
     91 }
     92 
     93 static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
     94                        int mi_col) {
     95  AV1_COMMON *const cm = &cpi->common;
     96  const int mi_wide = mi_size_wide[bsize];
     97  const int mi_high = mi_size_high[bsize];
     98 
     99  const int mi_step = mi_size_wide[cpi->weber_bsize];
    100  int mb_stride = cpi->frame_info.mi_cols;
    101  int mb_count = 0;
    102  int64_t satd = 0;
    103 
    104  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    105    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
    106      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
    107        continue;
    108 
    109      satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
    110                  .satd;
    111      ++mb_count;
    112    }
    113  }
    114 
    115  if (mb_count) satd = (int)(satd / mb_count);
    116  satd = AOMMAX(1, satd);
    117 
    118  return (int)satd;
    119 }
    120 
    121 static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
    122                       int mi_col) {
    123  AV1_COMMON *const cm = &cpi->common;
    124  const int mi_wide = mi_size_wide[bsize];
    125  const int mi_high = mi_size_high[bsize];
    126 
    127  const int mi_step = mi_size_wide[cpi->weber_bsize];
    128  int mb_stride = cpi->frame_info.mi_cols;
    129  int mb_count = 0;
    130  int64_t distortion = 0;
    131 
    132  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    133    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
    134      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
    135        continue;
    136 
    137      distortion +=
    138          cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
    139              .distortion;
    140      ++mb_count;
    141    }
    142  }
    143 
    144  if (mb_count) distortion = (int)(distortion / mb_count);
    145  distortion = AOMMAX(1, distortion);
    146 
    147  return (int)distortion;
    148 }
    149 
    150 static double get_max_scale(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    151                            int mi_row, int mi_col) {
    152  const AV1_COMMON *const cm = &cpi->common;
    153  const int mi_wide = mi_size_wide[bsize];
    154  const int mi_high = mi_size_high[bsize];
    155  const int mi_step = mi_size_wide[cpi->weber_bsize];
    156  int mb_stride = cpi->frame_info.mi_cols;
    157  double min_max_scale = 10.0;
    158 
    159  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    160    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
    161      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
    162        continue;
    163      const WeberStats *weber_stats =
    164          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
    165      if (weber_stats->max_scale < 1.0) continue;
    166      if (weber_stats->max_scale < min_max_scale)
    167        min_max_scale = weber_stats->max_scale;
    168    }
    169  }
    170  return min_max_scale;
    171 }
    172 
    173 static int get_window_wiener_var(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    174                                 int mi_row, int mi_col) {
    175  const AV1_COMMON *const cm = &cpi->common;
    176  const int mi_wide = mi_size_wide[bsize];
    177  const int mi_high = mi_size_high[bsize];
    178 
    179  const int mi_step = mi_size_wide[cpi->weber_bsize];
    180  int sb_wiener_var = 0;
    181  int mb_stride = cpi->frame_info.mi_cols;
    182  int mb_count = 0;
    183  double base_num = 1;
    184  double base_den = 1;
    185  double base_reg = 1;
    186 
    187  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    188    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
    189      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
    190        continue;
    191 
    192      const WeberStats *weber_stats =
    193          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
    194 
    195      base_num += ((double)weber_stats->distortion) *
    196                  sqrt((double)weber_stats->src_variance) *
    197                  weber_stats->rec_pix_max;
    198 
    199      base_den += fabs(
    200          weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) -
    201          weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance));
    202 
    203      base_reg += sqrt((double)weber_stats->distortion) *
    204                  sqrt((double)weber_stats->src_pix_max) * 0.1;
    205      ++mb_count;
    206    }
    207  }
    208 
    209  sb_wiener_var =
    210      (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count);
    211  sb_wiener_var = AOMMAX(1, sb_wiener_var);
    212 
    213  return (int)sb_wiener_var;
    214 }
    215 
    216 static int get_var_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    217                                 int mi_row, int mi_col) {
    218  const AV1_COMMON *const cm = &cpi->common;
    219  const int mi_wide = mi_size_wide[bsize];
    220  const int mi_high = mi_size_high[bsize];
    221 
    222  int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col);
    223 
    224  if (mi_row >= (mi_high / 2)) {
    225    sb_wiener_var =
    226        AOMMIN(sb_wiener_var,
    227               get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col));
    228  }
    229  if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) {
    230    sb_wiener_var =
    231        AOMMIN(sb_wiener_var,
    232               get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col));
    233  }
    234  if (mi_col >= (mi_wide / 2)) {
    235    sb_wiener_var =
    236        AOMMIN(sb_wiener_var,
    237               get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2));
    238  }
    239  if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) {
    240    sb_wiener_var =
    241        AOMMIN(sb_wiener_var,
    242               get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2));
    243  }
    244 
    245  return sb_wiener_var;
    246 }
    247 
    248 static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
    249  const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
    250 
    251  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
    252  int rate_cost = 1;
    253 
    254  for (int idx = 0; idx < eob; ++idx) {
    255    int abs_level = abs(qcoeff[scan_order->scan[idx]]);
    256    rate_cost += (int)(log1p(abs_level) / log(2.0)) + 1 + (abs_level > 0);
    257  }
    258 
    259  return (rate_cost << AV1_PROB_COST_SHIFT);
    260 }
    261 
// Computes per-8x8 Weber statistics (source/reconstruction variance,
// distortion, pixel maxima, SATD, max AC quantized-coefficient magnitude)
// for one row of 8x8 blocks and stores them in cpi->mb_weber_stats. For each
// block it exhaustively searches all intra modes on the source pixels,
// transforms/quantizes/reconstructs the winner, and measures the result.
// When run multithreaded, progress across rows is coordinated through the
// intra_sync_read_ptr/intra_sync_write_ptr callbacks at mt_unit_step
// boundaries. sum_rec_distortion/sum_est_rate are only accumulated when
// auto_intra_tools_off is enabled.
void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
                               MACROBLOCKD *xd, const int mi_row,
                               int16_t *src_diff, tran_low_t *coeff,
                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
                               double *sum_rec_distortion,
                               double *sum_est_rate, uint8_t *pred_buffer) {
 AV1_COMMON *const cm = &cpi->common;
 uint8_t *buffer = cpi->source->y_buffer;
 int buf_stride = cpi->source->y_stride;
 MB_MODE_INFO mbmi;
 memset(&mbmi, 0, sizeof(mbmi));
 MB_MODE_INFO *mbmi_ptr = &mbmi;
 // xd->mi must reference a valid mbmi while the prediction helpers run; a
 // zero-initialized local is attached here and detached at the end.
 xd->mi = &mbmi_ptr;
 const BLOCK_SIZE bsize = cpi->weber_bsize;
 const TX_SIZE tx_size = max_txsize_lookup[bsize];
 const int block_size = tx_size_wide[tx_size];
 const int coeff_count = block_size * block_size;
 const int mb_step = mi_size_wide[bsize];
 const BitDepthInfo bd_info = get_bit_depth_info(xd);
 const MultiThreadInfo *const mt_info = &cpi->mt_info;
 const AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
 AV1EncRowMultiThreadSync *const intra_row_mt_sync =
     &cpi->ppi->intra_row_mt_sync;
 const int mi_cols = cm->mi_params.mi_cols;
 const int mt_thread_id = mi_row / mb_step;
 // TODO(chengchen): test different unit step size
 const int mt_unit_step = mi_size_wide[MB_WIENER_MT_UNIT_SIZE];
 const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step;
 int mt_unit_col = 0;
 const int is_high_bitdepth = is_cur_buf_hbd(xd);

 uint8_t *dst_buffer = pred_buffer;
 const int dst_buffer_stride = MB_WIENER_PRED_BUF_STRIDE;

 if (is_high_bitdepth) {
   uint16_t *pred_buffer_16 = (uint16_t *)pred_buffer;
   dst_buffer = CONVERT_TO_BYTEPTR(pred_buffer_16);
 }

 for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) {
   if (mi_col % mt_unit_step == 0) {
     // Wait until the row above has finished this multithreading unit.
     intra_mt->intra_sync_read_ptr(intra_row_mt_sync, mt_thread_id,
                                   mt_unit_col);
#if CONFIG_MULTITHREAD
     const int num_workers =
         AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
     if (num_workers > 1) {
       const AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
       pthread_mutex_lock(enc_row_mt->mutex_);
       const bool exit = enc_row_mt->mb_wiener_mt_exit;
       pthread_mutex_unlock(enc_row_mt->mutex_);
       // Stop further processing in case any worker has encountered an error.
       if (exit) break;
     }
#endif
   }

   PREDICTION_MODE best_mode = DC_PRED;
   int best_intra_cost = INT_MAX;
   const int mi_width = mi_size_wide[bsize];
   const int mi_height = mi_size_high[bsize];
   set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                         mi_row, mi_col);
   set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
                  AOMMIN(mi_row + mi_height, cm->mi_params.mi_rows),
                  AOMMIN(mi_col + mi_width, cm->mi_params.mi_cols));
   set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
                av1_num_planes(cm));
   xd->mi[0]->bsize = bsize;
   xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
   // Set above and left mbmi to NULL as they are not available in the
   // preprocessing stage.
   // They are used to detemine intra edge filter types in intra prediction.
   if (xd->up_available) {
     xd->above_mbmi = NULL;
   }
   if (xd->left_available) {
     xd->left_mbmi = NULL;
   }
   uint8_t *mb_buffer =
       buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
   // Exhaustive intra mode search: keep the mode with the lowest
   // transform-domain SATD cost.
   for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
        ++mode) {
     // TODO(chengchen): Here we use src instead of reconstructed frame as
     // the intra predictor to make single and multithread version match.
     // Ideally we want to use the reconstructed.
     av1_predict_intra_block(
         xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
         block_size, block_size, tx_size, mode, 0, 0, FILTER_INTRA_MODES,
         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
     av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                        mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
     av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
     int intra_cost = aom_satd(coeff, coeff_count);
     if (intra_cost < best_intra_cost) {
       best_intra_cost = intra_cost;
       best_mode = mode;
     }
   }

   // Re-run prediction and forward transform with the winning mode; the
   // statistics below are measured on this result.
   av1_predict_intra_block(
       xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
       block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES,
       mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
   av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                      mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
   av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);

   const struct macroblock_plane *const p = &x->plane[0];
   uint16_t eob;
   const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
   QUANT_PARAM quant_param;
   int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
   av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
#if CONFIG_AV1_HIGHBITDEPTH
   if (is_cur_buf_hbd(xd)) {
     av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                                   scan_order, &quant_param);
   } else {
     av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                            scan_order, &quant_param);
   }
#else
   av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order,
                          &quant_param);
#endif  // CONFIG_AV1_HIGHBITDEPTH

   if (cpi->oxcf.enable_rate_guide_deltaq) {
     const int rate_cost = rate_estimator(qcoeff, eob, tx_size);
     cpi->prep_rate_estimates[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                              (mi_col / mb_step)] = rate_cost;
   }

   // Reconstruct into dst_buffer so distortion is measured against the
   // quantized result.
   av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
                               dst_buffer_stride, eob, 0);
   WeberStats *weber_stats =
       &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                            (mi_col / mb_step)];

   weber_stats->rec_pix_max = 1;
   weber_stats->rec_variance = 0;
   weber_stats->src_pix_max = 1;
   weber_stats->src_variance = 0;
   weber_stats->distortion = 0;

   int64_t src_mean = 0;
   int64_t rec_mean = 0;
   int64_t dist_mean = 0;

   // Accumulate raw sums/sums-of-squares; they are converted to centered
   // moments right after the loop.
   for (int pix_row = 0; pix_row < block_size; ++pix_row) {
     for (int pix_col = 0; pix_col < block_size; ++pix_col) {
       int src_pix, rec_pix;
#if CONFIG_AV1_HIGHBITDEPTH
       if (is_cur_buf_hbd(xd)) {
         uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
         uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
         src_pix = src[pix_row * buf_stride + pix_col];
         rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
       } else {
         src_pix = mb_buffer[pix_row * buf_stride + pix_col];
         rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
       }
#else
       src_pix = mb_buffer[pix_row * buf_stride + pix_col];
       rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
#endif
       src_mean += src_pix;
       rec_mean += rec_pix;
       dist_mean += src_pix - rec_pix;
       weber_stats->src_variance += src_pix * src_pix;
       weber_stats->rec_variance += rec_pix * rec_pix;
       weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
       weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
       weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
     }
   }

   if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
     *sum_rec_distortion += weber_stats->distortion;
     int est_block_rate = 0;
     int64_t est_block_dist = 0;
     model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
                                     pix_num, &est_block_rate,
                                     &est_block_dist);
     *sum_est_rate += est_block_rate;
   }

   // Center the raw sums: subtract (sum^2 / N) to turn sums of squares into
   // (unnormalized) variances, and likewise for the distortion.
   weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
   weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
   weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
   weber_stats->satd = best_intra_cost;

   // Record the largest AC quantized-coefficient magnitude; the DC term is
   // zeroed first so it is excluded.
   qcoeff[0] = 0;
   int max_scale = 0;
   for (int idx = 1; idx < coeff_count; ++idx) {
     const int abs_qcoeff = abs(qcoeff[idx]);
     max_scale = AOMMAX(max_scale, abs_qcoeff);
   }
   weber_stats->max_scale = max_scale;

   if ((mi_col + mb_step) % mt_unit_step == 0 ||
       (mi_col + mb_step) >= mi_cols) {
     // Publish completion of this unit so the next row may proceed.
     intra_mt->intra_sync_write_ptr(intra_row_mt_sync, mt_thread_id,
                                    mt_unit_col, mt_unit_cols);
     ++mt_unit_col;
   }
 }
 // Set the pointer to null since mbmi is only allocated inside this function.
 xd->mi = NULL;
}
    472 
    473 static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion,
    474                               double *sum_est_rate) {
    475  MACROBLOCK *x = &cpi->td.mb;
    476  MACROBLOCKD *xd = &x->e_mbd;
    477  const BLOCK_SIZE bsize = cpi->weber_bsize;
    478  const int mb_step = mi_size_wide[bsize];
    479  DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
    480  DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
    481  DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
    482  DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
    483  for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
    484    av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff,
    485                               dqcoeff, sum_rec_distortion, sum_est_rate,
    486                               cpi->td.wiener_tmp_pred_buf);
    487  }
    488 }
    489 
    490 static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi,
    491                                        const BLOCK_SIZE norm_block_size) {
    492  const AV1_COMMON *const cm = &cpi->common;
    493  int64_t norm_factor = 1;
    494  assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128);
    495  const int norm_step = mi_size_wide[norm_block_size];
    496  double sb_wiener_log = 0;
    497  double sb_count = 0;
    498  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
    499    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
    500      const int sb_wiener_var =
    501          get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
    502      const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
    503      const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
    504      const double scaled_satd = (double)satd / sqrt((double)sse);
    505      sb_wiener_log += scaled_satd * log(sb_wiener_var);
    506      sb_count += scaled_satd;
    507    }
    508  }
    509  if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count));
    510  norm_factor = AOMMAX(1, norm_factor);
    511 
    512  return norm_factor;
    513 }
    514 
    515 static void automatic_intra_tools_off(AV1_COMP *cpi,
    516                                      const double sum_rec_distortion,
    517                                      const double sum_est_rate) {
    518  if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;
    519 
    520  // Thresholds
    521  const int high_quality_qindex = 128;
    522  const double high_quality_bpp = 2.0;
    523  const double high_quality_dist_per_pix = 4.0;
    524 
    525  AV1_COMMON *const cm = &cpi->common;
    526  const int qindex = cm->quant_params.base_qindex;
    527  const double dist_per_pix =
    528      (double)sum_rec_distortion / (cm->width * cm->height);
    529  // The estimate bpp is not accurate, an empirical constant 100 is divided.
    530  const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100);
    531 
    532  if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp &&
    533      dist_per_pix < high_quality_dist_per_pix) {
    534    cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
    535    cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
    536    cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
    537    cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
    538  }
    539 }
    540 
    541 static void ext_rate_guided_quantization(AV1_COMP *cpi) {
    542  // Calculation uses 8x8.
    543  const int mb_step = mi_size_wide[cpi->weber_bsize];
    544  // Accumulate to 16x16, step size is in the unit of mi.
    545  const int block_step = 4;
    546 
    547  const char *filename = cpi->oxcf.rate_distribution_info;
    548  FILE *pfile = fopen(filename, "r");
    549  if (pfile == NULL) {
    550    assert(pfile != NULL);
    551    return;
    552  }
    553 
    554  double ext_rate_sum = 0.0;
    555  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
    556    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
    557      float val;
    558      const int fields_converted = fscanf(pfile, "%f", &val);
    559      if (fields_converted != 1) {
    560        assert(fields_converted == 1);
    561        fclose(pfile);
    562        return;
    563      }
    564      ext_rate_sum += val;
    565      cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
    566                                 (col / mb_step)] = val;
    567    }
    568  }
    569  fclose(pfile);
    570 
    571  int uniform_rate_sum = 0;
    572  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
    573    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
    574      int rate_sum = 0;
    575      for (int r = 0; r < block_step; r += mb_step) {
    576        for (int c = 0; c < block_step; c += mb_step) {
    577          const int mi_row = row + r;
    578          const int mi_col = col + c;
    579          rate_sum += cpi->prep_rate_estimates[(mi_row / mb_step) *
    580                                                   cpi->frame_info.mi_cols +
    581                                               (mi_col / mb_step)];
    582        }
    583      }
    584      uniform_rate_sum += rate_sum;
    585    }
    586  }
    587 
    588  const double scale = uniform_rate_sum / ext_rate_sum;
    589  cpi->ext_rate_scale = scale;
    590 }
    591 
// Top-level entry for the all-intra visual model: allocates the frame and
// scratch buffers, computes per-8x8 Weber statistics (multithreaded when
// possible), optionally turns off intra tools and ingests the external rate
// distribution, then derives cpi->norm_wiener_variance through two
// refinement passes over the per-superblock variances.
void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
 AV1_COMMON *const cm = &cpi->common;
 const SequenceHeader *const seq_params = cm->seq_params;
 if (aom_realloc_frame_buffer(
         &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
         seq_params->subsampling_y, seq_params->use_highbitdepth,
         cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
         NULL, cpi->alloc_pyramid, 0))
   aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                      "Failed to allocate frame buffer");
 av1_alloc_mb_wiener_var_pred_buf(&cpi->common, &cpi->td);
 cpi->norm_wiener_variance = 0;

 MACROBLOCK *x = &cpi->td.mb;
 MACROBLOCKD *xd = &x->e_mbd;
 // xd->mi needs to be setup since it is used in av1_frame_init_quantizer.
 MB_MODE_INFO mbmi;
 memset(&mbmi, 0, sizeof(mbmi));
 MB_MODE_INFO *mbmi_ptr = &mbmi;
 xd->mi = &mbmi_ptr;
 cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
 av1_frame_init_quantizer(cpi);

 double sum_rec_distortion = 0.0;
 double sum_est_rate = 0.0;

 MultiThreadInfo *const mt_info = &cpi->mt_info;
 const int num_workers =
     AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
 AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
 // Default to the dummy (no-op) sync callbacks for the single-thread path.
 intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read_dummy;
 intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write_dummy;
 // Calculate differential contrast for each block for the entire image.
 // TODO(chengchen): properly accumulate the distortion and rate in
 // av1_calc_mb_wiener_var_mt(). Until then, call calc_mb_wiener_var() if
 // auto_intra_tools_off is true.
 if (num_workers > 1 && !cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
   intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read;
   intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write;
   av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion,
                             &sum_est_rate);
 } else {
   calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate);
 }

 // Determine whether to turn off several intra coding tools.
 automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);

 // Read external rate distribution and use it to guide delta quantization
 if (cpi->oxcf.enable_rate_guide_deltaq) ext_rate_guided_quantization(cpi);

 const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size;
 cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size);
 const int norm_step = mi_size_wide[norm_block_size];

 double sb_wiener_log = 0;
 double sb_count = 0;
 // Two refinement passes: clamp each superblock's beta to [0.25, 4], skip
 // superblocks limited by their min_max_scale, and recompute the
 // SATD/sqrt(SSE)-weighted geometric mean as the new norm.
 for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
   sb_wiener_log = 0;
   sb_count = 0;
   for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
     for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
          mi_col += norm_step) {
       int sb_wiener_var =
           get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);

       double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
       double min_max_scale = AOMMAX(
           1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));

       beta = AOMMIN(beta, 4);
       beta = AOMMAX(beta, 0.25);

       if (beta < 1 / min_max_scale) continue;

       sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);

       int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
       int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
       double scaled_satd = (double)satd / sqrt((double)sse);
       sb_wiener_log += scaled_satd * log(sb_wiener_var);
       sb_count += scaled_satd;
     }
   }

   if (sb_count > 0)
     cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
   cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
 }

 // Set the pointer to null since mbmi is only allocated inside this function.
 xd->mi = NULL;
 aom_free_frame_buffer(&cm->cur_frame->buf);
 av1_dealloc_mb_wiener_var_pred_buf(&cpi->td);
}
    687 
    688 static int get_rate_guided_quantizer(const AV1_COMP *const cpi,
    689                                     BLOCK_SIZE bsize, int mi_row, int mi_col) {
    690  // Calculation uses 8x8.
    691  const int mb_step = mi_size_wide[cpi->weber_bsize];
    692  // Accumulate to 16x16
    693  const int block_step = mi_size_wide[BLOCK_16X16];
    694  double sb_rate_hific = 0.0;
    695  double sb_rate_uniform = 0.0;
    696  for (int row = mi_row; row < mi_row + mi_size_wide[bsize];
    697       row += block_step) {
    698    for (int col = mi_col; col < mi_col + mi_size_high[bsize];
    699         col += block_step) {
    700      sb_rate_hific +=
    701          cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
    702                                     (col / mb_step)];
    703 
    704      for (int r = 0; r < block_step; r += mb_step) {
    705        for (int c = 0; c < block_step; c += mb_step) {
    706          const int this_row = row + r;
    707          const int this_col = col + c;
    708          sb_rate_uniform +=
    709              cpi->prep_rate_estimates[(this_row / mb_step) *
    710                                           cpi->frame_info.mi_cols +
    711                                       (this_col / mb_step)];
    712        }
    713      }
    714    }
    715  }
    716  sb_rate_hific *= cpi->ext_rate_scale;
    717 
    718  const double weight = 1.0;
    719  const double rate_diff =
    720      weight * (sb_rate_hific - sb_rate_uniform) / sb_rate_uniform;
    721  double scale = pow(2, rate_diff);
    722 
    723  scale = scale * scale;
    724  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
    725  scale = 1.0 / AOMMIN(1.0 / scale, min_max_scale);
    726 
    727  const AV1_COMMON *const cm = &cpi->common;
    728  const int base_qindex = cm->quant_params.base_qindex;
    729  int offset =
    730      av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, scale);
    731  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
    732  const int max_offset = delta_q_info->delta_q_res * 10;
    733  offset = AOMMIN(offset, max_offset - 1);
    734  offset = AOMMAX(offset, -max_offset + 1);
    735  int qindex = cm->quant_params.base_qindex + offset;
    736  qindex = AOMMIN(qindex, MAXQ);
    737  qindex = AOMMAX(qindex, MINQ);
    738  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);
    739 
    740  return qindex;
    741 }
    742 
    743 int av1_get_sbq_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    744                              int mi_row, int mi_col) {
    745  if (cpi->oxcf.enable_rate_guide_deltaq) {
    746    return get_rate_guided_quantizer(cpi, bsize, mi_row, mi_col);
    747  }
    748 
    749  const AV1_COMMON *const cm = &cpi->common;
    750  const int base_qindex = cm->quant_params.base_qindex;
    751  int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col);
    752  int offset = 0;
    753  double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
    754  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
    755  beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
    756 
    757  // Cap beta such that the delta q value is not much far away from the base q.
    758  beta = AOMMIN(beta, 4);
    759  beta = AOMMAX(beta, 0.25);
    760  offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
    761  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
    762  offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1);
    763  offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1);
    764  int qindex = cm->quant_params.base_qindex + offset;
    765  qindex = AOMMIN(qindex, MAXQ);
    766  qindex = AOMMAX(qindex, MINQ);
    767  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);
    768 
    769  return qindex;
    770 }
    771 
    772 void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) {
    773  AV1_COMMON *cm = &cpi->common;
    774 
    775  if (cpi->mb_delta_q) return;
    776 
    777  CHECK_MEM_ERROR(cm, cpi->mb_delta_q,
    778                  aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols,
    779                             sizeof(*cpi->mb_delta_q)));
    780 }
    781 
    782 #if CONFIG_TFLITE
    783 static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows,
    784                         int bit_depth, uint8_t *y_buffer, int y_stride,
    785                         float *predicts0, float *predicts1) {
    786  // Create the model and interpreter options.
    787  TfLiteModel *model =
    788      TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize);
    789  if (model == NULL) return 1;
    790 
    791  TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
    792  TfLiteInterpreterOptionsSetNumThreads(options, 2);
    793  if (options == NULL) {
    794    TfLiteModelDelete(model);
    795    return 1;
    796  }
    797 
    798  // Create the interpreter.
    799  TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
    800  if (interpreter == NULL) {
    801    TfLiteInterpreterOptionsDelete(options);
    802    TfLiteModelDelete(model);
    803    return 1;
    804  }
    805 
    806  // Allocate tensors and populate the input tensor data.
    807  TfLiteInterpreterAllocateTensors(interpreter);
    808  TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
    809  if (input_tensor == NULL) {
    810    TfLiteInterpreterDelete(interpreter);
    811    TfLiteInterpreterOptionsDelete(options);
    812    TfLiteModelDelete(model);
    813    return 1;
    814  }
    815 
    816  size_t input_size = TfLiteTensorByteSize(input_tensor);
    817  float *input_data = aom_calloc(input_size, 1);
    818  if (input_data == NULL) {
    819    TfLiteInterpreterDelete(interpreter);
    820    TfLiteInterpreterOptionsDelete(options);
    821    TfLiteModelDelete(model);
    822    return 1;
    823  }
    824 
    825  const int num_mi_w = mi_size_wide[block_size];
    826  const int num_mi_h = mi_size_high[block_size];
    827  for (int row = 0; row < num_rows; ++row) {
    828    for (int col = 0; col < num_cols; ++col) {
    829      const int row_offset = (row * num_mi_h) << 2;
    830      const int col_offset = (col * num_mi_w) << 2;
    831 
    832      uint8_t *buf = y_buffer + row_offset * y_stride + col_offset;
    833      int r = row_offset, pos = 0;
    834      const float base = (float)((1 << bit_depth) - 1);
    835      while (r < row_offset + (num_mi_h << 2)) {
    836        for (int c = 0; c < (num_mi_w << 2); ++c) {
    837          input_data[pos++] = bit_depth > 8
    838                                  ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base
    839                                  : (float)*(buf + c) / base;
    840        }
    841        buf += y_stride;
    842        ++r;
    843      }
    844      TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size);
    845 
    846      // Execute inference.
    847      if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) {
    848        TfLiteInterpreterDelete(interpreter);
    849        TfLiteInterpreterOptionsDelete(options);
    850        TfLiteModelDelete(model);
    851        return 1;
    852      }
    853 
    854      // Extract the output tensor data.
    855      const TfLiteTensor *output_tensor =
    856          TfLiteInterpreterGetOutputTensor(interpreter, 0);
    857      if (output_tensor == NULL) {
    858        TfLiteInterpreterDelete(interpreter);
    859        TfLiteInterpreterOptionsDelete(options);
    860        TfLiteModelDelete(model);
    861        return 1;
    862      }
    863 
    864      size_t output_size = TfLiteTensorByteSize(output_tensor);
    865      float output_data[2];
    866 
    867      TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size);
    868      predicts0[row * num_cols + col] = output_data[0];
    869      predicts1[row * num_cols + col] = output_data[1];
    870    }
    871  }
    872 
    873  // Dispose of the model and interpreter objects.
    874  TfLiteInterpreterDelete(interpreter);
    875  TfLiteInterpreterOptionsDelete(options);
    876  TfLiteModelDelete(model);
    877  aom_free(input_data);
    878  return 0;
    879 }
    880 
    881 void av1_set_mb_ur_variance(AV1_COMP *cpi) {
    882  const AV1_COMMON *cm = &cpi->common;
    883  const CommonModeInfoParams *const mi_params = &cm->mi_params;
    884  uint8_t *y_buffer = cpi->source->y_buffer;
    885  const int y_stride = cpi->source->y_stride;
    886  const int block_size = cpi->common.seq_params->sb_size;
    887  const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
    888 
    889  const int num_mi_w = mi_size_wide[block_size];
    890  const int num_mi_h = mi_size_high[block_size];
    891  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
    892  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
    893 
    894  // TODO(sdeng): fit a better model_1; disable it at this time.
    895  float *mb_delta_q0, *mb_delta_q1, delta_q_avg0 = 0.0f;
    896  CHECK_MEM_ERROR(cm, mb_delta_q0,
    897                  aom_calloc(num_rows * num_cols, sizeof(float)));
    898  CHECK_MEM_ERROR(cm, mb_delta_q1,
    899                  aom_calloc(num_rows * num_cols, sizeof(float)));
    900 
    901  if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer,
    902                    y_stride, mb_delta_q0, mb_delta_q1)) {
    903    aom_internal_error(cm->error, AOM_CODEC_ERROR,
    904                       "Failed to call TFlite functions.");
    905  }
    906 
    907  // Loop through each SB block.
    908  for (int row = 0; row < num_rows; ++row) {
    909    for (int col = 0; col < num_cols; ++col) {
    910      const int index = row * num_cols + col;
    911      delta_q_avg0 += mb_delta_q0[index];
    912    }
    913  }
    914 
    915  delta_q_avg0 /= (float)(num_rows * num_cols);
    916 
    917  float scaling_factor;
    918  const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ;
    919  if (cq_level < delta_q_avg0) {
    920    scaling_factor = cq_level / delta_q_avg0;
    921  } else {
    922    scaling_factor = 1.0f - (cq_level - delta_q_avg0) / (1.0f - delta_q_avg0);
    923  }
    924 
    925  for (int row = 0; row < num_rows; ++row) {
    926    for (int col = 0; col < num_cols; ++col) {
    927      const int index = row * num_cols + col;
    928      cpi->mb_delta_q[index] =
    929          RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ *
    930               scaling_factor * (mb_delta_q0[index] - delta_q_avg0));
    931    }
    932  }
    933 
    934  aom_free(mb_delta_q0);
    935  aom_free(mb_delta_q1);
    936 }
    937 #else  // !CONFIG_TFLITE
// Non-TFLite fallback: fills cpi->mb_delta_q with per-superblock delta-q
// values derived from the geometric mean of 8x8 luma variances, evaluated
// through two exponential models fitted to user-labeled data and blended
// according to where the configured cq_level falls between the models'
// frame-average outputs.
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  // Superblock grid dimensions (round up to cover partial edge SBs).
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  // Raw per-SB delta-q predictions from the two fitted models.
  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between the current version (Sept 2021)
  // and the baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user labeled data.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };
  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      // Loop through each 8x8 block (mi step of 2 = 8 luma pixels).
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          // mi -> pixel coordinates (each mi unit is 4 luma pixels).
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance;
          block_variance = av1_get_perpixel_variance_facade(
              cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);

          // Clamp to >= 1 so the log() below is well-defined.
          block_variance = AOMMAX(block_variance, 1);
          var += log((double)block_variance);
          num_of_var += 1.0;
        }
      }
      // Geometric mean of the 8x8 variances within this SB.
      var = exp(var / num_of_var);
      // Evaluate both fitted models: delta_q = a * exp(-b * var) + c.
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  // Frame-wide mean prediction of each model.
  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

  // Select a model (model_idx 0 or 1), or an interpolation between the two
  // (model_idx == 2), based on where cq_level sits relative to the model
  // means, and derive a scaling factor for the final delta-q values.
  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  // Mean of the interpolated model, used to re-center the delta-q map.
  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        // Interpolated path: blend the two models, then center on the mean.
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        // Single-model path: scale the deviation from that model's mean.
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
   1043 #endif
   1044 
   1045 int av1_get_sbq_user_rating_based(const AV1_COMP *const cpi, int mi_row,
   1046                                  int mi_col) {
   1047  const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
   1048  const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
   1049  const AV1_COMMON *const cm = &cpi->common;
   1050  const int base_qindex = cm->quant_params.base_qindex;
   1051  if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex;
   1052 
   1053  const int num_mi_w = mi_size_wide[bsize];
   1054  const int num_mi_h = mi_size_high[bsize];
   1055  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
   1056  const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w);
   1057  const int delta_q = cpi->mb_delta_q[index];
   1058 
   1059  int qindex = base_qindex + delta_q;
   1060  qindex = AOMMIN(qindex, MAXQ);
   1061  qindex = AOMMAX(qindex, MINQ + 1);
   1062 
   1063  return qindex;
   1064 }
   1065 
   1066 #if !CONFIG_REALTIME_ONLY
   1067 
   1068 // Variance Boost: a variance adaptive quantization implementation
   1069 // SVT-AV1 appendix with an overview and a graphical, step-by-step explanation
   1070 // of the implementation
   1071 // https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Appendix-Variance-Boost.md
   1072 int av1_get_sbq_variance_boost(const AV1_COMP *cpi, const MACROBLOCK *x) {
   1073  const AV1_COMMON *cm = &cpi->common;
   1074  const int base_qindex = cm->quant_params.base_qindex;
   1075  const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;
   1076 
   1077  // Variance Boost only supports 64x64 SBs.
   1078  assert(cm->seq_params->sb_size == BLOCK_64X64);
   1079 
   1080  unsigned int variance = av1_get_variance_boost_block_variance(cpi, x);
   1081  // Compute Variance Boost strength from the deltaq_strength value.
   1082  double strength = (cpi->oxcf.q_cfg.deltaq_strength / 100.0) * 3.0;
   1083 
   1084  // Clamp strength to a reasonable range.
   1085  // deltaq_strength can go up to 1000%, which is too strong for the Variance
   1086  // Boost scaling. Testing revealed strengths as high as 6 (200%) are still
   1087  // reasonable for some specific scenarios.
   1088  strength = fclamp(strength, 0.0, 6.0);
   1089 
   1090  // Variance = 0 areas are either completely flat patches or have very fine
   1091  // gradients. Boost these blocks as if they have a variance of 1.
   1092  if (variance == 0) {
   1093    variance = 1;
   1094  }
   1095 
   1096  // Compute a boost based on a fast-growing formula.
   1097  // High and medium variance SBs essentially get no boost, while lower variance
   1098  // SBs get increasingly stronger boosts.
   1099  // Still picture curve, with variance crossover point at 1024.
   1100  double qstep_ratio = 0.15 * strength * (-log2((double)variance) + 10.0) + 1.0;
   1101  qstep_ratio = fclamp(qstep_ratio, 1.0, VAR_BOOST_MAX_BOOST);
   1102 
   1103  double base_q = av1_convert_qindex_to_q(base_qindex, bit_depth);
   1104  double target_q = base_q / qstep_ratio;
   1105  int target_qindex = av1_convert_q_to_qindex(target_q, bit_depth);
   1106 
   1107  // Determine the SB's delta_q boost by computing an (unscaled) delta_q from
   1108  // the base and target q values, then scale that delta_q according to the
   1109  // frame's base qindex.
   1110  // The scaling coefficients were chosen empirically to maximize SSIMULACRA 2
   1111  // scores, 10th percentile scores, and subjective quality. Boosts become
   1112  // smaller (for a given variance) the lower the base qindex.
   1113  int boost = (int)round((base_qindex + 544.0) * (base_qindex - target_qindex) /
   1114                         1279.0);
   1115  boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost);
   1116 
   1117  // Variance Boost was designed to always operate in the lossy domain, so MINQ
   1118  // is excluded.
   1119  int sb_qindex = AOMMAX(base_qindex - boost, MINQ + 1);
   1120 
   1121  return sb_qindex;
   1122 }
   1123 #endif