tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rd.c (63164B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <limits.h>
     14 #include <math.h>
     15 #include <stdio.h>
     16 
     17 #include "aom_dsp/aom_dsp_common.h"
     18 #include "aom_mem/aom_mem.h"
     19 #include "aom_ports/bitops.h"
     20 #include "aom_ports/mem.h"
     21 #include "aom_ports/aom_once.h"
     22 
     23 #include "av1/common/common.h"
     24 #include "av1/common/entropy.h"
     25 #include "av1/common/entropymode.h"
     26 #include "av1/common/pred_common.h"
     27 #include "av1/common/quant_common.h"
     28 #include "av1/common/reconinter.h"
     29 #include "av1/common/reconintra.h"
     30 #include "av1/common/seg_common.h"
     31 
     32 #include "av1/encoder/cost.h"
     33 #include "av1/encoder/encodemv.h"
     34 #include "av1/encoder/encoder.h"
     35 #include "av1/encoder/nonrd_opt.h"
     36 #include "av1/encoder/ratectrl.h"
     37 #include "av1/encoder/rd.h"
     38 #include "config/aom_config.h"
     39 
     40 #define RD_THRESH_POW 1.25
     41 
     42 // The baseline rd thresholds for breaking out of the rd loop for
     43 // certain modes are assumed to be based on 8x8 blocks.
     44 // This table is used to correct for block size.
     45 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
     46 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
     47  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
     48 };
     49 
     50 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
     51                                            [EXT_TX_SIZES] = {
     52                                              { 1, 1, 1, 1 },  // unused
     53                                              { 1, 1, 0, 0 },
     54                                              { 0, 0, 1, 0 },
     55                                            };
     56 
     57 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
     58                                            [EXT_TX_SIZES] = {
     59                                              { 1, 1, 1, 1 },  // unused
     60                                              { 1, 1, 0, 0 },
     61                                              { 0, 0, 1, 0 },
     62                                              { 0, 1, 1, 1 },
     63                                            };
     64 
     65 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
     66                                                      EXT_TX_SETS_INTER)] = {
     67  {
     68      // Intra
     69      EXT_TX_SET_DCTONLY,
     70      EXT_TX_SET_DTT4_IDTX_1DDCT,
     71      EXT_TX_SET_DTT4_IDTX,
     72  },
     73  {
     74      // Inter
     75      EXT_TX_SET_DCTONLY,
     76      EXT_TX_SET_ALL16,
     77      EXT_TX_SET_DTT9_IDTX_1DDCT,
     78      EXT_TX_SET_DCT_IDTX,
     79  },
     80 };
     81 
     82 void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
     83                         FRAME_CONTEXT *fc) {
     84  int i, j;
     85 
     86  for (i = 0; i < PARTITION_CONTEXTS; ++i)
     87    av1_cost_tokens_from_cdf(mode_costs->partition_cost[i],
     88                             fc->partition_cdf[i], NULL);
     89 
     90  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
     91    for (i = 0; i < SKIP_MODE_CONTEXTS; ++i) {
     92      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[i],
     93                               fc->skip_mode_cdfs[i], NULL);
     94    }
     95  }
     96 
     97  for (i = 0; i < SKIP_CONTEXTS; ++i) {
     98    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[i],
     99                             fc->skip_txfm_cdfs[i], NULL);
    100  }
    101 
    102  for (i = 0; i < KF_MODE_CONTEXTS; ++i)
    103    for (j = 0; j < KF_MODE_CONTEXTS; ++j)
    104      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[i][j],
    105                               fc->kf_y_cdf[i][j], NULL);
    106 
    107  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
    108    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[i], fc->y_mode_cdf[i],
    109                             NULL);
    110  for (i = 0; i < CFL_ALLOWED_TYPES; ++i)
    111    for (j = 0; j < INTRA_MODES; ++j)
    112      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[i][j],
    113                               fc->uv_mode_cdf[i][j], NULL);
    114 
    115  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
    116                           fc->filter_intra_mode_cdf, NULL);
    117  for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
    118    if (av1_filter_intra_allowed_bsize(cm, i))
    119      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[i],
    120                               fc->filter_intra_cdfs[i], NULL);
    121  }
    122 
    123  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    124    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[i],
    125                             fc->switchable_interp_cdf[i], NULL);
    126 
    127  for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) {
    128    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[i],
    129                             fc->palette_y_size_cdf[i], NULL);
    130    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[i],
    131                             fc->palette_uv_size_cdf[i], NULL);
    132    for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) {
    133      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[i][j],
    134                               fc->palette_y_mode_cdf[i][j], NULL);
    135    }
    136  }
    137 
    138  for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) {
    139    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[i],
    140                             fc->palette_uv_mode_cdf[i], NULL);
    141  }
    142 
    143  for (i = 0; i < PALETTE_SIZES; ++i) {
    144    for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) {
    145      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[i][j],
    146                               fc->palette_y_color_index_cdf[i][j], NULL);
    147      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[i][j],
    148                               fc->palette_uv_color_index_cdf[i][j], NULL);
    149    }
    150  }
    151 
    152  int sign_cost[CFL_JOINT_SIGNS];
    153  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
    154  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
    155    int *cost_u = mode_costs->cfl_cost[joint_sign][CFL_PRED_U];
    156    int *cost_v = mode_costs->cfl_cost[joint_sign][CFL_PRED_V];
    157    if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) {
    158      memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u));
    159    } else {
    160      const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
    161      av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL);
    162    }
    163    if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) {
    164      memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v));
    165    } else {
    166      const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
    167      av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL);
    168    }
    169    for (int u = 0; u < CFL_ALPHABET_SIZE; u++)
    170      cost_u[u] += sign_cost[joint_sign];
    171  }
    172 
    173  for (i = 0; i < MAX_TX_CATS; ++i)
    174    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
    175      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[i][j],
    176                               fc->tx_size_cdf[i][j], NULL);
    177 
    178  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) {
    179    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[i],
    180                             fc->txfm_partition_cdf[i], NULL);
    181  }
    182 
    183  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
    184    int s;
    185    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
    186      if (use_inter_ext_tx_for_txsize[s][i]) {
    187        av1_cost_tokens_from_cdf(
    188            mode_costs->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i],
    189            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
    190      }
    191    }
    192    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
    193      if (use_intra_ext_tx_for_txsize[s][i]) {
    194        for (j = 0; j < INTRA_MODES; ++j) {
    195          av1_cost_tokens_from_cdf(
    196              mode_costs->intra_tx_type_costs[s][i][j],
    197              fc->intra_ext_tx_cdf[s][i][j],
    198              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]);
    199        }
    200      }
    201    }
    202  }
    203  for (i = 0; i < DIRECTIONAL_MODES; ++i) {
    204    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[i],
    205                             fc->angle_delta_cdf[i], NULL);
    206  }
    207  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
    208 
    209  for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i) {
    210    av1_cost_tokens_from_cdf(mode_costs->spatial_pred_cost[i],
    211                             fc->seg.spatial_pred_seg_cdf[i], NULL);
    212  }
    213 
    214  for (i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) {
    215    av1_cost_tokens_from_cdf(mode_costs->tmp_pred_cost[i], fc->seg.pred_cdf[i],
    216                             NULL);
    217  }
    218 
    219  if (!frame_is_intra_only(cm)) {
    220    for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
    221      av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[i],
    222                               fc->comp_inter_cdf[i], NULL);
    223    }
    224 
    225    for (i = 0; i < REF_CONTEXTS; ++i) {
    226      for (j = 0; j < SINGLE_REFS - 1; ++j) {
    227        av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[i][j],
    228                                 fc->single_ref_cdf[i][j], NULL);
    229      }
    230    }
    231 
    232    for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) {
    233      av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[i],
    234                               fc->comp_ref_type_cdf[i], NULL);
    235    }
    236 
    237    for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) {
    238      for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) {
    239        av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[i][j],
    240                                 fc->uni_comp_ref_cdf[i][j], NULL);
    241      }
    242    }
    243 
    244    for (i = 0; i < REF_CONTEXTS; ++i) {
    245      for (j = 0; j < FWD_REFS - 1; ++j) {
    246        av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[i][j],
    247                                 fc->comp_ref_cdf[i][j], NULL);
    248      }
    249    }
    250 
    251    for (i = 0; i < REF_CONTEXTS; ++i) {
    252      for (j = 0; j < BWD_REFS - 1; ++j) {
    253        av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[i][j],
    254                                 fc->comp_bwdref_cdf[i][j], NULL);
    255      }
    256    }
    257 
    258    for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
    259      av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
    260                               fc->intra_inter_cdf[i], NULL);
    261    }
    262 
    263    for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
    264      av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[i], fc->newmv_cdf[i],
    265                               NULL);
    266    }
    267 
    268    for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) {
    269      av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[i],
    270                               fc->zeromv_cdf[i], NULL);
    271    }
    272 
    273    for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
    274      av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[i], fc->refmv_cdf[i],
    275                               NULL);
    276    }
    277 
    278    for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
    279      av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[i], fc->drl_cdf[i],
    280                               NULL);
    281    }
    282    for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
    283      av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[i],
    284                               fc->inter_compound_mode_cdf[i], NULL);
    285    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
    286      av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[i],
    287                               fc->compound_type_cdf[i], NULL);
    288    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
    289      if (av1_is_wedge_used(i)) {
    290        av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[i],
    291                                 fc->wedge_idx_cdf[i], NULL);
    292      }
    293    }
    294    for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
    295      av1_cost_tokens_from_cdf(mode_costs->interintra_cost[i],
    296                               fc->interintra_cdf[i], NULL);
    297      av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[i],
    298                               fc->interintra_mode_cdf[i], NULL);
    299    }
    300    for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
    301      av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
    302                               fc->wedge_interintra_cdf[i], NULL);
    303    }
    304    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
    305      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[i],
    306                               fc->motion_mode_cdf[i], NULL);
    307    }
    308    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
    309      av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[i],
    310                               fc->obmc_cdf[i], NULL);
    311    }
    312    for (i = 0; i < COMP_INDEX_CONTEXTS; ++i) {
    313      av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[i],
    314                               fc->compound_index_cdf[i], NULL);
    315    }
    316    for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) {
    317      av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
    318                               fc->comp_group_idx_cdf[i], NULL);
    319    }
    320  }
    321 }
    322 
    323 #if !CONFIG_REALTIME_ONLY
    324 void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
    325  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
    326                           fc->switchable_restore_cdf, NULL);
    327  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
    328                           fc->wiener_restore_cdf, NULL);
    329  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
    330                           fc->sgrproj_restore_cdf, NULL);
    331 }
    332 #endif  // !CONFIG_REALTIME_ONLY
    333 
    334 // Values are now correlated to quantizer.
    335 static int sad_per_bit_lut_8[QINDEX_RANGE];
    336 static int sad_per_bit_lut_10[QINDEX_RANGE];
    337 static int sad_per_bit_lut_12[QINDEX_RANGE];
    338 
    339 static void init_me_luts_bd(int *bit16lut, int range,
    340                            aom_bit_depth_t bit_depth) {
    341  int i;
    342  // Initialize the sad lut tables using a formulaic calculation for now.
    343  // This is to make it easier to resolve the impact of experimental changes
    344  // to the quantizer tables.
    345  for (i = 0; i < range; i++) {
    346    const double q = av1_convert_qindex_to_q(i, bit_depth);
    347    bit16lut[i] = (int)(0.0418 * q + 2.4107);
    348  }
    349 }
    350 
    351 static void init_me_luts(void) {
    352  init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
    353  init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
    354  init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
    355 }
    356 
    357 void av1_init_me_luts(void) { aom_once(init_me_luts); }
    358 
    // ARF boost adjustment factors, indexed by boost index (0..15). The
    // selected factor is applied as rdmult += (rdmult * factor) >> 7.
    static const int rd_boost_factor[16] = {
      64, 32, 32, 32,
      24, 16, 12, 12,
      8,  8,  4,  4,
      2,  2,  1,  0
    };
    361 
    // Layer depth adjustment factors, indexed by layer depth (0..6). The
    // selected factor is applied as rdmult = (rdmult * factor) >> 7.
    static const int rd_layer_depth_factor[7] = { 160, 160, 160, 160,
                                                  192, 208, 224 };
    365 
    // Default rd multiplier for inter frames: a linear function of the
    // argument. The coefficients are a first pass estimate based on data from
    // a previous Vizer run.
    static double def_inter_rd_multiplier(int qindex) {
      const double offset = 3.2;
      const double slope = 0.0015;
      return offset + (slope * (double)qindex);
    }
    372 
    // Default rd multiplier for ARF/Golden frames: a linear function of the
    // argument. The coefficients are a first pass estimate based on data from
    // a previous Vizer run.
    static double def_arf_rd_multiplier(int qindex) {
      const double offset = 3.25;
      const double slope = 0.0015;
      return offset + (slope * (double)qindex);
    }
    379 
    // Default rd multiplier for key frames: a linear function of the
    // argument. The coefficients are a first pass estimate based on data from
    // a previous Vizer run.
    static double def_kf_rd_multiplier(int qindex) {
      const double offset = 3.3;
      const double slope = 0.0015;
      return offset + (slope * (double)qindex);
    }
    386 
    387 int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
    388                                        FRAME_UPDATE_TYPE update_type,
    389                                        int qindex, aom_tune_metric tuning) {
    390  const int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
    391  int64_t rdmult = q * q;
    392  if (update_type == KF_UPDATE) {
    393    double def_rd_q_mult = def_kf_rd_multiplier(q);
    394    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
    395  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    396    double def_rd_q_mult = def_arf_rd_multiplier(q);
    397    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
    398  } else {
    399    double def_rd_q_mult = def_inter_rd_multiplier(q);
    400    rdmult = (int64_t)((double)rdmult * def_rd_q_mult);
    401  }
    402 
    403  if (tuning == AOM_TUNE_IQ || tuning == AOM_TUNE_SSIMULACRA2) {
    404    // Further multiply rdmult (by up to 200/128 = 1.5625) to improve image
    405    // quality. The most noticeable effect is a mild bias towards choosing
    406    // larger transform sizes (e.g. one 16x16 transform instead of 4 8x8
    407    // transforms).
    408    // For very high qindexes, start progressively reducing the weight towards
    409    // unity (128/128), as transforms are large enough and making them even
    410    // larger actually harms subjective quality and SSIMULACRA 2 scores.
    411    // This weight part of the equation was determined by iteratively increasing
    412    // weight on CID22 and Daala's subset1, and observing its effects on visual
    413    // quality and SSIMULACRA 2 scores along the usable (0-100) range.
    414    // The ramp-down part of the equation was determined by choosing a fixed
    415    // initial qindex point [qindex 159 = (255 - 159) * 3 / 4] where SSIMULACRA
    416    // 2 scores for encodes with qindexes greater than 159 scored at or above
    417    // their equivalents with no rdmult adjustment.
    418    const int weight = clamp(((255 - qindex) * 3) / 4, 0, 72) + 128;
    419    rdmult = (int64_t)((double)rdmult * weight / 128.0);
    420  }
    421 
    422  switch (bit_depth) {
    423    case AOM_BITS_8: break;
    424    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    425    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    426    default:
    427      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
    428      return -1;
    429  }
    430  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
    431 }
    432 
    433 int av1_compute_rd_mult(const int qindex, const aom_bit_depth_t bit_depth,
    434                        const FRAME_UPDATE_TYPE update_type,
    435                        const int layer_depth, const int boost_index,
    436                        const FRAME_TYPE frame_type,
    437                        const int use_fixed_qp_offsets,
    438                        const int is_stat_consumption_stage,
    439                        const aom_tune_metric tuning) {
    440  int64_t rdmult = av1_compute_rd_mult_based_on_qindex(bit_depth, update_type,
    441                                                       qindex, tuning);
    442  if (is_stat_consumption_stage && !use_fixed_qp_offsets &&
    443      (frame_type != KEY_FRAME)) {
    444    // Layer depth adjustment
    445    rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
    446    // ARF boost adjustment
    447    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
    448  }
    449  return rdmult > 0 ? (int)AOMMIN(rdmult, INT_MAX) : 1;
    450 }
    451 
    452 int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
    453  assert(beta > 0.0);
    454  int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
    455  int newq = (int)rint(q / sqrt(beta));
    456  int orig_qindex = qindex;
    457  if (newq == q) {
    458    return 0;
    459  }
    460  if (newq < q) {
    461    while (qindex > 0) {
    462      qindex--;
    463      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
    464      if (newq >= q) {
    465        break;
    466      }
    467    }
    468  } else {
    469    while (qindex < MAXQ) {
    470      qindex++;
    471      q = av1_dc_quant_QTX(qindex, 0, bit_depth);
    472      if (newq <= q) {
    473        break;
    474      }
    475    }
    476  }
    477  return qindex - orig_qindex;
    478 }
    479 
    480 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
    481                                  int curr_qindex) {
    482  curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
    483  const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
    484  const int deltaq_deadzone = delta_q_res / 4;
    485  const int qmask = ~(delta_q_res - 1);
    486  int abs_deltaq_index = abs(curr_qindex - prev_qindex);
    487  abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
    488  int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
    489  adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
    490  return adjust_qindex;
    491 }
    492 
    493 #if !CONFIG_REALTIME_ONLY
    494 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
    495  assert(beta > 0.0);
    496  const AV1_COMMON *cm = &cpi->common;
    497 
    498  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
    499  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
    500  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
    501  const FRAME_TYPE frame_type = cm->current_frame.frame_type;
    502 
    503  const int qindex_rdmult = cm->quant_params.base_qindex;
    504  return (int)(av1_compute_rd_mult(
    505                   qindex_rdmult, cm->seq_params->bit_depth,
    506                   cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
    507                   layer_depth, boost_index, frame_type,
    508                   cpi->oxcf.q_cfg.use_fixed_qp_offsets,
    509                   is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning) /
    510               beta);
    511 }
    512 #endif  // !CONFIG_REALTIME_ONLY
    513 
    514 static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
    515  double q;
    516  switch (bit_depth) {
    517    case AOM_BITS_8: q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_8) / 4.0; break;
    518    case AOM_BITS_10:
    519      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_10) / 16.0;
    520      break;
    521    case AOM_BITS_12:
    522      q = av1_dc_quant_QTX(qindex, 0, AOM_BITS_12) / 64.0;
    523      break;
    524    default:
    525      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
    526      return -1;
    527  }
    528  // TODO(debargha): Adjust the function below.
    529  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
    530 }
    531 
    532 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
    533  switch (cpi->common.seq_params->bit_depth) {
    534    case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
    535    case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
    536    case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
    537    default:
    538      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
    539  }
    540 }
    541 
    542 static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
    543                                 int use_nonrd_pick_mode) {
    544  int i, bsize, segment_id;
    545  THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
    546  int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;
    547 
    548  if (use_nonrd_pick_mode) {
    549    for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
    550      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
    551      if (ref != INTRA_FRAME) {
    552        for (i = 0; i < RTC_INTER_MODES; i++)
    553          mode_indices[num_modes_count++] =
    554              mode_idx[ref][mode_offset(inter_mode_list[i])];
    555      } else {
    556        for (i = 0; i < RTC_INTRA_MODES; i++)
    557          mode_indices[num_modes_count++] =
    558              mode_idx[ref][mode_offset(intra_mode_list[i])];
    559      }
    560    }
    561  }
    562 
    563  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    564    const int qindex = clamp(
    565        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
    566            cm->quant_params.y_dc_delta_q,
    567        0, MAXQ);
    568    const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);
    569 
    570    for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    571      // Threshold here seems unnecessarily harsh but fine given actual
    572      // range of values used for cpi->sf.thresh_mult[].
    573      const int t = q * rd_thresh_block_size_factor[bsize];
    574      const int thresh_max = INT_MAX / t;
    575 
    576      for (i = 0; i < num_modes_count; ++i) {
    577        const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
    578        rd->threshes[segment_id][bsize][mode_index] =
    579            rd->thresh_mult[mode_index] < thresh_max
    580                ? rd->thresh_mult[mode_index] * t / 4
    581                : INT_MAX;
    582      }
    583    }
    584  }
    585 }
    586 
    587 void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
    588                          const int num_planes) {
    589  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);
    590  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    591    for (int plane = 0; plane < nplanes; ++plane) {
    592      LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];
    593 
    594      for (int ctx = 0; ctx < 2; ++ctx) {
    595        aom_cdf_prob *pcdf;
    596        switch (eob_multi_size) {
    597          case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break;
    598          case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break;
    599          case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break;
    600          case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break;
    601          case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break;
    602          case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break;
    603          case 6:
    604          default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break;
    605        }
    606        av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
    607      }
    608    }
    609  }
    610  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    611    for (int plane = 0; plane < nplanes; ++plane) {
    612      LV_MAP_COEFF_COST *pcost = &coeff_costs->coeff_costs[tx_size][plane];
    613 
    614      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx)
    615        av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx],
    616                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
    617 
    618      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
    619        av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
    620                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
    621                                 NULL);
    622      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx)
    623        av1_cost_tokens_from_cdf(pcost->base_cost[ctx],
    624                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
    625 
    626      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
    627        pcost->base_cost[ctx][4] = 0;
    628        pcost->base_cost[ctx][5] = pcost->base_cost[ctx][1] +
    629                                   av1_cost_literal(1) -
    630                                   pcost->base_cost[ctx][0];
    631        pcost->base_cost[ctx][6] =
    632            pcost->base_cost[ctx][2] - pcost->base_cost[ctx][1];
    633        pcost->base_cost[ctx][7] =
    634            pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
    635      }
    636 
    637      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
    638        av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
    639                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
    640 
    641      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx)
    642        av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx],
    643                                 fc->dc_sign_cdf[plane][ctx], NULL);
    644 
    645      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
    646        int br_rate[BR_CDF_SIZE];
    647        int prev_cost = 0;
    648        int i, j;
    649        av1_cost_tokens_from_cdf(
    650            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
    651            NULL);
    652        for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) {
    653          for (j = 0; j < BR_CDF_SIZE - 1; j++) {
    654            pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j];
    655          }
    656          prev_cost += br_rate[j];
    657        }
    658        pcost->lps_cost[ctx][i] = prev_cost;
    659      }
    660      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
    661        pcost->lps_cost[ctx][0 + COEFF_BASE_RANGE + 1] =
    662            pcost->lps_cost[ctx][0];
    663        for (int i = 1; i <= COEFF_BASE_RANGE; ++i) {
    664          pcost->lps_cost[ctx][i + COEFF_BASE_RANGE + 1] =
    665              pcost->lps_cost[ctx][i] - pcost->lps_cost[ctx][i - 1];
    666        }
    667      }
    668    }
    669  }
    670 }
    671 
    672 void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
    673                       MvCosts *mv_costs) {
    674  // Avoid accessing 'mv_costs' when it is not allocated.
    675  if (mv_costs == NULL) return;
    676 
    677  mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
    678  mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
    679  mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
    680  mv_costs->nmv_cost_hp[1] = &mv_costs->nmv_cost_hp_alloc[1][MV_MAX];
    681  if (integer_mv) {
    682    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
    683    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
    684                             nmvc, MV_SUBPEL_NONE);
    685  } else {
    686    mv_costs->mv_cost_stack =
    687        usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
    688    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
    689                             nmvc, usehp);
    690  }
    691 }
    692 
    693 void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
    694  dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
    695  dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
    696  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
    697                           MV_SUBPEL_NONE);
    698 }
    699 
    700 // Populates speed features based on codec control settings (of type
    701 // COST_UPDATE_TYPE) and expected speed feature settings (of type
    702 // INTERNAL_COST_UPDATE_TYPE) by considering the least frequent cost update.
    703 // The populated/updated speed features are used for cost updates in the
    704 // encoder.
    705 // WARNING: Population of unified cost update frequency needs to be taken care
    706 // accordingly, in case of any modifications/additions to the enum
    707 // COST_UPDATE_TYPE/INTERNAL_COST_UPDATE_TYPE.
    708 static inline void populate_unified_cost_update_freq(
    709    const CostUpdateFreq cost_upd_freq, SPEED_FEATURES *const sf) {
    710  INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
    711  // Mapping of entropy cost update frequency from the encoder's codec control
    712  // settings of type COST_UPDATE_TYPE to speed features of type
    713  // INTERNAL_COST_UPDATE_TYPE.
    714  static const INTERNAL_COST_UPDATE_TYPE
    715      map_cost_upd_to_internal_cost_upd[NUM_COST_UPDATE_TYPES] = {
    716        INTERNAL_COST_UPD_SB, INTERNAL_COST_UPD_SBROW, INTERNAL_COST_UPD_TILE,
    717        INTERNAL_COST_UPD_OFF
    718      };
    719 
    720  inter_sf->mv_cost_upd_level =
    721      AOMMIN(inter_sf->mv_cost_upd_level,
    722             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mv]);
    723  inter_sf->coeff_cost_upd_level =
    724      AOMMIN(inter_sf->coeff_cost_upd_level,
    725             map_cost_upd_to_internal_cost_upd[cost_upd_freq.coeff]);
    726  inter_sf->mode_cost_upd_level =
    727      AOMMIN(inter_sf->mode_cost_upd_level,
    728             map_cost_upd_to_internal_cost_upd[cost_upd_freq.mode]);
    729  sf->intra_sf.dv_cost_upd_level =
    730      AOMMIN(sf->intra_sf.dv_cost_upd_level,
    731             map_cost_upd_to_internal_cost_upd[cost_upd_freq.dv]);
    732 }
    733 
    734 // Checks if entropy costs should be initialized/updated at frame level or not.
    735 static inline int is_frame_level_cost_upd_freq_set(
    736    const AV1_COMMON *const cm, const INTERNAL_COST_UPDATE_TYPE cost_upd_level,
    737    const int use_nonrd_pick_mode, const int frames_since_key) {
    738  const int fill_costs =
    739      frame_is_intra_only(cm) ||
    740      (use_nonrd_pick_mode ? frames_since_key < 2
    741                           : (cm->current_frame.frame_number & 0x07) == 1);
    742  return ((!use_nonrd_pick_mode && cost_upd_level != INTERNAL_COST_UPD_OFF) ||
    743          cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
    744 }
    745 
    746 // Decide whether we want to update the mode entropy cost for the current frame.
    747 // The logit is currently inherited from selective_disable_cdf_rtc.
    748 static inline int should_force_mode_cost_update(const AV1_COMP *cpi) {
    749  const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
    750  if (!rt_sf->frame_level_mode_cost_update) {
    751    return false;
    752  }
    753 
    754  if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
    755    return cpi->frames_since_last_update == 1;
    756  } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
    757    if (cpi->svc.number_spatial_layers == 1 &&
    758        cpi->svc.number_temporal_layers == 1) {
    759      const AV1_COMMON *const cm = &cpi->common;
    760      const RATE_CONTROL *const rc = &cpi->rc;
    761 
    762      return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
    763             rc->high_source_sad || rc->frames_since_key < 10 ||
    764             cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
    765             cm->current_frame.frame_number % 8 == 0;
    766    } else if (cpi->svc.number_temporal_layers > 1) {
    767      return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
    768    }
    769  }
    770 
    771  return false;
    772 }
    773 
    774 void av1_initialize_rd_consts(AV1_COMP *cpi) {
    775  AV1_COMMON *const cm = &cpi->common;
    776  MACROBLOCK *const x = &cpi->td.mb;
    777  SPEED_FEATURES *const sf = &cpi->sf;
    778  RD_OPT *const rd = &cpi->rd;
    779  int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
    780  int frames_since_key = cpi->rc.frames_since_key;
    781 
    782  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
    783  const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
    784  const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
    785  const FRAME_TYPE frame_type = cm->current_frame.frame_type;
    786 
    787  const int qindex_rdmult =
    788      cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q;
    789  rd->RDMULT = av1_compute_rd_mult(
    790      qindex_rdmult, cm->seq_params->bit_depth,
    791      cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth,
    792      boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets,
    793      is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning);
    794 #if CONFIG_RD_COMMAND
    795  if (cpi->oxcf.pass == 2) {
    796    const RD_COMMAND *rd_command = &cpi->rd_command;
    797    if (rd_command->option_ls[rd_command->frame_index] ==
    798        RD_OPTION_SET_Q_RDMULT) {
    799      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    800    }
    801  }
    802 #endif  // CONFIG_RD_COMMAND
    803 
    804  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);
    805 
    806  set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);
    807 
    808  populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
    809  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
    810  // Frame level mv cost update
    811  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mv_cost_upd_level,
    812                                       use_nonrd_pick_mode, frames_since_key))
    813    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
    814                      cm->features.allow_high_precision_mv, x->mv_costs);
    815 
    816  // Frame level coefficient cost update
    817  if (is_frame_level_cost_upd_freq_set(cm, inter_sf->coeff_cost_upd_level,
    818                                       use_nonrd_pick_mode, frames_since_key))
    819    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
    820 
    821  // Frame level mode cost update
    822  if (should_force_mode_cost_update(cpi) ||
    823      is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
    824                                       use_nonrd_pick_mode, frames_since_key))
    825    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
    826 
    827  // Frame level dv cost update
    828  if (av1_need_dv_costs(cpi)) {
    829    if (cpi->td.dv_costs_alloc == NULL) {
    830      CHECK_MEM_ERROR(
    831          cm, cpi->td.dv_costs_alloc,
    832          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.dv_costs_alloc)));
    833      cpi->td.mb.dv_costs = cpi->td.dv_costs_alloc;
    834    }
    835    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
    836  }
    837 }
    838 
    // Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10)
    // for the Q10 value 'xsq_q10' by linear interpolation between two
    // neighbouring table entries.
    //
    // The tables are sampled at the four most significant bits of
    // x^2 + 8 / 256, which gives eight samples per power of two; every table
    // row below holds the eight samples that share one value of 'k'.
    // NOTE: All three tables must have the same number of entries.
    static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
      // Normalized rate:
      // Models the rate for a Laplacian source with given variance when
      // quantized with a uniform quantizer with given stepsize. The closed
      // form expression is:
      //   Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
      // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
      // and H(x) is the binary entropy function.
      static const int rate_tab_q10[] = {
        65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,  //
        4553,  4389, 4255, 4142, 4044, 3958, 3881, 3811,  //
        3748,  3635, 3538, 3453, 3376, 3307, 3244, 3186,  //
        3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638,  //
        2589,  2501, 2423, 2353, 2290, 2232, 2179, 2130,  //
        2084,  2001, 1928, 1862, 1802, 1748, 1698, 1651,  //
        1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199,  //
        1159,  1086, 1021, 963,  911,  864,  821,  781,   //
        745,   680,  623,  574,  530,  490,  455,  424,   //
        395,   345,  304,  269,  239,  213,  190,  171,   //
        154,   126,  104,  87,   73,   61,   52,   44,    //
        38,    28,   21,   16,   12,   10,   8,    6,     //
        5,     3,    2,    1,    1,    1,    0,    0,
      };
      // Normalized distortion:
      // Models the normalized distortion for a Laplacian source with given
      // variance when quantized with a uniform quantizer with given stepsize.
      // The closed form expression is:
      //   Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
      // where x = qpstep / sqrt(variance).
      // Note the actual distortion is Dn * variance.
      static const int dist_tab_q10[] = {
        0,    0,    1,    1,    1,    2,    2,    2,     //
        3,    3,    4,    5,    5,    6,    7,    7,     //
        8,    9,    11,   12,   13,   15,   16,   17,    //
        18,   21,   24,   26,   29,   31,   34,   36,    //
        39,   44,   49,   54,   59,   64,   69,   73,    //
        78,   88,   97,   106,  115,  124,  133,  142,   //
        151,  167,  184,  200,  215,  231,  245,  260,   //
        274,  301,  327,  351,  375,  397,  418,  439,   //
        458,  495,  528,  559,  587,  613,  637,  659,   //
        680,  717,  749,  777,  801,  823,  842,  859,   //
        874,  899,  919,  936,  949,  960,  969,  977,   //
        983,  994,  1001, 1006, 1010, 1013, 1015, 1017,  //
        1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
      };
      // Q10 value at which each table entry was sampled.
      static const int xsq_iq_q10[] = {
        0,      4,      8,      12,     16,     20,     24,     28,      //
        32,     40,     48,     56,     64,     72,     80,     88,      //
        96,     112,    128,    144,    160,    176,    192,    208,     //
        224,    256,    288,    320,    352,    384,    416,    448,     //
        480,    544,    608,    672,    736,    800,    864,    928,     //
        992,    1120,   1248,   1376,   1504,   1632,   1760,   1888,    //
        2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,    //
        4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,    //
        8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,   //
        16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,   //
        32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,   //
        65504,  73696,  81888,  90080,  98272,  106464, 114656, 122848,  //
        131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
      };

      // Locate the sample at or below the input: 'shift' selects the power of
      // two, the next three bits select the entry within it.
      const int biased = (xsq_q10 >> 2) + 8;
      const int shift = get_msb(biased) - 3;
      const int lo = (shift << 3) + ((biased >> shift) & 0x7);
      const int hi = lo + 1;

      // Q10 interpolation weights of the upper and lower sample.
      const int w_hi_q10 = ((xsq_q10 - xsq_iq_q10[lo]) << 10) >> (2 + shift);
      const int w_lo_q10 = (1 << 10) - w_hi_q10;

      *r_q10 = (rate_tab_q10[lo] * w_lo_q10 + rate_tab_q10[hi] * w_hi_q10) >> 10;
      *d_q10 = (dist_tab_q10[lo] * w_lo_q10 + dist_tab_q10[hi] * w_hi_q10) >> 10;
    }
    904 
    905 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
    906                                  unsigned int qstep, int *rate,
    907                                  int64_t *dist) {
    908  // This function models the rate and distortion for a Laplacian
    909  // source with given variance when quantized with a uniform quantizer
    910  // with given stepsize. The closed form expressions are in:
    911  // Hang and Chen, "Source Model for transform video coder and its
    912  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
    913  // Sys. for Video Tech., April 1997.
    914  if (var == 0) {
    915    *rate = 0;
    916    *dist = 0;
    917  } else {
    918    int d_q10, r_q10;
    919    static const uint32_t MAX_XSQ_Q10 = 245727;
    920    const uint64_t xsq_q10_64 =
    921        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    922    const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
    923    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    924    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
    925    *dist = (var * (int64_t)d_q10 + 512) >> 10;
    926  }
    927 }
    928 
    // Evaluates a cubic through four consecutive samples p[0..3] at offset 'x'.
    // The result equals p[1] at x = 0 and p[2] at x = 1. The polynomial is
    // evaluated from the highest order coefficient inwards.
    static double interp_cubic(const double *p, double x) {
      const double cubic = 3.0 * (p[1] - p[2]) + p[3] - p[0];
      const double quadratic =
          2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * cubic;
      const double linear = p[2] - p[0] + x * quadratic;
      return p[1] + 0.5 * x * linear;
    }
    935 
    936 /*
    937 static double interp_bicubic(const double *p, int p_stride, double x,
    938                             double y) {
    939  double q[4];
    940  q[0] = interp_cubic(p, x);
    941  q[1] = interp_cubic(p + p_stride, x);
    942  q[2] = interp_cubic(p + 2 * p_stride, x);
    943  q[3] = interp_cubic(p + 3 * p_stride, x);
    944  return interp_cubic(q, y);
    945 }
    946 */
    947 
    948 static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
    949  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
    950 };
    951 
    // Selects the distortion curve category for a normalized SSE: 1 when
    // 'sse_norm' is strictly greater than 16.0, otherwise 0.
    static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
      return sse_norm > 16.0 ? 1 : 0;
    }
    955 
    // Rate curves sampled on a uniform grid of 65 points, one row per category
    // of bsize_curvfit_model_cat_lookup. av1_model_rd_curvfit() interpolates
    // between four neighbouring samples of the selected row.
    static const double interp_rgrid_curv[4][65] = {
      // Category 0
      {
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
          122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
          126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
          262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
          726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
          1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
          1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
          2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
          2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
          2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
          3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
      },
      // Category 1
      {
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
          28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
          39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
          137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
          614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
          1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
          1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
          1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
          2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
          2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
          3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
      },
      // Category 2
      {
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
          6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
          13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
          98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
          525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
          926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
          1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
          1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
          2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
          2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
          3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
      },
      // Category 3
      {
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
          0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
          0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
          3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
          65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
          355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
          619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
          1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
          1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
          1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
          2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
          3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
      },
    };
   1018 
   // Distortion-by-SSE curves sampled on the same 65 point grid as
   // interp_rgrid_curv, one row per category returned by
   // sse_norm_curvfit_model_cat_lookup() (0 or 1).
   // NOTE(review): three rows are declared but only two are initialised, so the
   // last row is implicitly all zeros -- confirm whether the extra row is
   // intentional.
   static const double interp_dgrid_curv[3][65] = {
     // Category 0
     {
         16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
         15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
         15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
         13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
         7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
         1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
         0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
         0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
         0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
         0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
         0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
     },
     // Category 1
     {
         16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
         15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
         15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
         13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
         5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
         1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
         0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
         0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
         0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
         0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
         0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
     },
   };
   1047 
   1048 void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
   1049                          double *rate_f, double *distbysse_f) {
   1050  const double x_start = -15.5;
   1051  const double x_end = 16.5;
   1052  const double x_step = 0.5;
   1053  const double epsilon = 1e-6;
   1054  const int rcat = bsize_curvfit_model_cat_lookup[bsize];
   1055  const int dcat = sse_norm_curvfit_model_cat_lookup(sse_norm);
   1056  (void)x_end;
   1057 
   1058  xqr = AOMMAX(xqr, x_start + x_step + epsilon);
   1059  xqr = AOMMIN(xqr, x_end - x_step - epsilon);
   1060  const double x = (xqr - x_start) / x_step;
   1061  const int xi = (int)floor(x);
   1062  const double xo = x - xi;
   1063 
   1064  assert(xi > 0);
   1065 
   1066  const double *prate = &interp_rgrid_curv[rcat][(xi - 1)];
   1067  *rate_f = interp_cubic(prate, xo);
   1068  const double *pdist = &interp_dgrid_curv[dcat][(xi - 1)];
   1069  *distbysse_f = interp_cubic(pdist, xo);
   1070 }
   1071 
   1072 static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
   1073                                       const struct macroblockd_plane *pd,
   1074                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
   1075                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
   1076  const int num_4x4_w = mi_size_wide[plane_bsize];
   1077  const int num_4x4_h = mi_size_high[plane_bsize];
   1078  const ENTROPY_CONTEXT *const above = pd->above_entropy_context;
   1079  const ENTROPY_CONTEXT *const left = pd->left_entropy_context;
   1080 
   1081  memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
   1082  memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
   1083 }
   1084 
   1085 void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
   1086                              const struct macroblockd_plane *pd,
   1087                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
   1088                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
   1089  assert(plane_bsize < BLOCK_SIZES_ALL);
   1090  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
   1091 }
   1092 
   1093 // Special clamping used in the encoder when calculating a prediction
   1094 //
   1095 // Logically, all pixel fetches used for prediction are clamped against the
   1096 // edges of the frame. But doing this directly is slow, so instead we allocate
   1097 // a finite border around the frame and fill it with copies of the outermost
   1098 // pixels.
   1099 //
   1100 // Since this border is finite, we need to clamp the motion vector before
   1101 // prediction in order to avoid out-of-bounds reads. At the same time, this
   1102 // clamp must not change the prediction result.
   1103 //
   1104 // We can balance both of these concerns by calculating how far we would have
   1105 // to go in each direction before the extended prediction region (the current
   1106 // block + AOM_INTERP_EXTEND many pixels around the block) would be mapped
   1107 // so that it touches the frame only at one row or column. This is a special
   1108 // point because any more extreme MV will always lead to the same prediction.
   1109 // So it is safe to clamp at that point.
   1110 //
   1111 // In the worst case, this requires a border of
   1112 //   max_block_width + 2*AOM_INTERP_EXTEND = 128 + 2*4 = 136 pixels
   1113 // around the frame edges.
   1114 static inline void enc_clamp_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
   1115                                MV *mv) {
   1116  int bw = xd->width << MI_SIZE_LOG2;
   1117  int bh = xd->height << MI_SIZE_LOG2;
   1118 
   1119  int px_to_left_edge = xd->mi_col << MI_SIZE_LOG2;
   1120  int px_to_right_edge = (cm->mi_params.mi_cols - xd->mi_col) << MI_SIZE_LOG2;
   1121  int px_to_top_edge = xd->mi_row << MI_SIZE_LOG2;
   1122  int px_to_bottom_edge = (cm->mi_params.mi_rows - xd->mi_row) << MI_SIZE_LOG2;
   1123 
   1124  const SubpelMvLimits mv_limits = {
   1125    .col_min = -GET_MV_SUBPEL(px_to_left_edge + bw + AOM_INTERP_EXTEND),
   1126    .col_max = GET_MV_SUBPEL(px_to_right_edge + AOM_INTERP_EXTEND),
   1127    .row_min = -GET_MV_SUBPEL(px_to_top_edge + bh + AOM_INTERP_EXTEND),
   1128    .row_max = GET_MV_SUBPEL(px_to_bottom_edge + AOM_INTERP_EXTEND)
   1129  };
   1130  clamp_mv(mv, &mv_limits);
   1131 }
   1132 
   1133 void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
   1134                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
   1135  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
   1136  const int_mv ref_mv =
   1137      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
   1138  const int_mv ref_mv1 =
   1139      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);
   1140  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
   1141  int num_mv_refs = 0;
   1142  pred_mv[num_mv_refs++] = ref_mv.as_mv;
   1143  if (ref_mv.as_int != ref_mv1.as_int) {
   1144    pred_mv[num_mv_refs++] = ref_mv1.as_mv;
   1145  }
   1146 
   1147  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
   1148 
   1149  const uint8_t *const src_y_ptr = x->plane[0].src.buf;
   1150  int zero_seen = 0;
   1151  int best_sad = INT_MAX;
   1152  int max_mv = 0;
   1153  // Get the sad for each candidate reference mv.
   1154  for (int i = 0; i < num_mv_refs; ++i) {
   1155    MV *this_mv = &pred_mv[i];
   1156    enc_clamp_mv(&cpi->common, &x->e_mbd, this_mv);
   1157 
   1158    const int fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
   1159    const int fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
   1160    max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
   1161 
   1162    if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
   1163    zero_seen |= (fp_row == 0 && fp_col == 0);
   1164 
   1165    const uint8_t *const ref_y_ptr =
   1166        &ref_y_buffer[ref_y_stride * fp_row + fp_col];
   1167    // Find sad for current vector.
   1168    const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
   1169        src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
   1170    // Note if it is the best so far.
   1171    if (this_sad < best_sad) {
   1172      best_sad = this_sad;
   1173    }
   1174    if (i == 0)
   1175      x->pred_mv0_sad[ref_frame] = this_sad;
   1176    else if (i == 1)
   1177      x->pred_mv1_sad[ref_frame] = this_sad;
   1178  }
   1179 
   1180  // Note the index of the mv that worked best in the reference list.
   1181  x->max_mv_context[ref_frame] = max_mv;
   1182  x->pred_mv_sad[ref_frame] = best_sad;
   1183 }
   1184 
   1185 void av1_setup_pred_block(const MACROBLOCKD *xd,
   1186                          struct buf_2d dst[MAX_MB_PLANE],
   1187                          const YV12_BUFFER_CONFIG *src,
   1188                          const struct scale_factors *scale,
   1189                          const struct scale_factors *scale_uv,
   1190                          const int num_planes) {
   1191  dst[0].buf = src->y_buffer;
   1192  dst[0].stride = src->y_stride;
   1193  dst[1].buf = src->u_buffer;
   1194  dst[2].buf = src->v_buffer;
   1195  dst[1].stride = dst[2].stride = src->uv_stride;
   1196 
   1197  const int mi_row = xd->mi_row;
   1198  const int mi_col = xd->mi_col;
   1199  for (int i = 0; i < num_planes; ++i) {
   1200    setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
   1201                     i ? src->uv_crop_width : src->y_crop_width,
   1202                     i ? src->uv_crop_height : src->y_crop_height,
   1203                     dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
   1204                     xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
   1205  }
   1206 }
   1207 
   1208 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
   1209                                             int ref_frame) {
   1210  assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
   1211  RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
   1212  const RefCntBuffer *const ref_buf =
   1213      get_ref_frame_buf(&cpi->common, ref_frame);
   1214  return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
   1215                                                       : NULL;
   1216 }
   1217 
   1218 int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
   1219                            InterpFilter interp_filter, int dual_filter) {
   1220  if (interp_filter == SWITCHABLE) {
   1221    const MB_MODE_INFO *const mbmi = xd->mi[0];
   1222    int inter_filter_cost = 0;
   1223    for (int dir = 0; dir < 2; ++dir) {
   1224      if (dir && !dual_filter) break;
   1225      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
   1226      const InterpFilter filter =
   1227          av1_extract_interp_filter(mbmi->interp_filters, dir);
   1228      inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
   1229    }
   1230    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
   1231  } else {
   1232    return 0;
   1233  }
   1234 }
   1235 
   1236 void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
   1237  RD_OPT *const rd = &cpi->rd;
   1238 
   1239  // Set baseline threshold values.
   1240  av1_zero(rd->thresh_mult);
   1241 
   1242  rd->thresh_mult[THR_NEARESTMV] = 300;
   1243  rd->thresh_mult[THR_NEARESTL2] = 300;
   1244  rd->thresh_mult[THR_NEARESTL3] = 300;
   1245  rd->thresh_mult[THR_NEARESTB] = 300;
   1246  rd->thresh_mult[THR_NEARESTA2] = 300;
   1247  rd->thresh_mult[THR_NEARESTA] = 300;
   1248  rd->thresh_mult[THR_NEARESTG] = 300;
   1249 
   1250  rd->thresh_mult[THR_NEWMV] = 1000;
   1251  rd->thresh_mult[THR_NEWL2] = 1000;
   1252  rd->thresh_mult[THR_NEWL3] = 1000;
   1253  rd->thresh_mult[THR_NEWB] = 1000;
   1254  rd->thresh_mult[THR_NEWA2] = 1100;
   1255  rd->thresh_mult[THR_NEWA] = 1000;
   1256  rd->thresh_mult[THR_NEWG] = 1000;
   1257 
   1258  rd->thresh_mult[THR_NEARMV] = 1000;
   1259  rd->thresh_mult[THR_NEARL2] = 1000;
   1260  rd->thresh_mult[THR_NEARL3] = 1000;
   1261  rd->thresh_mult[THR_NEARB] = 1000;
   1262  rd->thresh_mult[THR_NEARA2] = 1000;
   1263  rd->thresh_mult[THR_NEARA] = 1000;
   1264  rd->thresh_mult[THR_NEARG] = 1000;
   1265 
   1266  rd->thresh_mult[THR_GLOBALMV] = 2200;
   1267  rd->thresh_mult[THR_GLOBALL2] = 2000;
   1268  rd->thresh_mult[THR_GLOBALL3] = 2000;
   1269  rd->thresh_mult[THR_GLOBALB] = 2400;
   1270  rd->thresh_mult[THR_GLOBALA2] = 2000;
   1271  rd->thresh_mult[THR_GLOBALG] = 2000;
   1272  rd->thresh_mult[THR_GLOBALA] = 2400;
   1273 
   1274  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
   1275  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
   1276  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
   1277  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
   1278  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
   1279  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
   1280  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
   1281  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
   1282  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
   1283  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
   1284  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
   1285  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;
   1286 
   1287  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
   1288  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
   1289  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
   1290  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;
   1291 
   1292  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
   1293  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
   1294  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
   1295  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
   1296  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
   1297  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
   1298  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;
   1299 
   1300  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
   1301  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
   1302  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
   1303  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
   1304  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
   1305  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
   1306  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;
   1307 
   1308  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
   1309  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
   1310  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
   1311  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
   1312  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
   1313  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
   1314  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;
   1315 
   1316  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
   1317  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
   1318  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
   1319  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
   1320  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
   1321  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
   1322  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;
   1323 
   1324  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
   1325  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
   1326  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
   1327  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
   1328  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
   1329  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
   1330  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;
   1331 
   1332  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
   1333  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
   1334  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
   1335  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
   1336  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
   1337  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
   1338  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;
   1339 
   1340  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
   1341  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
   1342  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
   1343  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
   1344  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
   1345  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
   1346  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;
   1347 
   1348  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
   1349  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
   1350  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
   1351  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
   1352  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
   1353  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
   1354  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;
   1355 
   1356  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
   1357  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
   1358  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
   1359  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
   1360  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
   1361  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
   1362  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;
   1363 
   1364  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
   1365  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
   1366  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
   1367  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
   1368  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
   1369  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
   1370  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;
   1371 
   1372  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
   1373  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
   1374  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
   1375  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
   1376  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
   1377  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
   1378  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;
   1379 
   1380  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
   1381  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
   1382  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
   1383  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
   1384  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
   1385  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
   1386  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;
   1387 
   1388  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
   1389  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
   1390  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
   1391  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
   1392  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
   1393  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
   1394  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;
   1395 
   1396  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
   1397  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
   1398  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
   1399  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
   1400  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
   1401  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
   1402  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;
   1403 
   1404  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
   1405  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
   1406  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
   1407  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
   1408  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
   1409  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
   1410  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;
   1411 
   1412  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
   1413  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
   1414  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
   1415  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
   1416  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
   1417  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
   1418  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;
   1419 
   1420  rd->thresh_mult[THR_DC] = 1000;
   1421  rd->thresh_mult[THR_PAETH] = 1000;
   1422  rd->thresh_mult[THR_SMOOTH] = 2200;
   1423  rd->thresh_mult[THR_SMOOTH_V] = 2000;
   1424  rd->thresh_mult[THR_SMOOTH_H] = 2000;
   1425  rd->thresh_mult[THR_H_PRED] = 2000;
   1426  rd->thresh_mult[THR_V_PRED] = 1800;
   1427  rd->thresh_mult[THR_D135_PRED] = 2500;
   1428  rd->thresh_mult[THR_D203_PRED] = 2000;
   1429  rd->thresh_mult[THR_D157_PRED] = 2500;
   1430  rd->thresh_mult[THR_D67_PRED] = 2000;
   1431  rd->thresh_mult[THR_D113_PRED] = 2500;
   1432  rd->thresh_mult[THR_D45_PRED] = 2500;
   1433 }
   1434 
   1435 static inline void update_thr_fact(int (*factor_buf)[MAX_MODES],
   1436                                   THR_MODES best_mode_index,
   1437                                   THR_MODES mode_start, THR_MODES mode_end,
   1438                                   BLOCK_SIZE min_size, BLOCK_SIZE max_size,
   1439                                   int max_rd_thresh_factor) {
   1440  for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
   1441    for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
   1442      int *const fact = &factor_buf[bs][mode];
   1443      if (mode == best_mode_index) {
   1444        *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
   1445      } else {
   1446        *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
   1447      }
   1448    }
   1449  }
   1450 }
   1451 
   1452 void av1_update_rd_thresh_fact(
   1453    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
   1454    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
   1455    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
   1456    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
   1457  assert(use_adaptive_rd_thresh > 0);
   1458  const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;
   1459 
   1460  const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
   1461  BLOCK_SIZE min_size, max_size;
   1462  if (bsize_is_1_to_4) {
   1463    // This part handles block sizes with 1:4 and 4:1 aspect ratios
   1464    // TODO(any): Experiment with threshold update for parent/child blocks
   1465    min_size = bsize;
   1466    max_size = bsize;
   1467  } else {
   1468    min_size = AOMMAX(bsize - 2, BLOCK_4X4);
   1469    max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
   1470  }
   1471 
   1472  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
   1473                  min_size, max_size, max_rd_thresh_factor);
   1474  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
   1475                  min_size, max_size, max_rd_thresh_factor);
   1476 }
   1477 
   1478 int av1_get_intra_cost_penalty(int qindex, int qdelta,
   1479                               aom_bit_depth_t bit_depth) {
   1480  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);
   1481  switch (bit_depth) {
   1482    case AOM_BITS_8: return 20 * q;
   1483    case AOM_BITS_10: return 5 * q;
   1484    case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2);
   1485    default:
   1486      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
   1487      return -1;
   1488  }
   1489 }