tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rdopt.c (279172B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <assert.h>
     13 #include <math.h>
     14 #include <stdbool.h>
     15 #include <stdint.h>
     16 #include <string.h>
     17 
     18 #include "config/aom_config.h"
     19 #include "config/aom_dsp_rtcd.h"
     20 #include "config/av1_rtcd.h"
     21 
     22 #include "aom_dsp/aom_dsp_common.h"
     23 #include "aom_dsp/blend.h"
     24 #include "aom_mem/aom_mem.h"
     25 #include "aom_ports/aom_timer.h"
     26 #include "aom_ports/mem.h"
     27 
     28 #include "av1/common/av1_common_int.h"
     29 #include "av1/common/cfl.h"
     30 #include "av1/common/blockd.h"
     31 #include "av1/common/common.h"
     32 #include "av1/common/common_data.h"
     33 #include "av1/common/entropy.h"
     34 #include "av1/common/entropymode.h"
     35 #include "av1/common/enums.h"
     36 #include "av1/common/idct.h"
     37 #include "av1/common/mvref_common.h"
     38 #include "av1/common/obmc.h"
     39 #include "av1/common/pred_common.h"
     40 #include "av1/common/quant_common.h"
     41 #include "av1/common/reconinter.h"
     42 #include "av1/common/reconintra.h"
     43 #include "av1/common/scan.h"
     44 #include "av1/common/seg_common.h"
     45 #include "av1/common/txb_common.h"
     46 #include "av1/common/warped_motion.h"
     47 
     48 #include "av1/encoder/aq_variance.h"
     49 #include "av1/encoder/av1_quantize.h"
     50 #include "av1/encoder/block.h"
     51 #include "av1/encoder/cost.h"
     52 #include "av1/encoder/compound_type.h"
     53 #include "av1/encoder/encodemb.h"
     54 #include "av1/encoder/encodemv.h"
     55 #include "av1/encoder/encoder.h"
     56 #include "av1/encoder/encodetxb.h"
     57 #include "av1/encoder/hybrid_fwd_txfm.h"
     58 #include "av1/encoder/interp_search.h"
     59 #include "av1/encoder/intra_mode_search.h"
     60 #include "av1/encoder/intra_mode_search_utils.h"
     61 #include "av1/encoder/mcomp.h"
     62 #include "av1/encoder/ml.h"
     63 #include "av1/encoder/mode_prune_model_weights.h"
     64 #include "av1/encoder/model_rd.h"
     65 #include "av1/encoder/motion_search_facade.h"
     66 #include "av1/encoder/palette.h"
     67 #include "av1/encoder/pustats.h"
     68 #include "av1/encoder/random.h"
     69 #include "av1/encoder/ratectrl.h"
     70 #include "av1/encoder/rd.h"
     71 #include "av1/encoder/rdopt.h"
     72 #include "av1/encoder/reconinter_enc.h"
     73 #include "av1/encoder/tokenize.h"
     74 #include "av1/encoder/tpl_model.h"
     75 #include "av1/encoder/tx_search.h"
     76 #include "av1/encoder/var_based_part.h"
     77 
     78 #define LAST_NEW_MV_INDEX 6
     79 
     80 // Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
     81 // The values are kept in Q12 format and equation used to derive is
     82 // (2.5 - ((float)x->qindex / MAXQ) * 1.5)
     83 #define MODE_THRESH_QBITS 12
// Per-qindex multiplier applied to mode RD thresholds, in Q12 fixed point
// (see MODE_THRESH_QBITS). Entry i corresponds to qindex i and follows
// (2.5 - (i / MAXQ) * 1.5) * 4096, i.e. it decays linearly from 2.5x at
// qindex 0 down to 1.0x at qindex MAXQ.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
    110 
// Default evaluation order of all prediction modes in the RD search.
// Grouped (in order): single-reference NEAREST, NEW, NEAR and GLOBAL modes
// over each reference frame; compound NEAREST_NEAREST modes over each
// reference pair; the remaining compound modes grouped per reference pair;
// and finally the intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
    303 
/*!\cond */
// One single-reference inter-mode search result: the RD cost measured for a
// (mode, reference frame) pair.
typedef struct SingleInterModeState {
  int64_t rd;                    // RD cost recorded for this entry.
  MV_REFERENCE_FRAME ref_frame;  // Reference frame the cost was measured on.
  int valid;                     // Nonzero once this entry holds real data.
} SingleInterModeState;

// Running state carried through the inter-mode RD search for one block:
// current best candidate, per-reference statistics, cached NEWMV search
// results, and per-mode pruning thresholds.
typedef struct InterModeSearchState {
  int64_t best_rd;            // Best overall RD cost found so far.
  int64_t best_skip_rd[2];
  MB_MODE_INFO best_mbmode;   // Mode info of the best candidate so far.
  int best_rate_y;            // Luma rate of the best candidate.
  int best_rate_uv;           // Chroma rate of the best candidate.
  int best_mode_skippable;
  int best_skip2;
  THR_MODES best_mode_index;  // Index of the best candidate's mode.
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];  // Per-mode RD pruning thresholds.
  int64_t best_intra_rd;      // Best RD among intra candidates.
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Model-estimated RD indexed by [mode][ref_mv index][reference frame].
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // Nested intra-search state.
  RD_STATS best_y_rdcost;
} InterModeSearchState;
/*!\endcond */
    352 
    353 void av1_inter_mode_data_init(TileDataEnc *tile_data) {
    354  for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
    355    InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
    356    md->ready = 0;
    357    md->num = 0;
    358    md->dist_sum = 0;
    359    md->ld_sum = 0;
    360    md->sse_sum = 0;
    361    md->sse_sse_sum = 0;
    362    md->sse_ld_sum = 0;
    363  }
    364 }
    365 
    366 static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
    367                             int64_t sse, int *est_residue_cost,
    368                             int64_t *est_dist) {
    369  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    370  if (md->ready) {
    371    if (sse < md->dist_mean) {
    372      *est_residue_cost = 0;
    373      *est_dist = sse;
    374    } else {
    375      *est_dist = (int64_t)round(md->dist_mean);
    376      const double est_ld = md->a * sse + md->b;
    377      // Clamp estimated rate cost by INT_MAX / 2.
    378      // TODO(angiebird@google.com): find better solution than clamping.
    379      if (fabs(est_ld) < 1e-2) {
    380        *est_residue_cost = INT_MAX / 2;
    381      } else {
    382        double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
    383        if (est_residue_cost_dbl < 0) {
    384          *est_residue_cost = 0;
    385        } else {
    386          *est_residue_cost =
    387              (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
    388        }
    389      }
    390      if (*est_residue_cost <= 0) {
    391        *est_residue_cost = 0;
    392        *est_dist = sse;
    393      }
    394    }
    395    return 1;
    396  }
    397  return 0;
    398 }
    399 
    400 void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
    401  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    402    const int block_idx = inter_mode_data_block_idx(bsize);
    403    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    404    if (block_idx == -1) continue;
    405    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
    406      continue;
    407    } else {
    408      if (md->ready == 0) {
    409        md->dist_mean = md->dist_sum / md->num;
    410        md->ld_mean = md->ld_sum / md->num;
    411        md->sse_mean = md->sse_sum / md->num;
    412        md->sse_sse_mean = md->sse_sse_sum / md->num;
    413        md->sse_ld_mean = md->sse_ld_sum / md->num;
    414      } else {
    415        const double factor = 3;
    416        md->dist_mean =
    417            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
    418        md->ld_mean =
    419            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
    420        md->sse_mean =
    421            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
    422        md->sse_sse_mean =
    423            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
    424            (factor + 1);
    425        md->sse_ld_mean =
    426            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
    427            (factor + 1);
    428      }
    429 
    430      const double my = md->ld_mean;
    431      const double mx = md->sse_mean;
    432      const double dx = sqrt(md->sse_sse_mean);
    433      const double dxy = md->sse_ld_mean;
    434 
    435      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
    436      md->b = my - md->a * mx;
    437      md->ready = 1;
    438 
    439      md->num = 0;
    440      md->dist_sum = 0;
    441      md->ld_sum = 0;
    442      md->sse_sum = 0;
    443      md->sse_sse_sum = 0;
    444      md->sse_ld_sum = 0;
    445    }
    446    (void)rdmult;
    447  }
    448 }
    449 
    450 static inline void inter_mode_data_push(TileDataEnc *tile_data,
    451                                        BLOCK_SIZE bsize, int64_t sse,
    452                                        int64_t dist, int residue_cost) {
    453  if (residue_cost == 0 || sse == dist) return;
    454  const int block_idx = inter_mode_data_block_idx(bsize);
    455  if (block_idx == -1) return;
    456  InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
    457  if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
    458    const double ld = (sse - dist) * 1. / residue_cost;
    459    ++rd_model->num;
    460    rd_model->dist_sum += dist;
    461    rd_model->ld_sum += ld;
    462    rd_model->sse_sum += sse;
    463    rd_model->sse_sse_sum += (double)sse * (double)sse;
    464    rd_model->sse_ld_sum += sse * ld;
    465  }
    466 }
    467 
    468 static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
    469                                         int mode_rate, int64_t sse, int64_t rd,
    470                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
    471                                         RD_STATS *rd_cost_uv,
    472                                         const MB_MODE_INFO *mbmi) {
    473  const int num = inter_modes_info->num;
    474  assert(num < MAX_INTER_MODES);
    475  inter_modes_info->mbmi_arr[num] = *mbmi;
    476  inter_modes_info->mode_rate_arr[num] = mode_rate;
    477  inter_modes_info->sse_arr[num] = sse;
    478  inter_modes_info->est_rd_arr[num] = rd;
    479  inter_modes_info->rd_cost_arr[num] = *rd_cost;
    480  inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
    481  inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
    482  ++inter_modes_info->num;
    483 }
    484 
    485 static int compare_rd_idx_pair(const void *a, const void *b) {
    486  if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
    487    // To avoid inconsistency in qsort() ordering when two elements are equal,
    488    // using idx as tie breaker. Refer aomedia:2928
    489    if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
    490      return 0;
    491    else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
    492      return 1;
    493    else
    494      return -1;
    495  } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
    496    return 1;
    497  } else {
    498    return -1;
    499  }
    500 }
    501 
    502 static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
    503                                         RdIdxPair *rd_idx_pair_arr) {
    504  if (inter_modes_info->num == 0) {
    505    return;
    506  }
    507  for (int i = 0; i < inter_modes_info->num; ++i) {
    508    rd_idx_pair_arr[i].idx = i;
    509    rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
    510  }
    511  qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
    512        compare_rd_idx_pair);
    513 }
    514 
    515 // Similar to get_horver_correlation, but also takes into account first
    516 // row/column, when computing horizontal/vertical correlation.
    517 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
    518                                       int width, int height, float *hcorr,
    519                                       float *vcorr) {
    520  // The following notation is used:
    521  // x - current pixel
    522  // y - left neighbor pixel
    523  // z - top neighbor pixel
    524  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
    525  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
    526  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
    527 
    528  // First, process horizontal correlation on just the first row
    529  x_sum += diff[0];
    530  x2_sum += diff[0] * diff[0];
    531  x_firstrow += diff[0];
    532  x2_firstrow += diff[0] * diff[0];
    533  for (int j = 1; j < width; ++j) {
    534    const int16_t x = diff[j];
    535    const int16_t y = diff[j - 1];
    536    x_sum += x;
    537    x_firstrow += x;
    538    x2_sum += x * x;
    539    x2_firstrow += x * x;
    540    xy_sum += x * y;
    541  }
    542 
    543  // Process vertical correlation in the first column
    544  x_firstcol += diff[0];
    545  x2_firstcol += diff[0] * diff[0];
    546  for (int i = 1; i < height; ++i) {
    547    const int16_t x = diff[i * stride];
    548    const int16_t z = diff[(i - 1) * stride];
    549    x_sum += x;
    550    x_firstcol += x;
    551    x2_sum += x * x;
    552    x2_firstcol += x * x;
    553    xz_sum += x * z;
    554  }
    555 
    556  // Now process horiz and vert correlation through the rest unit
    557  for (int i = 1; i < height; ++i) {
    558    for (int j = 1; j < width; ++j) {
    559      const int16_t x = diff[i * stride + j];
    560      const int16_t y = diff[i * stride + j - 1];
    561      const int16_t z = diff[(i - 1) * stride + j];
    562      x_sum += x;
    563      x2_sum += x * x;
    564      xy_sum += x * y;
    565      xz_sum += x * z;
    566    }
    567  }
    568 
    569  for (int j = 0; j < width; ++j) {
    570    x_finalrow += diff[(height - 1) * stride + j];
    571    x2_finalrow +=
    572        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
    573  }
    574  for (int i = 0; i < height; ++i) {
    575    x_finalcol += diff[i * stride + width - 1];
    576    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
    577  }
    578 
    579  int64_t xhor_sum = x_sum - x_finalcol;
    580  int64_t xver_sum = x_sum - x_finalrow;
    581  int64_t y_sum = x_sum - x_firstcol;
    582  int64_t z_sum = x_sum - x_firstrow;
    583  int64_t x2hor_sum = x2_sum - x2_finalcol;
    584  int64_t x2ver_sum = x2_sum - x2_finalrow;
    585  int64_t y2_sum = x2_sum - x2_firstcol;
    586  int64_t z2_sum = x2_sum - x2_firstrow;
    587 
    588  const float num_hor = (float)(height * (width - 1));
    589  const float num_ver = (float)((height - 1) * width);
    590 
    591  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
    592  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
    593 
    594  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
    595  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
    596 
    597  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
    598  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
    599 
    600  if (xhor_var_n > 0 && y_var_n > 0) {
    601    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
    602    *hcorr = *hcorr < 0 ? 0 : *hcorr;
    603  } else {
    604    *hcorr = 1.0;
    605  }
    606  if (xver_var_n > 0 && z_var_n > 0) {
    607    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
    608    *vcorr = *vcorr < 0 ? 0 : *vcorr;
    609  } else {
    610    *vcorr = 1.0;
    611  }
    612 }
    613 
    614 static void get_variance_stats_hbd(const MACROBLOCK *x, int64_t *src_var,
    615                                   int64_t *rec_var) {
    616  const MACROBLOCKD *xd = &x->e_mbd;
    617  const MB_MODE_INFO *mbmi = xd->mi[0];
    618  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
    619  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
    620 
    621  BLOCK_SIZE bsize = mbmi->bsize;
    622  int bw = block_size_wide[bsize];
    623  int bh = block_size_high[bsize];
    624 
    625  static const int gau_filter[3][3] = {
    626    { 1, 2, 1 },
    627    { 2, 4, 2 },
    628    { 1, 2, 1 },
    629  };
    630 
    631  DECLARE_ALIGNED(16, uint16_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
    632 
    633  uint16_t *pred_ptr = &dclevel[bw + 1];
    634  int pred_stride = xd->plane[0].dst.stride;
    635 
    636  for (int idy = -1; idy < bh + 1; ++idy) {
    637    for (int idx = -1; idx < bw + 1; ++idx) {
    638      int offset_idy = idy;
    639      int offset_idx = idx;
    640      if (idy == -1) offset_idy = 0;
    641      if (idy == bh) offset_idy = bh - 1;
    642      if (idx == -1) offset_idx = 0;
    643      if (idx == bw) offset_idx = bw - 1;
    644 
    645      int offset = offset_idy * pred_stride + offset_idx;
    646      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(pd->dst.buf)[offset];
    647    }
    648  }
    649 
    650  *rec_var = 0;
    651  for (int idy = 0; idy < bh; ++idy) {
    652    for (int idx = 0; idx < bw; ++idx) {
    653      int sum = 0;
    654      for (int iy = 0; iy < 3; ++iy)
    655        for (int ix = 0; ix < 3; ++ix)
    656          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
    657                 gau_filter[iy][ix];
    658 
    659      sum = sum >> 4;
    660 
    661      int64_t diff = pred_ptr[idy * bw + idx] - sum;
    662      *rec_var += diff * diff;
    663    }
    664  }
    665  *rec_var <<= 4;
    666 
    667  int src_stride = p->src.stride;
    668  for (int idy = -1; idy < bh + 1; ++idy) {
    669    for (int idx = -1; idx < bw + 1; ++idx) {
    670      int offset_idy = idy;
    671      int offset_idx = idx;
    672      if (idy == -1) offset_idy = 0;
    673      if (idy == bh) offset_idy = bh - 1;
    674      if (idx == -1) offset_idx = 0;
    675      if (idx == bw) offset_idx = bw - 1;
    676 
    677      int offset = offset_idy * src_stride + offset_idx;
    678      pred_ptr[idy * bw + idx] = CONVERT_TO_SHORTPTR(p->src.buf)[offset];
    679    }
    680  }
    681 
    682  *src_var = 0;
    683  for (int idy = 0; idy < bh; ++idy) {
    684    for (int idx = 0; idx < bw; ++idx) {
    685      int sum = 0;
    686      for (int iy = 0; iy < 3; ++iy)
    687        for (int ix = 0; ix < 3; ++ix)
    688          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
    689                 gau_filter[iy][ix];
    690 
    691      sum = sum >> 4;
    692 
    693      int64_t diff = pred_ptr[idy * bw + idx] - sum;
    694      *src_var += diff * diff;
    695    }
    696  }
    697  *src_var <<= 4;
    698 }
    699 
    700 static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
    701                               int64_t *rec_var) {
    702  const MACROBLOCKD *xd = &x->e_mbd;
    703  const MB_MODE_INFO *mbmi = xd->mi[0];
    704  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
    705  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
    706 
    707  BLOCK_SIZE bsize = mbmi->bsize;
    708  int bw = block_size_wide[bsize];
    709  int bh = block_size_high[bsize];
    710 
    711  static const int gau_filter[3][3] = {
    712    { 1, 2, 1 },
    713    { 2, 4, 2 },
    714    { 1, 2, 1 },
    715  };
    716 
    717  DECLARE_ALIGNED(16, uint8_t, dclevel[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
    718 
    719  uint8_t *pred_ptr = &dclevel[bw + 1];
    720  int pred_stride = xd->plane[0].dst.stride;
    721 
    722  for (int idy = -1; idy < bh + 1; ++idy) {
    723    for (int idx = -1; idx < bw + 1; ++idx) {
    724      int offset_idy = idy;
    725      int offset_idx = idx;
    726      if (idy == -1) offset_idy = 0;
    727      if (idy == bh) offset_idy = bh - 1;
    728      if (idx == -1) offset_idx = 0;
    729      if (idx == bw) offset_idx = bw - 1;
    730 
    731      int offset = offset_idy * pred_stride + offset_idx;
    732      pred_ptr[idy * bw + idx] = pd->dst.buf[offset];
    733    }
    734  }
    735 
    736  *rec_var = 0;
    737  for (int idy = 0; idy < bh; ++idy) {
    738    for (int idx = 0; idx < bw; ++idx) {
    739      int sum = 0;
    740      for (int iy = 0; iy < 3; ++iy)
    741        for (int ix = 0; ix < 3; ++ix)
    742          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
    743                 gau_filter[iy][ix];
    744 
    745      sum = sum >> 4;
    746 
    747      int64_t diff = pred_ptr[idy * bw + idx] - sum;
    748      *rec_var += diff * diff;
    749    }
    750  }
    751  *rec_var <<= 4;
    752 
    753  int src_stride = p->src.stride;
    754  for (int idy = -1; idy < bh + 1; ++idy) {
    755    for (int idx = -1; idx < bw + 1; ++idx) {
    756      int offset_idy = idy;
    757      int offset_idx = idx;
    758      if (idy == -1) offset_idy = 0;
    759      if (idy == bh) offset_idy = bh - 1;
    760      if (idx == -1) offset_idx = 0;
    761      if (idx == bw) offset_idx = bw - 1;
    762 
    763      int offset = offset_idy * src_stride + offset_idx;
    764      pred_ptr[idy * bw + idx] = p->src.buf[offset];
    765    }
    766  }
    767 
    768  *src_var = 0;
    769  for (int idy = 0; idy < bh; ++idy) {
    770    for (int idx = 0; idx < bw; ++idx) {
    771      int sum = 0;
    772      for (int iy = 0; iy < 3; ++iy)
    773        for (int ix = 0; ix < 3; ++ix)
    774          sum += pred_ptr[(idy + iy - 1) * bw + (idx + ix - 1)] *
    775                 gau_filter[iy][ix];
    776 
    777      sum = sum >> 4;
    778 
    779      int64_t diff = pred_ptr[idy * bw + idx] - sum;
    780      *src_var += diff * diff;
    781    }
    782  }
    783  *src_var <<= 4;
    784 }
    785 
    786 static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
    787                          RD_STATS *rd_cost) {
    788  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
    789 
    790  if (frame_is_kf_gf_arf(cpi)) return;
    791 
    792  int64_t src_var, rec_var;
    793 
    794  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
    795  if (is_hbd)
    796    get_variance_stats_hbd(x, &src_var, &rec_var);
    797  else
    798    get_variance_stats(x, &src_var, &rec_var);
    799 
    800  if (src_var <= rec_var) return;
    801 
    802  int64_t var_offset = src_var - rec_var;
    803 
    804  rd_cost->dist += var_offset;
    805 
    806  rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    807 }
    808 
    809 static void adjust_cost(const AV1_COMP *cpi, const MACROBLOCK *x,
    810                        int64_t *rd_cost) {
    811  if (cpi->oxcf.algo_cfg.sharpness != 3) return;
    812 
    813  if (frame_is_kf_gf_arf(cpi)) return;
    814 
    815  int64_t src_var, rec_var;
    816  const bool is_hbd = is_cur_buf_hbd(&x->e_mbd);
    817 
    818  if (is_hbd)
    819    get_variance_stats_hbd(x, &src_var, &rec_var);
    820  else
    821    get_variance_stats(x, &src_var, &rec_var);
    822 
    823  if (src_var <= rec_var) return;
    824 
    825  int64_t var_offset = src_var - rec_var;
    826 
    827  *rd_cost += RDCOST(x->rdmult, 0, var_offset);
    828 }
    829 
    830 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
    831                       int64_t *sse_y) {
    832  const AV1_COMMON *cm = &cpi->common;
    833  const int num_planes = av1_num_planes(cm);
    834  const MACROBLOCKD *xd = &x->e_mbd;
    835  const MB_MODE_INFO *mbmi = xd->mi[0];
    836  int64_t total_sse = 0;
    837  for (int plane = 0; plane < num_planes; ++plane) {
    838    if (plane && !xd->is_chroma_ref) break;
    839    const struct macroblock_plane *const p = &x->plane[plane];
    840    const struct macroblockd_plane *const pd = &xd->plane[plane];
    841    const BLOCK_SIZE bs =
    842        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
    843    unsigned int sse;
    844 
    845    cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
    846                            pd->dst.stride, &sse);
    847    total_sse += sse;
    848    if (!plane && sse_y) *sse_y = sse;
    849  }
    850  total_sse <<= 4;
    851  return total_sse;
    852 }
    853 
    854 int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
    855                          intptr_t block_size, int64_t *ssz) {
    856  int i;
    857  int64_t error = 0, sqcoeff = 0;
    858 
    859  for (i = 0; i < block_size; i++) {
    860    const int diff = coeff[i] - dqcoeff[i];
    861    error += diff * diff;
    862    sqcoeff += coeff[i] * coeff[i];
    863  }
    864 
    865  *ssz = sqcoeff;
    866  return error;
    867 }
    868 
    869 int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
    870                             intptr_t block_size) {
    871  int64_t error = 0;
    872 
    873  for (int i = 0; i < block_size; i++) {
    874    const int diff = coeff[i] - dqcoeff[i];
    875    error += diff * diff;
    876  }
    877 
    878  return error;
    879 }
    880 
    881 #if CONFIG_AV1_HIGHBITDEPTH
    882 int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
    883                                 const tran_low_t *dqcoeff, intptr_t block_size,
    884                                 int64_t *ssz, int bd) {
    885  int i;
    886  int64_t error = 0, sqcoeff = 0;
    887  int shift = 2 * (bd - 8);
    888  int rounding = (1 << shift) >> 1;
    889 
    890  for (i = 0; i < block_size; i++) {
    891    const int64_t diff = coeff[i] - dqcoeff[i];
    892    error += diff * diff;
    893    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
    894  }
    895  error = (error + rounding) >> shift;
    896  sqcoeff = (sqcoeff + rounding) >> shift;
    897 
    898  *ssz = sqcoeff;
    899  return error;
    900 }
    901 #endif
    902 
    903 static int conditional_skipintra(PREDICTION_MODE mode,
    904                                 PREDICTION_MODE best_intra_mode) {
    905  if (mode == D113_PRED && best_intra_mode != V_PRED &&
    906      best_intra_mode != D135_PRED)
    907    return 1;
    908  if (mode == D67_PRED && best_intra_mode != V_PRED &&
    909      best_intra_mode != D45_PRED)
    910    return 1;
    911  if (mode == D203_PRED && best_intra_mode != H_PRED &&
    912      best_intra_mode != D45_PRED)
    913    return 1;
    914  if (mode == D157_PRED && best_intra_mode != H_PRED &&
    915      best_intra_mode != D135_PRED)
    916    return 1;
    917  return 0;
    918 }
    919 
    920 static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
    921                       int16_t mode_context) {
    922  if (is_inter_compound_mode(mode)) {
    923    return mode_costs
    924        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
    925  }
    926 
    927  int mode_cost = 0;
    928  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
    929 
    930  assert(is_inter_mode(mode));
    931 
    932  if (mode == NEWMV) {
    933    mode_cost = mode_costs->newmv_mode_cost[mode_ctx][0];
    934    return mode_cost;
    935  } else {
    936    mode_cost = mode_costs->newmv_mode_cost[mode_ctx][1];
    937    mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
    938 
    939    if (mode == GLOBALMV) {
    940      mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][0];
    941      return mode_cost;
    942    } else {
    943      mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][1];
    944      mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
    945      mode_cost += mode_costs->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
    946      return mode_cost;
    947    }
    948  }
    949 }
    950 
    951 static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
    952                                              int ref_idx) {
    953  return ref_idx ? compound_ref1_mode(this_mode)
    954                 : compound_ref0_mode(this_mode);
    955 }
    956 
// Computes the rate cost of signaling each possible reference-frame choice
// for the current block: ref_costs_single[ref] for single-reference
// prediction and ref_costs_comp[ref0][ref1] for compound prediction. Costs
// are derived from the entropy contexts of neighboring blocks. When the
// segment pins the reference frame, all costs are zero since nothing needs
// to be signaled.
static inline void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Reference frame is dictated by the segment: signaling is free.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // Every inter reference first pays the intra/inter signaling cost.
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Bidirectional compound: forward refs carry the base cost plus the
      // "bidirectional" comp-ref-type cost; backward refs start at zero
      // because the pair's cost is split between the two references.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction is disabled for this frame; fill in a nominal
      // cost so these entries are valid but unattractive.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
   1118 
// Snapshots the current coding decisions (skip flags, mode info, and the
// reference-MV context) into `ctx` so they can be restored later if this
// mode is ultimately chosen for encoding.
static inline void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
  ctx->skippable = skippable;
#if CONFIG_INTERNAL_STATS
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}
   1139 
// Prepares the prediction buffers and candidate motion vectors for
// `ref_frame`: sets up yv12_mb[ref_frame] to point at the (possibly scaled)
// reference planes, gathers the reference-MV candidates from neighbors into
// the MV stack, and runs av1_mv_pred() to choose a search center.
static inline void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
  assert(yv12 != NULL);

  if (scaled_ref_frame) {
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
    // support scaling.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the center point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Go back to unscaled reference.
  if (scaled_ref_frame) {
    // We had temporarily setup pred block based on scaled reference above. Go
    // back to unscaled reference now, for subsequent use.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }
}
   1184 
   1185 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
   1186 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
   1187 
   1188 // TODO(jingning): this mv clamping function should be block size dependent.
   1189 static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
   1190  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
   1191                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
   1192                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
   1193                                     xd->mb_to_bottom_edge +
   1194                                         RIGHT_BOTTOM_MARGIN };
   1195  clamp_mv(mv, &mv_limits);
   1196 }
   1197 
/* If the current mode shares the same mv with other modes with higher cost,
 * skip this mode. Returns 1 when the mode is skipped, 0 otherwise. */
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  // Mode whose MV duplicates this_mode's MV, if any.
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
  if (!is_comp_pred) {
    if (this_mode == NEARMV) {
      if (ref_mv_count == 0) {
        // NEARMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // NEARMV has the same motion vector as GLOBALMV
        compare_mode = GLOBALMV;
      }
    }
    if (this_mode == GLOBALMV) {
      if (ref_mv_count == 0 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // GLOBALMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1) {
        // GLOBALMV has the same motion vector as NEARMV
        compare_mode = NEARMV;
      }
    }

    if (compare_mode != MB_MODE_COUNT) {
      // Use modelled_rd to check whether compare mode was searched
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
          INT64_MAX) {
        const int16_t mode_ctx =
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
        const int compare_cost =
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

        // Only skip if the mode cost is larger than compare mode cost
        if (this_cost > compare_cost) {
          // Inherit the modelled RD of the equivalent, cheaper mode so later
          // pruning decisions still see a valid value for this mode.
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
          return 1;
        }
      }
    }
  }
  return 0;
}
   1255 
   1256 static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
   1257                                     const AV1_COMMON *cm,
   1258                                     const MACROBLOCK *x) {
   1259  const MACROBLOCKD *const xd = &x->e_mbd;
   1260  *out_mv = in_mv;
   1261  lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
   1262                     cm->features.cur_frame_force_integer_mv);
   1263  clamp_mv2(&out_mv->as_mv, xd);
   1264  return av1_is_fullmv_in_range(&x->mv_limits,
   1265                                get_fullmv_from_mv(&out_mv->as_mv));
   1266 }
   1267 
   1268 // To use single newmv directly for compound modes, need to clamp the mv to the
   1269 // valid mv range. Without this, encoder would generate out of range mv, and
   1270 // this is seen in 8k encoding.
   1271 static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
   1272                                     int ref_idx) {
   1273  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
   1274  SubpelMvLimits mv_limits;
   1275 
   1276  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
   1277  clamp_mv(&mv->as_mv, &mv_limits);
   1278 }
   1279 
// Determines the NEWMV motion vector(s) for the current prediction mode.
// Compound modes reuse (clamped) single-reference NEWMVs found earlier when
// valid; the single-reference case runs a motion search. On success returns
// 0 with cur_mv holding the selected MV(s) and *rate_mv their signaling
// cost; returns INT64_MAX when the mode should be skipped (invalid search
// result or a skip decision recorded in mode_info).
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both references use NEWMV: reuse each cached single-ref NEWMV.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second reference uses NEWMV.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first reference uses NEWMV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      // Bound the search range using the closest previously searched
      // ref_mv_idx: the distance between the two ref MVs plus the distance
      // between that ref MV and its best NEWMV.
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict when the closest ref MV is within 16 full pels.
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result for reuse by compound modes with the same ref_mv_idx.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
   1384 
   1385 static inline void update_mode_start_end_index(
   1386    const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
   1387    int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
   1388    int interintra_allowed, int eval_motion_mode) {
   1389  *mode_index_start = (int)SIMPLE_TRANSLATION;
   1390  *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
   1391  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
   1392    if (!eval_motion_mode) {
   1393      *mode_index_end = (int)SIMPLE_TRANSLATION;
   1394    } else {
   1395      // Set the start index appropriately to process motion modes other than
   1396      // simple translation
   1397      *mode_index_start = 1;
   1398    }
   1399  }
   1400  if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
   1401    *mode_index_end = SIMPLE_TRANSLATION;
   1402 }
   1403 
   1404 // Increase rd cost of warp mode for low complexity decoding.
   1405 static inline void increase_warp_mode_rd(const MB_MODE_INFO *const best_mbmi,
   1406                                         const MB_MODE_INFO *const this_mbmi,
   1407                                         int64_t *const best_scaled_rd,
   1408                                         int64_t *const this_scaled_rd,
   1409                                         int rd_bias_scale_pct) {
   1410  // Check rd bias percentage is non-zero.
   1411  if (!rd_bias_scale_pct) return;
   1412  if (*best_scaled_rd == INT64_MAX || *this_scaled_rd == INT64_MAX) return;
   1413 
   1414  // Experiments have been performed with increasing the RD cost of warp mode at
   1415  // the below locations of inter mode evaluation.
   1416  // (1). Inter mode evaluation loop in av1_rd_pick_inter_mode().
   1417  // (2). Motion mode evaluation during handle_inter_mode() call.
   1418  // (3). Motion mode evaluation for winner motion modes.
   1419  // (4). Tx search for best inter candidates.
   1420  // Based on the speed quality trade-off results of this speed feature, the rd
   1421  // bias logic is enabled only at (2), (3) and (4).
   1422  const double rd_bias_scale = rd_bias_scale_pct / 100.0;
   1423  if (best_mbmi->motion_mode == WARPED_CAUSAL)
   1424    *best_scaled_rd += (int64_t)(rd_bias_scale * *best_scaled_rd);
   1425  if (this_mbmi->motion_mode == WARPED_CAUSAL)
   1426    *this_scaled_rd += (int64_t)(rd_bias_scale * *this_scaled_rd);
   1427 }
   1428 
   1429 /*!\brief AV1 motion mode search
   1430 *
   1431 * \ingroup inter_mode_search
   1432 * Function to search over and determine the motion mode. It will update
   1433 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
   1434 * WARPED_CAUSAL and determine any necessary side information for the selected
   1435 * motion mode. It will also perform the full transform search, unless the
   1436 * input parameter do_tx_search indicates to do an estimation of the RD rather
   1437 * than an RD corresponding to a full transform search. It will return the
   1438 * RD for the final motion_mode.
   1439 * Do the RD search for a given inter mode and compute all information relevant
   1440 * to the input mode. It will compute the best MV,
   1441 * compound parameters (if the mode is a compound mode) and interpolation filter
   1442 * parameters.
   1443 *
   1444 * \param[in]     cpi               Top-level encoder structure.
   1445 * \param[in]     tile_data         Pointer to struct holding adaptive
   1446 *                                  data/contexts/models for the tile during
   1447 *                                  encoding.
   1448 * \param[in]     x                 Pointer to struct holding all the data for
   1449 *                                  the current macroblock.
   1450 * \param[in]     bsize             Current block size.
   1451 * \param[in,out] rd_stats          Struct to keep track of the overall RD
   1452 *                                  information.
   1453 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
   1454 *                                  for only the Y plane.
   1455 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
   1456 *                                  for only the UV planes.
   1457 * \param[in]     args              HandleInterModeArgs struct holding
   1458 *                                  miscellaneous arguments for inter mode
   1459 *                                  search. See the documentation for this
   1460 *                                  struct for a description of each member.
   1461 * \param[in]     ref_best_rd       Best RD found so far for this block.
   1462 *                                  It is used for early termination of this
   1463 *                                  search if the RD exceeds this value.
* \param[in,out] ref_skip_rd       A length 2 array, where ref_skip_rd[0] is
*                                  the best total RD for a skip mode so far,
*                                  and ref_skip_rd[1] is the best RD for a
*                                  skip mode so far in luma. This is used as
*                                  a speed feature to skip the transform
*                                  search if the computed skip RD for the
*                                  current mode is not better than the best
*                                  ref_skip_rd so far.
   1471 * \param[in,out] rate_mv           The rate associated with the motion vectors.
   1472 *                                  This will be modified if a motion search is
   1473 *                                  done in the motion mode search.
   1474 * \param[in,out] orig_dst          A prediction buffer to hold a computed
   1475 *                                  prediction. This will eventually hold the
   1476 *                                  final prediction, and the tmp_dst info will
   1477 *                                  be copied here.
   1478 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
   1479 *                                  do_tx_search (see below) is 0.
   1480 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
   1481 *                                  a full transform search. This will compute
   1482 *                                  an estimated RD for the modes without the
   1483 *                                  transform search and later perform the full
   1484 *                                  transform search on the best candidates.
   1485 * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
   1486 *                                  information to perform a full transform
   1487 *                                  search only on winning candidates searched
   1488 *                                  with an estimate for transform coding RD.
* \param[in]     eval_motion_mode  Boolean whether or not to evaluate
*                                  motion modes other than SIMPLE_TRANSLATION.
   1491 * \param[out]    yrd               Stores the rdcost corresponding to encoding
   1492 *                                  the luma plane.
   1493 * \return Returns INT64_MAX if the determined motion mode is invalid and the
   1494 * current motion mode being tested should be skipped. It returns 0 if the
   1495 * motion mode search is a success.
   1496 */
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Mode/MV signaling rate of the incoming candidate, before any residual
  // coefficient cost is added.
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  // MV rate of the incoming candidate; restored as the baseline for each
  // motion mode before any per-mode MV refinement.
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // A negative count means the samples have not been gathered yet for this
    // reference frame.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  // Snapshot of the incoming mode info; restored at the top of every loop
  // iteration so each motion mode starts from the same state.
  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices beyond last_motion_mode_allowed encode the interintra variant.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // At the maximum sharpness setting, skip the OBMC and warped motion
    // modes entirely.
    if (cpi->oxcf.algo_cfg.sharpness == 3 &&
        (mbmi->motion_mode == OBMC_CAUSAL ||
         mbmi->motion_mode == WARPED_CAUSAL))
      continue;

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    int prune_obmc;
#if CONFIG_FPMT_TEST
    // In the FPMT unit-test configuration, simulated-parallel encodes read
    // the temporary frame probabilities instead of the actual ones.
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        // Refine the MV for the OBMC mode and adjust the rate accordingly.
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      // Work on local copies so the cached per-ref samples stay intact.
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      //  using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        // No valid projection could be fitted: skip this motion mode.
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      // Account for the bits that signal the chosen motion mode. The cost
      // table depends on whether WARPED_CAUSAL was an eligible choice.
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Prune this mode if its estimated RD exceeds the best estimate so far
      // by more than 25% (est_rd * 0.80 > best <=> est_rd > 1.25 * best).
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // If even the first (simple translation) mode fails the transform
        // search, the whole candidate is invalid.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        // Feed the observed rate/distortion back into the per-tile RD model.
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    adjust_cost(cpi, x, &this_yrd);
    adjust_rdcost(cpi, x, rd_stats);
    adjust_rdcost(cpi, x, rd_stats_y);

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    // Optionally bias the comparison against warped modes (speed feature);
    // the stored best_rd/tmp_rd themselves stay unscaled.
    int64_t best_scaled_rd = best_rd;
    int64_t this_scaled_rd = tmp_rd;
    if (mode_index != 0)
      increase_warp_mode_rd(&best_mbmi, mbmi, &best_scaled_rd, &this_scaled_rd,
                            cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);

    if (mode_index == 0 || this_scaled_rd < best_scaled_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
   1899 
   1900 static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
   1901                            MACROBLOCK *const x, BLOCK_SIZE bsize,
   1902                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
   1903  assert(bsize < BLOCK_SIZES_ALL);
   1904  const AV1_COMMON *cm = &cpi->common;
   1905  const int num_planes = av1_num_planes(cm);
   1906  MACROBLOCKD *const xd = &x->e_mbd;
   1907  const int mi_row = xd->mi_row;
   1908  const int mi_col = xd->mi_col;
   1909  int64_t total_sse = 0;
   1910  int64_t this_rd = INT64_MAX;
   1911  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
   1912  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
   1913 
   1914  for (int plane = 0; plane < num_planes; ++plane) {
   1915    // Call av1_enc_build_inter_predictor() for one plane at a time.
   1916    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
   1917                                  plane, plane);
   1918    const struct macroblockd_plane *const pd = &xd->plane[plane];
   1919    const BLOCK_SIZE plane_bsize =
   1920        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
   1921 
   1922    av1_subtract_plane(x, plane_bsize, plane);
   1923 
   1924    int64_t sse =
   1925        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
   1926    if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
   1927    sse <<= 4;
   1928    total_sse += sse;
   1929    // When current rd cost is more than the best rd, skip evaluation of
   1930    // remaining planes.
   1931    this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
   1932    if (this_rd > best_rd) break;
   1933  }
   1934 
   1935  rd_stats->dist = rd_stats->sse = total_sse;
   1936  rd_stats->rdcost = this_rd;
   1937 
   1938  restore_dst_buf(xd, *orig_dst, num_planes);
   1939  return 0;
   1940 }
   1941 
   1942 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
   1943 // mode
   1944 // Note(rachelbarker): This speed feature currently does not interact correctly
   1945 // with global motion. The issue is that, when global motion is used, GLOBALMV
   1946 // produces a different prediction to NEARESTMV/NEARMV even if the motion
   1947 // vectors are the same. Thus GLOBALMV should not be pruned in this case.
   1948 static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
   1949                                      int ref_idx,
   1950                                      const MV_REFERENCE_FRAME *ref_frame,
   1951                                      PREDICTION_MODE single_mode) {
   1952  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
   1953  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
   1954  assert(single_mode != NEWMV);
   1955  if (single_mode == NEARESTMV) {
   1956    return 0;
   1957  } else if (single_mode == NEARMV) {
   1958    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
   1959    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
   1960    if (ref_mv_count < 2) return 1;
   1961  } else if (single_mode == GLOBALMV) {
   1962    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
   1963    if (ref_mv_count == 0) return 1;
   1964    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
   1965    else if (ref_mv_count == 1)
   1966      return 0;
   1967 
   1968    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
   1969    // Check GLOBALMV is matching with any mv in ref_mv_stack
   1970    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
   1971      int_mv this_mv;
   1972 
   1973      if (ref_idx == 0)
   1974        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
   1975      else
   1976        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
   1977 
   1978      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
   1979        return 1;
   1980    }
   1981  }
   1982  return 0;
   1983 }
   1984 
   1985 static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
   1986                              int ref_idx, int ref_mv_idx,
   1987                              int skip_repeated_ref_mv,
   1988                              const MV_REFERENCE_FRAME *ref_frame,
   1989                              const MB_MODE_INFO_EXT *mbmi_ext) {
   1990  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
   1991  assert(is_inter_singleref_mode(single_mode));
   1992  if (single_mode == NEWMV) {
   1993    this_mv->as_int = INVALID_MV;
   1994  } else if (single_mode == GLOBALMV) {
   1995    if (skip_repeated_ref_mv &&
   1996        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
   1997      return 0;
   1998    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
   1999  } else {
   2000    assert(single_mode == NEARMV || single_mode == NEARESTMV);
   2001    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
   2002    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
   2003    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
   2004      assert(ref_mv_offset >= 0);
   2005      if (ref_idx == 0) {
   2006        *this_mv =
   2007            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
   2008      } else {
   2009        *this_mv =
   2010            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
   2011      }
   2012    } else {
   2013      if (skip_repeated_ref_mv &&
   2014          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
   2015        return 0;
   2016      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
   2017    }
   2018  }
   2019  return 1;
   2020 }
   2021 
   2022 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
   2023 // population
   2024 static inline int skip_nearest_near_mv_using_refmv_weight(
   2025    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
   2026    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
   2027  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
   2028  // Do not skip the mode if the current block has not yet obtained a valid
   2029  // inter mode.
   2030  if (!is_inter_mode(best_mode)) return 0;
   2031 
   2032  const MACROBLOCKD *xd = &x->e_mbd;
   2033  // Do not skip the mode if both the top and left neighboring blocks are not
   2034  // available.
   2035  if (!xd->left_available || !xd->up_available) return 0;
   2036  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
   2037  const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
   2038  const int ref_mv_count =
   2039      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
   2040 
   2041  if (ref_mv_count == 0) return 0;
   2042  // If ref mv list has at least one nearest candidate do not prune NEARESTMV
   2043  if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
   2044 
   2045  // Count number of ref mvs populated from nearest candidates
   2046  int nearest_refmv_count = 0;
   2047  for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
   2048    if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
   2049  }
   2050 
   2051  // nearest_refmv_count indicates the closeness of block motion characteristics
   2052  // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
   2053  // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
   2054  // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
   2055  // mode since these modes work well for blocks that shares similar motion
   2056  // characteristics with its neighbor. Thus, NEARMV mode is pruned when
   2057  // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
   2058  // mode is pruned if none of the ref mvs are populated from nearest candidate.
   2059  const int prune_thresh = 1 + (ref_mv_count >= 2);
   2060  if (nearest_refmv_count < prune_thresh) return 1;
   2061  return 0;
   2062 }
   2063 
// Builds the motion vector(s) for the current prediction mode.
// For each reference (one, or two when compound prediction is used):
//  - NEWMV components are taken directly from the ref MV stack entry
//    selected by mbmi->ref_mv_idx (this_mv for ref 0, comp_mv for ref 1).
//  - All other components come from get_this_mv() and are then clamped to
//    the valid range via clamp_and_check_mv().
// Returns 1 on success, 0 if this candidate should be skipped (e.g. a
// repeated ref MV when skip_repeated_ref_mv is set, or a clamped MV that
// fails validation).
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);

  int ret = 1;
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    this_mv.as_int = INVALID_MV;
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // NEWMV component: read the candidate straight off the ref MV stack.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[i] =
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .this_mv
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .comp_mv;
    } else {
      // Non-NEWMV component: clamp to the frame border; may invalidate it.
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}
   2093 
   2094 static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
   2095                               const MB_MODE_INFO_EXT *mbmi_ext,
   2096                               const int (*const drl_mode_cost0)[2],
   2097                               int8_t ref_frame_type) {
   2098  int cost = 0;
   2099  if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
   2100    for (int idx = 0; idx < 2; ++idx) {
   2101      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
   2102        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
   2103        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
   2104        if (mbmi->ref_mv_idx == idx) return cost;
   2105      }
   2106    }
   2107    return cost;
   2108  }
   2109 
   2110  if (have_nearmv_in_inter_mode(mbmi->mode)) {
   2111    for (int idx = 1; idx < 3; ++idx) {
   2112      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
   2113        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
   2114        cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
   2115        if (mbmi->ref_mv_idx == (idx - 1)) return cost;
   2116      }
   2117    }
   2118    return cost;
   2119  }
   2120  return cost;
   2121 }
   2122 
   2123 static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
   2124                                        const MB_MODE_INFO *const mbmi,
   2125                                        PREDICTION_MODE this_mode) {
   2126  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
   2127    const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
   2128    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
   2129    if (single_mode == NEWMV &&
   2130        args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
   2131      return 0;
   2132    }
   2133  }
   2134  return 1;
   2135 }
   2136 
   2137 static int get_drl_refmv_count(const MACROBLOCK *const x,
   2138                               const MV_REFERENCE_FRAME *ref_frame,
   2139                               PREDICTION_MODE mode) {
   2140  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
   2141  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
   2142  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
   2143  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
   2144  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
   2145  const int has_drl =
   2146      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
   2147  const int ref_set =
   2148      has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
   2149 
   2150  return ref_set;
   2151 }
   2152 
   2153 // Checks if particular ref_mv_idx should be pruned.
   2154 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
   2155                                         const int qindex,
   2156                                         const int ref_mv_idx) {
   2157  if (reduce_inter_modes >= 3) return 1;
   2158  // Q-index logic based pruning is enabled only for
   2159  // reduce_inter_modes = 2.
   2160  assert(reduce_inter_modes == 2);
   2161  // When reduce_inter_modes=2, pruning happens as below based on q index.
   2162  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
   2163  // For q index range between 86 and 170: prune if ref_mv_idx == 2.
   2164  // For q index range between 171 and 255: no pruning.
   2165  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
   2166  return (ref_mv_idx >= min_prune_ref_mv_idx);
   2167 }
   2168 
// Whether this reference motion vector can be skipped, based on initial
// heuristics. Returns true to prune the candidate, false to evaluate it.
// Note: sets mbmi->ref_mv_idx as a side effect.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // For the less frequently selected LAST2/LAST3 references, prune
    // candidates whose stacked weight indicates weak spatial support.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    // At level >= 2, additionally prune NEWMV candidates for single
    // prediction from references that are not the temporally nearest,
    // gated by the q-index heuristic above.
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  // Compound modes whose NEWMV components failed the single-ref search
  // cannot be evaluated.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Lower-bound rate estimate (ref frame + single/comp + DRL signalling):
  // if this alone already exceeds the best RD, skip — except for the
  // NEAREST modes, which are always evaluated.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
   2223 
// Compute the estimated RD cost for the motion vector with simple translation.
// Forces SIMPLE_TRANSLATION / COMPOUND_AVERAGE, builds only the luma
// predictor, and estimates rate/distortion with a model instead of a full
// transform search. Returns INT64_MAX if the candidate is invalid or the
// signalling rate alone already exceeds ref_best_rd.
// NOTE(review): mutates mbmi (ref_mv_idx, mv, motion_mode, compound fields)
// — callers are expected to re-set this state afterwards.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          RD_STATS *rd_stats,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  struct macroblockd_plane *p = xd->plane;
  // Predict directly into the current destination buffers.
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Use the simplest compound configuration: plain average, no masks.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Accumulate the mode-signalling rate: reference frame cost,
  // single/compound cost and the DRL index cost.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  // This estimator is only used for NEARMV-class modes.
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early exit: the rate cost alone already exceeds the best RD so far.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Build only the luma predictor; chroma is not needed for the estimate.
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  // Curve-fit model gives an approximate rate/distortion without a
  // transform search.
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
   2298 
   2299 // Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
   2300 // an integer. 0 for the i-th bit means that integer is excluded, 1 means
   2301 // it is included.
   2302 static inline void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
   2303 
   2304 static inline bool mask_check_bit(int mask, int index) {
   2305  return (mask >> index) & 0x1;
   2306 }
   2307 
// Before performing the full MV search in handle_inter_mode, do a simple
// translation search and see if we can eliminate any motion vectors.
// Returns an integer where, if the i-th bit is set, it means that the i-th
// motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  // If the number of ref mv count is equal to 1, do not prune the same. It
  // is better to evaluate the same than to prune it.
  if (ref_set == 1) return 1;
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Stage 1: cheap heuristics — only search indices that have some chance
  // of being good.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Stage 2: calculate the RD cost for the surviving indices using a
  // simple-translation prediction model.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Compound prediction allows a looser margin than single prediction.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
   2382 
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
   2412 
/*!\cond */
// Winner candidates kept from the SIMPLE_TRANSLATION pass, to be
// re-searched later with other motion modes.
typedef struct motion_mode_best_st_candidate {
  // Candidate array; at most MAX_WINNER_MOTION_MODES entries are valid.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
   2418 
   2419 // Checks if the current reference frame matches with neighbouring block's
   2420 // (top/left) reference frames
   2421 static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
   2422                                               MB_MODE_INFO *nb_mbmi) {
   2423  MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
   2424                                          nb_mbmi->ref_frame[1] };
   2425  MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
   2426                                           cur_mbmi->ref_frame[1] };
   2427  const int is_cur_comp_pred = has_second_ref(cur_mbmi);
   2428  int match_found = 0;
   2429 
   2430  for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
   2431    if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
   2432        (cur_ref_frames[i] == nb_ref_frames[1]))
   2433      match_found = 1;
   2434  }
   2435  return match_found;
   2436 }
   2437 
// Returns 1 if any inter-coded block in the row above shares a reference
// frame with the current block, or if the above row is unavailable;
// returns 0 otherwise.
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
                                              MACROBLOCKD *xd) {
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_row_mi points into the mi array, starting at the beginning of the
  // previous row.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  uint8_t mi_step;
  // Walk the above row one neighbouring block at a time; mi_step advances
  // by each neighbour's width in mi units.
  for (int above_mi_col = mi_col; above_mi_col < end_col;
       above_mi_col += mi_step) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    mi_step = mi_size_wide[above_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*above_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
    if (match_found) return 1;
  }
  return 0;
}
   2459 
// Returns 1 if any inter-coded block in the column to the left shares a
// reference frame with the current block, or if the left column is
// unavailable; returns 0 otherwise.
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
                                             MACROBLOCKD *xd) {
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_col_mi points into the mi array, starting at the top of the
  // previous column
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  uint8_t mi_step;
  // Walk down the left column one neighbouring block at a time; mi_step
  // advances by each neighbour's height in mi units.
  for (int left_mi_row = mi_row; left_mi_row < end_row;
       left_mi_row += mi_step) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    mi_step = mi_size_high[left_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*left_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
    if (match_found) return 1;
  }
  return 0;
}
   2481 /*!\endcond */
   2482 
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, accumulated from the TPL
   * stats of the blocks covering the current partition.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
   2496 
   2497 #if !CONFIG_REALTIME_ONLY
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates per-reference TPL prediction errors over the current block
// and records the minimum (best) inter cost among the valid references in
// `inter_cost_info_from_tpl`. No-op if TPL stats are not ready.
static inline void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column coordinates are mapped to the superres-upscaled domain, since
  // TPL stats are stored at superres resolution.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
   2552 #endif
   2553 
// Returns 1 if the current mode should be pruned based on TPL inter-cost
// statistics, 0 otherwise. `refs` holds the (1-based) reference frames of
// the mode; `prune_mode_level` controls aggressiveness.
static inline int prune_modes_based_on_tpl_stats(
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
    const PREDICTION_MODE this_mode, int prune_mode_level) {
  // NEWMV-containing modes are only pruned at the highest level.
  const int have_newmv = have_newmv_in_inter_mode(this_mode);
  if ((prune_mode_level < 2) && have_newmv) return 0;

  // No pruning possible if TPL never produced a valid best cost.
  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
  if (best_inter_cost == INT64_MAX) return 0;

  const int prune_level = prune_mode_level - 1;
  int64_t cur_inter_cost;

  const int is_globalmv =
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;

  // Thresholds used for pruning:
  // Lower value indicates aggressive pruning and higher value indicates
  // conservative pruning which is set based on ref_mv_idx and speed feature.
  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
  // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
  };

  const int is_comp_pred = (refs[1] > INTRA_FRAME);
  if (!is_comp_pred) {
    cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
  } else {
    const int64_t inter_cost_ref0 =
        inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
    const int64_t inter_cost_ref1 =
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
    // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
    // more aggressive pruning
    cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
  }

  // Prune the mode if cur_inter_cost is greater than threshold times
  // best_inter_cost (threshold is factor/4, hence the >> 2).
  if (cur_inter_cost >
      ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
        best_inter_cost) >>
       2))
    return 1;
  return 0;
}
   2601 
   2602 /*!\brief High level function to select parameters for compound mode.
   2603 *
   2604 * \ingroup inter_mode_search
   2605 * The main search functionality is done in the call to av1_compound_type_rd().
   2606 *
   2607 * \param[in]     cpi               Top-level encoder structure.
   2608 * \param[in]     x                 Pointer to struct holding all the data for
   2609 *                                  the current macroblock.
   2610 * \param[in]     args              HandleInterModeArgs struct holding
   2611 *                                  miscellaneous arguments for inter mode
   2612 *                                  search. See the documentation for this
   2613 *                                  struct for a description of each member.
   2614 * \param[in]     ref_best_rd       Best RD found so far for this block.
   2615 *                                  It is used for early termination of this
   2616 *                                  search if the RD exceeds this value.
   2617 * \param[in,out] cur_mv            Current motion vector.
   2618 * \param[in]     bsize             Current block size.
   2619 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
   2620                                    compound mode.
   2621 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
   2622 *                                  allocated buffers for the compound
   2623 *                                  predictors and masks in the compound type
   2624 *                                  search.
   2625 * \param[in,out] orig_dst          A prediction buffer to hold a computed
   2626 *                                  prediction. This will eventually hold the
   2627 *                                  final prediction, and the tmp_dst info will
   2628 *                                  be copied here.
   2629 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
   2630 *                                  computed prediction.
   2631 * \param[in,out] rate_mv           The rate associated with the motion vectors.
   2632 *                                  This will be modified if a motion search is
   2633 *                                  done in the motion mode search.
   2634 * \param[in,out] rd_stats          Struct to keep track of the overall RD
   2635 *                                  information.
   2636 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
   2637 *                                  best total RD for a skip mode so far, and
   2638 *                                  skip_rd[1] is the best RD for a skip mode so
   2639 *                                  far in luma. This is used as a speed feature
   2640 *                                  to skip the transform search if the computed
   2641 *                                  skip RD for the current mode is not better
   2642 *                                  than the best skip_rd so far.
   2643 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
   2644 *                                  predictor. If this is 0, the inter predictor
   2645 *                                  has already been built and thus we can avoid
   2646 *                                  repeating computation.
   2647 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
   2648 * a viable candidate.
   2649 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  // Masked (wedge/diff-weighted) compounds are only considered when
  // allowed for this block size and enabled in the sequence header.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  // All four interinter compound types are candidates for the search.
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  // Scaled early-termination threshold used inside the compound search.
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Give up if even the best compound RD (with scale/shift margin) is
  // worse than the best mode seen so far; restore the destination buffers.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
   2704 
   2705 // Speed feature to prune out MVs that are similar to previous MVs if they
   2706 // don't achieve the best RD advantage.
   2707 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
   2708                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
   2709                                   MB_MODE_INFO *mbmi, int pruning_factor) {
   2710  int i;
   2711  const int is_comp_pred = has_second_ref(mbmi);
   2712  const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
   2713 
   2714  // Skip the evaluation if an MV match is found.
   2715  if (ref_mv_idx > 0) {
   2716    for (int idx = 0; idx < ref_mv_idx; ++idx) {
   2717      if (save_mv[idx][0].as_int == INVALID_MV) continue;
   2718 
   2719      int mv_diff = 0;
   2720      for (i = 0; i < 1 + is_comp_pred; ++i) {
   2721        mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
   2722                   abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
   2723      }
   2724 
   2725      // If this mode is not the best one, and current MV is similar to
   2726      // previous stored MV, terminate this ref_mv_idx evaluation.
   2727      if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
   2728    }
   2729  }
   2730 
   2731  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
   2732    for (i = 0; i < is_comp_pred + 1; ++i)
   2733      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
   2734  }
   2735 
   2736  return 0;
   2737 }
   2738 
   2739 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
   2740 *
   2741 * \ingroup inter_mode_search
   2742 *
   2743 * Compares the sse of zero mv and the best sse found in single new_mv. If the
   2744 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
   2745 * Else returns 0.
   2746 *
   2747 * Note that the sse of here comes from single_motion_search. So it is
   2748 * interpolated with the filter in motion search, not the actual interpolation
   2749 * filter used in encoding.
   2750 *
   2751 * \param[in]     fn_ptr            A table of function pointers to compute SSE.
   2752 * \param[in]     x                 Pointer to struct holding all the data for
   2753 *                                  the current macroblock.
   2754 * \param[in]     bsize             The current block_size.
   2755 * \param[in]     args              The args to handle_inter_mode, used to track
   2756 *                                  the best SSE.
   2757 * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
   2758 *                                       prune_zero_mv_with_sse value
   2759 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
   2760 */
   2761 static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
   2762                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
   2763                                         const HandleInterModeArgs *args,
   2764                                         int prune_zero_mv_with_sse) {
   2765  const MACROBLOCKD *xd = &x->e_mbd;
   2766  const MB_MODE_INFO *mbmi = xd->mi[0];
   2767 
   2768  const int is_comp_pred = has_second_ref(mbmi);
   2769  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
   2770 
   2771  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
   2772    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
   2773      // Pruning logic only works for IDENTITY type models
   2774      // Note: In theory we could apply similar logic for TRANSLATION
   2775      // type models, but we do not code these due to a spec bug
   2776      // (see comments in gm_get_motion_vector() in av1/common/mv.h)
   2777      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
   2778      return 0;
   2779    }
   2780 
   2781    // Don't prune if we have invalid data
   2782    assert(mbmi->mv[idx].as_int == 0);
   2783    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
   2784      return 0;
   2785    }
   2786  }
   2787 
   2788  // Sum up the sse of ZEROMV and best NEWMV
   2789  unsigned int this_sse_sum = 0;
   2790  unsigned int best_sse_sum = 0;
   2791  for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
   2792    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
   2793    const struct macroblockd_plane *pd = xd->plane;
   2794    const struct buf_2d *src_buf = &p->src;
   2795    const struct buf_2d *ref_buf = &pd->pre[idx];
   2796    const uint8_t *src = src_buf->buf;
   2797    const uint8_t *ref = ref_buf->buf;
   2798    const int src_stride = src_buf->stride;
   2799    const int ref_stride = ref_buf->stride;
   2800 
   2801    unsigned int this_sse;
   2802    fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
   2803    this_sse_sum += this_sse;
   2804 
   2805    const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
   2806    best_sse_sum += best_sse;
   2807  }
   2808 
   2809  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
   2810  if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
   2811    return 1;
   2812  }
   2813 
   2814  return 0;
   2815 }
   2816 
/*!\brief Searches for interpolation filter in realtime mode during winner eval
 *
 * \ingroup inter_mode_search
 *
 * Does a simple interpolation filter search during winner mode evaluation. This
 * is currently only used by realtime mode as \ref
 * av1_interpolation_filter_search is not called during realtime encoding.
 *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher-resolution clips (> 240p), EIGHTTAP_SMOOTH is also
 * searched.
 *
 * \param[in]     cpi               Pointer to the compressor. Used for feature
 *                                  flags.
 * \param[in,out] x                 Pointer to macroblock. This is primarily
 *                                  used to access the buffers.
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
 * \param[in]     bsize             The current block_size.
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
 */
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  // Candidate dual filters; which of the last two is tried depends on the
  // frame resolution (see the loop below).
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  // Invariant maintained below: dst_bufs[0] receives the newly built
  // predictor, and dst_bufs[1] holds the best predictor found so far.
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // Fullpel MVs do not need interpolation; nothing to search.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Restrict the candidate set by resolution: <= 240p skips SMOOTH,
    // > 240p skips SHARP. EIGHTTAP_REGULAR is always evaluated.
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    // Build the luma predictor with this filter into the current scratch
    // buffer (xd->plane dst, kept in sync with dst_bufs[0]).
    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    // Model the RD cost of this predictor (no transform search here).
    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    // Add the cost of signalling the filter choice itself.
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // Swap so the new best lands in dst_bufs[1] and the other buffer
      // becomes the scratch for the next candidate.
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  // At least EIGHTTAP_REGULAR is always evaluated, so a best index exists.
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    // Best predictor already lives in the original dst; just point
    // xd->plane back at it.
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's bufers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one, and the best predictor must be copied from tmp_dst.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the YUV predictor.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
   2948 
   2949 /*!\brief AV1 inter mode RD computation
   2950 *
   2951 * \ingroup inter_mode_search
   2952 * Do the RD search for a given inter mode and compute all information relevant
   2953 * to the input mode. It will compute the best MV,
   2954 * compound parameters (if the mode is a compound mode) and interpolation filter
   2955 * parameters.
   2956 *
   2957 * \param[in]     cpi               Top-level encoder structure.
   2958 * \param[in]     tile_data         Pointer to struct holding adaptive
   2959 *                                  data/contexts/models for the tile during
   2960 *                                  encoding.
   2961 * \param[in]     x                 Pointer to structure holding all the data
   2962 *                                  for the current macroblock.
   2963 * \param[in]     bsize             Current block size.
   2964 * \param[in,out] rd_stats          Struct to keep track of the overall RD
   2965 *                                  information.
   2966 * \param[in,out] rd_stats_y        Struct to keep track of the RD information
   2967 *                                  for only the Y plane.
   2968 * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
   2969 *                                  for only the UV planes.
   2970 * \param[in]     args              HandleInterModeArgs struct holding
   2971 *                                  miscellaneous arguments for inter mode
   2972 *                                  search. See the documentation for this
   2973 *                                  struct for a description of each member.
   2974 * \param[in]     ref_best_rd       Best RD found so far for this block.
   2975 *                                  It is used for early termination of this
   2976 *                                  search if the RD exceeds this value.
   2977 * \param[in]     tmp_buf           Temporary buffer used to hold predictors
   2978 *                                  built in this search.
   2979 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
   2980 *                                  allocated buffers for the compound
   2981 *                                  predictors and masks in the compound type
   2982 *                                  search.
   2983 * \param[in,out] best_est_rd       Estimated RD for motion mode search if
   2984 *                                  do_tx_search (see below) is 0.
   2985 * \param[in]     do_tx_search      Parameter to indicate whether or not to do
   2986 *                                  a full transform search. This will compute
   2987 *                                  an estimated RD for the modes without the
   2988 *                                  transform search and later perform the full
   2989 *                                  transform search on the best candidates.
   2990 * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
   2991 *                                  information to perform a full transform
   2992 *                                  search only on winning candidates searched
   2993 *                                  with an estimate for transform coding RD.
   2994 * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
   2995 *                                  motion mode information used in a speed
   2996 *                                  feature to search motion modes other than
   2997 *                                  SIMPLE_TRANSLATION only on winning
   2998 *                                  candidates.
   2999 * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
   3000 *                                  best total RD for a skip mode so far, and
   3001 *                                  skip_rd[1] is the best RD for a skip mode so
   3002 *                                  far in luma. This is used as a speed feature
   3003 *                                  to skip the transform search if the computed
   3004 *                                  skip RD for the current mode is not better
   3005 *                                  than the best skip_rd so far.
   3006 * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
   3007 *                                         narrow down the search based on data
   3008 *                                         collected in the TPL model.
   3009 * \param[out]    yrd               Stores the rdcost corresponding to encoding
   3010 *                                  the luma plane.
   3011 *
   3012 * \return The RD cost for the mode being searched.
   3013 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

  // TPL-based pruning is only available when TPL stats were collected,
  // which never happens in realtime-only builds.
#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // Best stats found over all ref_mv_idx iterations below.
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref frame
  // is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of ref_mv_idx values worth a full evaluation.
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Signalling cost common to every ref_mv_idx of this mode.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will  iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      // Motion search failed for this index; try the next one.
      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        const unsigned int this_sse = x->pred_sse[ref];
        // Track the best single-reference NEWMV SSE per ref frame; used by
        // prune_zero_mv_with_sse() and the skip check below.
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          // Per-feature-level, per-block-size thresholds: skip NEWMV if its
          // SSE is not sufficiently better than the best prediction SSE.
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early out if the signalling rate alone already exceeds the best RD.
    // NEAREST modes are exempt so at least one candidate survives.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    // For global-MV modes, optionally prune using the SSE of the best NEWMV.
    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!args->skip_ifs) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      // Record modelled RD of single-reference modes so compound modes can
      // be pruned against their component modes below.
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          // Prune the compound mode when its modelled RD is far worse than
          // the better of its two single-reference component modes.
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the local RD bound used by the pruning checks above.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  // No ref_mv_idx survived the pruning or produced a valid RD.
  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
   3364 
/*!\brief Search for the best intrabc predictor
 *
 * \ingroup intra_mode_search
 * \callergraph
 * This function performs a motion search to find the best intrabc predictor.
 * Candidate displacement vectors (DVs) come from an optional hash-based
 * search plus a full-pixel search restricted to the already-reconstructed
 * area above and/or to the left of the current block. Each valid DV is
 * scored with a full transform search and compared against \p best_rd.
 *
 * \returns Returns the best overall rdcost (including the non-intrabc modes
 * search before this function).
 */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // Bail out when intrabc is disallowed by the stream/config or pruned away
  // by speed features.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  // At intrabc_search_level >= 1, only a small set of square block sizes is
  // evaluated.
  if (cpi->sf.mv_sf.intrabc_search_level >= 1 && bsize != BLOCK_4X4 &&
      bsize != BLOCK_8X8 && bsize != BLOCK_16X16) {
    return INT64_MAX;
  }
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  // Superblock coordinates of the current block; used to bound the search
  // area to already-coded superblocks.
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  // Derive the reference DV, used both as the search start point and as the
  // prediction for DV rate costing.
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  // Treat invalid candidates as the zero vector.
  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // Prefer NEARESTMV; fall back to NEARMV, then to the default reference DV
  // when no usable neighbor DV exists.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // Intrabc predicts from the current (partially reconstructed) frame, so
  // point every plane's prediction buffer at the current frame buffer.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  // Snapshot the incoming best mode so it can be restored if no intrabc
  // candidate beats it.
  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const SEARCH_METHODS search_method =
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, start_mv,
                                     lookahead_search_sites, search_method,
                                     /*fine_search_interval=*/0);
  // Use intrabc-specific DV costs rather than regular inter MV costs.
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  // At non-zero search levels, only the "above" region is searched.
  const enum IntrabcMotionDirection max_dir = cpi->sf.mv_sf.intrabc_search_level
                                                  ? IBC_MOTION_LEFT
                                                  : IBC_MOTION_DIRECTIONS;

  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir;
       ++dir) {
    // Restrict the full-pixel search window (in pixels, relative to the
    // current block) to the already-coded area for this direction.
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // NOTE(review): these four asserts compare each limit with itself and so
    // are always true; presumably they were meant to validate the limits
    // against the pre-clamp tile bounds -- confirm the intended comparison.
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);

    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);

    // Skip this direction entirely if clamping produced an empty window.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv;
    FULLPEL_MV_STATS best_mv_stats;
    int bestsme = INT_MAX;

    // Perform a hash search first, and see if we get any matches.
    if (!cpi->sf.mv_sf.hash_max_8x8_intrabc_blocks || bsize <= BLOCK_8X8) {
      bestsme = av1_intrabc_hash_search(cpi, xd, &fullms_params,
                                        intrabc_hash_info, &best_mv.as_fullmv);
    }

    // If intrabc_search_level is not 0 and we found a hash search match, do
    // not proceed with pixel search as the hash match is very likely to be the
    // best intrabc candidate anyway.
    if (bestsme == INT_MAX || cpi->sf.mv_sf.intrabc_search_level == 0) {
      int_mv best_pixel_mv;
      const int pixelsme =
          av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
                                &best_pixel_mv.as_fullmv, &best_mv_stats, NULL);
      if (pixelsme < bestsme) {
        bestsme = pixelsme;
        best_mv = best_pixel_mv;
      }
    }
    // Discard candidates that are out of range or violate the bitstream's DV
    // validity constraints.
    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an intrabc block: DC_PRED with a motion vector and
    // the bilinear filter, per the intrabc coding rules.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    // Full transform search for this candidate; skip it if no valid transform
    // configuration is found.
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      // New overall best: snapshot mode info and transform-search results.
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall best mode (which may be the incoming non-intrabc one).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  // NOTE(review): the per-candidate save above copies xd->height * xd->width
  // entries while this restore uses ctx->num_4x4_blk -- these look like the
  // same 4x4-block count, but confirm they always agree.
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
   3583 
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
// typedef here because Doxygen doesn't know about the typedefs yet. So using
// the typedef will prevent doxygen from finding this function and generating
// the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
// doxygen, we can revert back to using the typedefs.
//
// Picks the best intra mode for a superblock: searches luma intra modes,
// then (if the luma result beats best_rd) chroma intra modes, and finally
// lets intrabc compete against that combined result. On success, the chosen
// mode and its per-block transform data are written into 'ctx' and 'rd_cost';
// rd_cost->rate == INT_MAX signals that nothing beat 'best_rd'.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  // Initialize mbmi as a plain intra block (no intrabc, no skip mode).
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Luma intra mode search.
  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Luma search did not beat best_rd; mark the result as invalid.
    rd_cost->rate = INT_MAX;
  }

  // Let intrabc compete against the best intra result found so far.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    // Intrabc won: record its skip/transform decisions in the coding context.
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Persist the winning mode info and transform types for reuse by the caller.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
   3657 
   3658 static inline void calc_target_weighted_pred(
   3659    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
   3660    const uint8_t *above, int above_stride, const uint8_t *left,
   3661    int left_stride);
   3662 
   3663 static inline void rd_pick_skip_mode(
   3664    RD_STATS *rd_cost, InterModeSearchState *search_state,
   3665    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
   3666    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
   3667  const AV1_COMMON *const cm = &cpi->common;
   3668  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
   3669  const int num_planes = av1_num_planes(cm);
   3670  MACROBLOCKD *const xd = &x->e_mbd;
   3671  MB_MODE_INFO *const mbmi = xd->mi[0];
   3672 
   3673  x->compound_idx = 1;  // COMPOUND_AVERAGE
   3674  RD_STATS skip_mode_rd_stats;
   3675  av1_invalid_rd_stats(&skip_mode_rd_stats);
   3676 
   3677  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
   3678      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
   3679    return;
   3680  }
   3681 
   3682  const MV_REFERENCE_FRAME ref_frame =
   3683      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
   3684  const MV_REFERENCE_FRAME second_ref_frame =
   3685      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
   3686  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
   3687  const THR_MODES mode_index =
   3688      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
   3689 
   3690  if (mode_index == THR_INVALID) {
   3691    return;
   3692  }
   3693 
   3694  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
   3695       cpi->sf.inter_sf.disable_onesided_comp) &&
   3696      cpi->all_one_sided_refs) {
   3697    return;
   3698  }
   3699 
   3700  mbmi->mode = this_mode;
   3701  mbmi->uv_mode = UV_DC_PRED;
   3702  mbmi->ref_frame[0] = ref_frame;
   3703  mbmi->ref_frame[1] = second_ref_frame;
   3704  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
   3705  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
   3706    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
   3707    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
   3708        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
   3709      return;
   3710    }
   3711    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
   3712                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
   3713                     mbmi_ext->mode_context);
   3714    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
   3715    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
   3716    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
   3717  }
   3718 
   3719  assert(this_mode == NEAREST_NEARESTMV);
   3720  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
   3721    return;
   3722  }
   3723 
   3724  mbmi->filter_intra_mode_info.use_filter_intra = 0;
   3725  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
   3726  mbmi->comp_group_idx = 0;
   3727  mbmi->compound_idx = x->compound_idx;
   3728  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
   3729  mbmi->motion_mode = SIMPLE_TRANSLATION;
   3730  mbmi->ref_mv_idx = 0;
   3731  mbmi->skip_mode = mbmi->skip_txfm = 1;
   3732  mbmi->palette_mode_info.palette_size[0] = 0;
   3733  mbmi->palette_mode_info.palette_size[1] = 0;
   3734 
   3735  set_default_interp_filters(mbmi, cm->features.interp_filter);
   3736 
   3737  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   3738  for (int i = 0; i < num_planes; i++) {
   3739    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
   3740    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
   3741  }
   3742 
   3743  BUFFER_SET orig_dst;
   3744  for (int i = 0; i < num_planes; i++) {
   3745    orig_dst.plane[i] = xd->plane[i].dst.buf;
   3746    orig_dst.stride[i] = xd->plane[i].dst.stride;
   3747  }
   3748 
   3749  // Compare the use of skip_mode with the best intra/inter mode obtained.
   3750  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
   3751  int64_t best_intra_inter_mode_cost = INT64_MAX;
   3752  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
   3753    const ModeCosts *mode_costs = &x->mode_costs;
   3754    best_intra_inter_mode_cost = RDCOST(
   3755        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
   3756        rd_cost->dist);
   3757    // Account for non-skip mode rate in total rd stats
   3758    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
   3759    av1_rd_cost_update(x->rdmult, rd_cost);
   3760  }
   3761 
   3762  // Obtain the rdcost for skip_mode.
   3763  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
   3764               best_intra_inter_mode_cost);
   3765 
   3766  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
   3767      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
   3768    assert(mode_index != THR_INVALID);
   3769    search_state->best_mbmode.skip_mode = 1;
   3770    search_state->best_mbmode = *mbmi;
   3771    memset(search_state->best_mbmode.inter_tx_size,
   3772           search_state->best_mbmode.tx_size,
   3773           sizeof(search_state->best_mbmode.inter_tx_size));
   3774    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
   3775                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
   3776                  xd);
   3777    search_state->best_mode_index = mode_index;
   3778 
   3779    // Update rd_cost
   3780    rd_cost->rate = skip_mode_rd_stats.rate;
   3781    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
   3782    rd_cost->rdcost = skip_mode_rd_stats.rdcost;
   3783 
   3784    search_state->best_rd = rd_cost->rdcost;
   3785    search_state->best_skip2 = 1;
   3786    search_state->best_mode_skippable = 1;
   3787 
   3788    x->txfm_search_info.skip_txfm = 1;
   3789  }
   3790 }
   3791 
   3792 // Get winner mode stats of given mode index
   3793 static inline MB_MODE_INFO *get_winner_mode_stats(
   3794    MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
   3795    int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
   3796    RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
   3797    THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
   3798    int mode_idx) {
   3799  MB_MODE_INFO *winner_mbmi;
   3800  if (multi_winner_mode_type) {
   3801    assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
   3802    WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
   3803    winner_mbmi = &winner_mode_stat->mbmi;
   3804 
   3805    *winner_rd_cost = &winner_mode_stat->rd_cost;
   3806    *winner_rate_y = winner_mode_stat->rate_y;
   3807    *winner_rate_uv = winner_mode_stat->rate_uv;
   3808    *winner_mode_index = winner_mode_stat->mode_index;
   3809  } else {
   3810    winner_mbmi = best_mbmode;
   3811    *winner_rd_cost = best_rd_cost;
   3812    *winner_rate_y = best_rate_y;
   3813    *winner_rate_uv = best_rate_uv;
   3814    *winner_mode_index = *best_mode_index;
   3815  }
   3816  return winner_mbmi;
   3817 }
   3818 
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
// Re-runs the transform-type search for each winner-mode candidate with the
// winner-mode evaluation parameters, and updates 'rd_cost', 'best_mbmode',
// 'ctx' and '*best_skip2' whenever a candidate beats the current best RD.
static inline void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Lossless blocks keep their original (trivial) transform decision.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Install the candidate mode before re-running the transform search.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        // Recursive (variable) tx-size search for inter blocks when
        // TX_MODE_SELECT is active; otherwise uniform tx size.
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        // Intra candidate: uniform tx size search only.
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      const int comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;

      // Decide between coding the residual and signaling skip, by comparing
      // the two RD costs (skip pays only the skip flag but takes SSE as
      // distortion).
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          (!cpi->oxcf.algo_cfg.sharpness || !comp_pred) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the candidate's original luma/chroma rates with the refined
      // ones to get the total rate under the new transform decision.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        // Refined candidate wins: commit its mode, tx data and RD stats.
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
   3966 
/*!\cond */
// Describes which prediction modes and reference-frame combinations the
// inter mode search should skip.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT
  // try during search (one bit per mode).
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
   3979 
   3980 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
   3981 static inline void disable_reference(
   3982    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
   3983  for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
   3984    ref_combo[ref][ref2 + 1] = true;
   3985  }
   3986 }
   3987 
   3988 // Update 'ref_combo' mask to disable all inter references except ALTREF.
   3989 static inline void disable_inter_references_except_altref(
   3990    bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
   3991  disable_reference(LAST_FRAME, ref_combo);
   3992  disable_reference(LAST2_FRAME, ref_combo);
   3993  disable_reference(LAST3_FRAME, ref_combo);
   3994  disable_reference(GOLDEN_FRAME, ref_combo);
   3995  disable_reference(BWDREF_FRAME, ref_combo);
   3996  disable_reference(ALTREF2_FRAME, ref_combo);
   3997 }
   3998 
// Subset of reference-frame combinations evaluated when the reduced
// reference set (REF_SET_REDUCED) is in effect. Each entry is
// {first_ref, second_ref}; a second entry of NONE_FRAME denotes
// single-reference prediction.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};
   4009 
// Which set of reference-frame combinations the mode search may use:
// the full set, the reduced set (reduced_ref_combos), or the real-time set
// (real_time_ref_combos).
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
   4011 
   4012 static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
   4013  if (ref_set == REF_SET_FULL) {
   4014    // Everything available by default.
   4015    memset(mask, 0, sizeof(*mask));
   4016  } else {
   4017    // All modes available by default.
   4018    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
   4019    // All references disabled first.
   4020    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
   4021      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
   4022        mask->ref_combo[ref1][ref2 + 1] = true;
   4023      }
   4024    }
   4025    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
   4026    int num_ref_combos;
   4027 
   4028    // Then enable reduced set of references explicitly.
   4029    switch (ref_set) {
   4030      case REF_SET_REDUCED:
   4031        ref_set_combos = reduced_ref_combos;
   4032        num_ref_combos =
   4033            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
   4034        break;
   4035      case REF_SET_REALTIME:
   4036        ref_set_combos = real_time_ref_combos;
   4037        num_ref_combos =
   4038            (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
   4039        break;
   4040      default: assert(0); num_ref_combos = 0;
   4041    }
   4042 
   4043    for (int i = 0; i < num_ref_combos; ++i) {
   4044      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
   4045      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
   4046    }
   4047  }
   4048 }
   4049 
// Builds the mode/reference skip mask used by the inter-mode RD search.
// Starts from the default mask for the active reference set, then prunes
// additional prediction modes and reference frames based on segmentation,
// speed features, per-reference SAD statistics and encoder configuration.
static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
                                       const AV1_COMP *cpi, MACROBLOCK *x,
                                       BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
  REF_SET ref_set = REF_SET_FULL;

  // Pick the reference set: real-time takes precedence over the reduced set.
  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  // Minimum prediction-MV SAD over the references under consideration; used
  // below as the baseline when pruning fixed-MV modes of poor references.
  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // Re-enable NEAR/NEAREST only when they differ from GLOBALMV, i.e. when
      // they would produce a distinct prediction.
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    // Fast ALTREF search: restrict the search to ALTREF-only inter modes.
    if (inter_sf->alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (inter_sf->alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
      // Threshold is the best past-frame SAD plus 12.5%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      MV_REFERENCE_FRAME start_frame =
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0) {
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
          // to the relative dist of LAST_FRAME.
          if (inter_sf->alt_ref_search_fp == 1 &&
              (abs(cpi->ref_frame_dist_info
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
               1.5 * abs(cpi->ref_frame_dist_info
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
            continue;
          }
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
        }
      }
    }
  }

  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
    if (x->best_pred_mv_sad[0] < INT_MAX) {
      // Threshold is the best past-frame SAD plus 50%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };

      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
        ref_frame = prune_ref_list[ref_idx];
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
  }

  // Intra is not evaluated above the configured maximum intra block size.
  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  // Global motion disabled in config: mask all GLOBALMV-based modes.
  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
    }
  }

  // Mask out intra luma modes not allowed for this block's max transform size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];

  // Prune reference frames which are not the closest to the current
  // frame and with large pred_mv_sad.
  if (inter_sf->prune_single_ref) {
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
    const double prune_threshes[2] = { 1.20, 1.05 };

    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefFrameDistanceInfo *const ref_frame_dist_info =
          &cpi->ref_frame_dist_info;
      const int is_closest_ref =
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
          (ref_frame == ref_frame_dist_info->nearest_future_ref);

      if (!is_closest_ref) {
        // dir 0: past reference; dir 1: future reference.
        const int dir =
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
                ? 0
                : 1;
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
            x->pred_mv_sad[ref_frame] >
                prune_threshes[inter_sf->prune_single_ref - 1] *
                    x->best_pred_mv_sad[dir])
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
      }
    }
  }
}
   4221 
   4222 static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
   4223                                          HandleInterModeArgs *const args,
   4224                                          int is_hbd) {
   4225  if (is_hbd) {
   4226    const int len = sizeof(uint16_t);
   4227    args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
   4228    args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
   4229                                                 (MAX_SB_SQUARE >> 1) * len);
   4230    args->above_pred_buf[2] =
   4231        CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
   4232    args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
   4233    args->left_pred_buf[1] =
   4234        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
   4235    args->left_pred_buf[2] =
   4236        CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
   4237  } else {
   4238    args->above_pred_buf[0] = obmc_buffer->above_pred;
   4239    args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
   4240    args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
   4241    args->left_pred_buf[0] = obmc_buffer->left_pred;
   4242    args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
   4243    args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
   4244  }
   4245 }
   4246 
   4247 static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
   4248                                  MV_REFERENCE_FRAME ref_frame) {
   4249  const AV1_COMMON *const cm = &cpi->common;
   4250  MV_REFERENCE_FRAME rf[2];
   4251  av1_set_ref_frame(rf, ref_frame);
   4252 
   4253  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
   4254 
   4255  if (prune_ref_by_selective_ref_frame(cpi, x, rf,
   4256                                       cm->cur_frame->ref_display_order_hint)) {
   4257    return 1;
   4258  }
   4259 
   4260  return 0;
   4261 }
   4262 
   4263 static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
   4264                                                    int skip_ref_frame_mask) {
   4265  for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
   4266    if (!(skip_ref_frame_mask & (1 << r))) {
   4267      const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
   4268      if (rf[0] == ref_frame || rf[1] == ref_frame) {
   4269        return 1;
   4270      }
   4271    }
   4272  }
   4273  return 0;
   4274 }
   4275 
   4276 static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
   4277                                             const MB_MODE_INFO *mi_cache) {
   4278  if (!mi_cache) {
   4279    return 0;
   4280  }
   4281 
   4282  if (ref_frame < REF_FRAMES) {
   4283    return (ref_frame == mi_cache->ref_frame[0] ||
   4284            ref_frame == mi_cache->ref_frame[1]);
   4285  }
   4286 
   4287  // if we are here, then the current mode is compound.
   4288  MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
   4289  return ref_frame == cached_ref_type;
   4290 }
   4291 
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
// Sets up all per-block state needed before the inter-mode RD loop: reference
// frame costs, per-reference MV candidates and SAD statistics, OBMC neighbor
// predictions, the mode skip mask, and mode-evaluation parameters.
static inline void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // best_pred_mv_sad[0]/[1] track the best SAD over past/future references.
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.inter_sf.prune_single_ref ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both single references of the compound pair must be available.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  int prune_obmc;
#if CONFIG_FPMT_TEST
  // FPMT unit test's parallel-simulation mode uses the temporary frame
  // probabilities instead of the actual ones.
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      // Build the above/left neighbor predictions used by OBMC, then compute
      // the target weighted prediction for this block.
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
   4427 
   4428 static inline void init_single_inter_mode_search_state(
   4429    InterModeSearchState *search_state) {
   4430  for (int dir = 0; dir < 2; ++dir) {
   4431    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
   4432      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
   4433        SingleInterModeState *state;
   4434 
   4435        state = &search_state->single_state[dir][mode][ref_frame];
   4436        state->ref_frame = NONE_FRAME;
   4437        state->rd = INT64_MAX;
   4438 
   4439        state = &search_state->single_state_modelled[dir][mode][ref_frame];
   4440        state->ref_frame = NONE_FRAME;
   4441        state->rd = INT64_MAX;
   4442 
   4443        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
   4444      }
   4445    }
   4446  }
   4447 
   4448  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
   4449    search_state->best_single_rd[ref_frame] = INT64_MAX;
   4450    search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
   4451  }
   4452  av1_zero(search_state->single_state_cnt);
   4453  av1_zero(search_state->single_state_modelled_cnt);
   4454 }
   4455 
// Initializes the inter-mode search state for a block: best-so-far RD stats,
// per-mode RD thresholds, and the modelled/simple RD caches. Compound-mode
// state is only initialized when the frame allows compound references.
static inline void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // NEWMV-based modes are never thresholded; the remaining single-reference
  // modes use the segment/bsize-specific RD thresholds scaled by the
  // per-block frequency factors.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  // Invalidate the single-mode NEWMV caches and the modelled/simple RD
  // caches for all single inter modes.
  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }

  // Compound-mode state is only needed when compound references are allowed
  // for this frame.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}
   4533 
   4534 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
   4535                           const MV_REFERENCE_FRAME *ref_frame,
   4536                           const PREDICTION_MODE this_mode) {
   4537  if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
   4538    return true;
   4539  }
   4540 
   4541  return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
   4542 }
   4543 
   4544 static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
   4545                                      BLOCK_SIZE bsize,
   4546                                      PREDICTION_MODE curr_mode,
   4547                                      const MV_REFERENCE_FRAME *ref_frames) {
   4548  const int comp_pred = ref_frames[1] > INTRA_FRAME;
   4549  if (comp_pred) {
   4550    if (!is_comp_ref_allowed(bsize)) return 1;
   4551    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
   4552      return 1;
   4553    }
   4554 
   4555    const AV1_COMMON *const cm = &cpi->common;
   4556    if (frame_is_intra_only(cm)) return 1;
   4557 
   4558    const CurrentFrame *const current_frame = &cm->current_frame;
   4559    if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
   4560 
   4561    const struct segmentation *const seg = &cm->seg;
   4562    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
   4563    // Do not allow compound prediction if the segment level reference frame
   4564    // feature is in use as in this case there can only be one reference.
   4565    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
   4566  }
   4567 
   4568  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
   4569    // Mode must be compatible
   4570    if (!is_interintra_allowed_bsize(bsize)) return 1;
   4571    if (!is_interintra_allowed_mode(curr_mode)) return 1;
   4572  }
   4573 
   4574  return 0;
   4575 }
   4576 
   4577 static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
   4578                                        BLOCK_SIZE bsize, int mib_size) {
   4579  const int sb_size_mask = mib_size - 1;
   4580  const MACROBLOCKD *const xd = &x->e_mbd;
   4581  const int mi_row = xd->mi_row;
   4582  const int mi_col = xd->mi_col;
   4583  const int mi_row_in_sb = mi_row & sb_size_mask;
   4584  const int mi_col_in_sb = mi_col & sb_size_mask;
   4585  const int mi_w = mi_size_wide[bsize];
   4586  const int mi_h = mi_size_high[bsize];
   4587  int picked_ref_frames_mask = 0;
   4588  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
   4589    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
   4590      picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
   4591    }
   4592  }
   4593  return picked_ref_frames_mask;
   4594 }
   4595 
   4596 // Check if reference frame pair of the current block matches with the given
   4597 // block.
   4598 static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
   4599                                       const MV_REFERENCE_FRAME *ref_frames) {
   4600  return ((ref_frames[0] == mbmi->ref_frame[0]) &&
   4601          (ref_frames[1] == mbmi->ref_frame[1]));
   4602 }
   4603 
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // Rejected outright by the precomputed skip mask.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 = skip completely, 2 = keep the single-ref search needed by the
        // cached compound mode but skip its motion-mode search.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Rows index the aggressiveness level; columns index the qindex
      // sub-range (low/mid/high third of QINDEX_RANGE).
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
   4758 
   4759 static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
   4760                             const MV_REFERENCE_FRAME *ref_frames,
   4761                             const AV1_COMMON *cm) {
   4762  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
   4763  mbmi->ref_mv_idx = 0;
   4764  mbmi->mode = curr_mode;
   4765  mbmi->uv_mode = UV_DC_PRED;
   4766  mbmi->ref_frame[0] = ref_frames[0];
   4767  mbmi->ref_frame[1] = ref_frames[1];
   4768  pmi->palette_size[0] = 0;
   4769  pmi->palette_size[1] = 0;
   4770  mbmi->filter_intra_mode_info.use_filter_intra = 0;
   4771  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
   4772  mbmi->motion_mode = SIMPLE_TRANSLATION;
   4773  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
   4774  set_default_interp_filters(mbmi, cm->features.interp_filter);
   4775 }
   4776 
   4777 static inline void collect_single_states(MACROBLOCK *x,
   4778                                         InterModeSearchState *search_state,
   4779                                         const MB_MODE_INFO *const mbmi) {
   4780  int i, j;
   4781  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
   4782  const PREDICTION_MODE this_mode = mbmi->mode;
   4783  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
   4784  const int mode_offset = INTER_OFFSET(this_mode);
   4785  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
   4786 
   4787  // Simple rd
   4788  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
   4789  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
   4790    const int64_t rd =
   4791        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
   4792    if (rd < simple_rd) simple_rd = rd;
   4793  }
   4794 
   4795  // Insertion sort of single_state
   4796  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
   4797  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
   4798  i = search_state->single_state_cnt[dir][mode_offset];
   4799  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
   4800    state_s[j] = state_s[j - 1];
   4801  state_s[j] = this_state_s;
   4802  search_state->single_state_cnt[dir][mode_offset]++;
   4803 
   4804  // Modelled rd
   4805  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
   4806  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
   4807    const int64_t rd =
   4808        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
   4809    if (rd < modelled_rd) modelled_rd = rd;
   4810  }
   4811 
   4812  // Insertion sort of single_state_modelled
   4813  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
   4814  SingleInterModeState *state_m =
   4815      search_state->single_state_modelled[dir][mode_offset];
   4816  i = search_state->single_state_modelled_cnt[dir][mode_offset];
   4817  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
   4818    state_m[j] = state_m[j - 1];
   4819  state_m[j] = this_state_m;
   4820  search_state->single_state_modelled_cnt[dir][mode_offset]++;
   4821 }
   4822 
// Post-process the collected single-reference mode statistics: invalidate
// clearly-losing entries, then build an ordered candidate list of reference
// frames (single_rd_order) per direction and mode, ordered by simple rd
// first and topped up from the modelled rd ordering. The resulting order is
// later used to prune compound mode search.
static inline void analyze_single_states(const AV1_COMP *cpi,
                                         InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune level uses a larger factor, i.e. more aggressive pruning.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // Lists are sorted ascending by rd, so index 0 (the best) is kept.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Apply the same pruning to the modelled-rd statistics.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First take all valid entries in simple-rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Then fill remaining slots from the modelled-rd ordering, skipping
      // ref frames already listed or invalidated in the simple-rd list.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
   4911 
   4912 static int compound_skip_get_candidates(
   4913    const AV1_COMP *cpi, const InterModeSearchState *search_state,
   4914    const int dir, const PREDICTION_MODE mode) {
   4915  const int mode_offset = INTER_OFFSET(mode);
   4916  const SingleInterModeState *state =
   4917      search_state->single_state[dir][mode_offset];
   4918  const SingleInterModeState *state_modelled =
   4919      search_state->single_state_modelled[dir][mode_offset];
   4920 
   4921  int max_candidates = 0;
   4922  for (int i = 0; i < FWD_REFS; ++i) {
   4923    if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
   4924    max_candidates++;
   4925  }
   4926 
   4927  int candidates = max_candidates;
   4928  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
   4929    candidates = AOMMIN(2, max_candidates);
   4930  }
   4931  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
   4932    if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
   4933        state[0].ref_frame == state_modelled[0].ref_frame)
   4934      candidates = 1;
   4935    if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
   4936  }
   4937 
   4938  if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
   4939    // Limit the number of candidates to 1 in each direction for compound
   4940    // prediction
   4941    candidates = AOMMIN(1, candidates);
   4942  }
   4943  return candidates;
   4944 }
   4945 
// Decide whether the given compound mode can be skipped based on the
// statistics gathered from its component single-reference modes.
// Returns 1 if the compound mode should be skipped, 0 otherwise.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Component single modes of the compound mode, one per direction.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each component single mode/ref pair was actually
  // evaluated (i.e. appears in the collected single-state lists).
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV components, check that the single-mode mv matches
  // the compound-mode mv for every ref-mv candidate; if not, the single
  // statistics are not representative of this compound mode.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Skip the compound mode if, for either comparable direction, its ref
  // frame is not among the top single-mode candidates.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
   5010 
   5011 // Check if ref frames of current block matches with given block.
   5012 static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
   5013                                   const MV_REFERENCE_FRAME *ref_frames,
   5014                                   int *const is_ref_match) {
   5015  if (is_inter_block(mbmi)) {
   5016    is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
   5017    is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
   5018    if (has_second_ref(mbmi)) {
   5019      is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
   5020      is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
   5021    }
   5022  }
   5023 }
   5024 
   5025 // Prune compound mode using ref frames of neighbor blocks.
   5026 static inline int compound_skip_using_neighbor_refs(
   5027    MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
   5028    const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
   5029  // Exclude non-extended compound modes from pruning
   5030  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
   5031      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
   5032    return 0;
   5033 
   5034  if (prune_ext_comp_using_neighbors >= 3) return 1;
   5035 
   5036  int is_ref_match[2] = { 0 };  // 0 - match for forward refs
   5037                                // 1 - match for backward refs
   5038  // Check if ref frames of this block matches with left neighbor.
   5039  if (xd->left_available)
   5040    match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
   5041 
   5042  // Check if ref frames of this block matches with above neighbor.
   5043  if (xd->up_available)
   5044    match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
   5045 
   5046  // Combine ref frame match with neighbors in forward and backward refs.
   5047  const int track_ref_match = is_ref_match[0] + is_ref_match[1];
   5048 
   5049  // Pruning based on ref frame match with neighbors.
   5050  if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
   5051  return 1;
   5052 }
   5053 
   5054 // Update best single mode for the given reference frame based on simple rd.
   5055 static inline void update_best_single_mode(InterModeSearchState *search_state,
   5056                                           const PREDICTION_MODE this_mode,
   5057                                           const MV_REFERENCE_FRAME ref_frame,
   5058                                           int64_t this_rd) {
   5059  if (this_rd < search_state->best_single_rd[ref_frame]) {
   5060    search_state->best_single_rd[ref_frame] = this_rd;
   5061    search_state->best_single_mode[ref_frame] = this_mode;
   5062  }
   5063 }
   5064 
   5065 // Prune compound mode using best single mode for the same reference.
   5066 static inline int skip_compound_using_best_single_mode_ref(
   5067    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
   5068    const PREDICTION_MODE *best_single_mode,
   5069    int prune_comp_using_best_single_mode_ref) {
   5070  // Exclude non-extended compound modes from pruning
   5071  if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
   5072      this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
   5073    return 0;
   5074 
   5075  assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
   5076  const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
   5077  // Get ref frame direction corresponding to NEWMV
   5078  // 0 - NEWMV corresponding to forward direction
   5079  // 1 - NEWMV corresponding to backward direction
   5080  const int newmv_dir = comp_mode_ref0 != NEWMV;
   5081 
   5082  // Avoid pruning the compound mode when ref frame corresponding to NEWMV
   5083  // have NEWMV as single mode winner.
   5084  // Example: For an extended-compound mode,
   5085  // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
   5086  // - Ref frame corresponding to NEWMV is ALTREF_FRAME
   5087  // - Avoid pruning this mode, if best single mode corresponding to ref frame
   5088  //   ALTREF_FRAME is NEWMV
   5089  const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
   5090  if (single_mode == NEWMV) return 0;
   5091 
   5092  // Avoid pruning the compound mode when best single mode is not available
   5093  if (prune_comp_using_best_single_mode_ref == 1)
   5094    if (single_mode == MB_MODE_COUNT) return 0;
   5095  return 1;
   5096 }
   5097 
   5098 static int compare_int64(const void *a, const void *b) {
   5099  int64_t a64 = *((int64_t *)a);
   5100  int64_t b64 = *((int64_t *)b);
   5101  if (a64 < b64) {
   5102    return -1;
   5103  } else if (a64 == b64) {
   5104    return 0;
   5105  } else {
   5106    return 1;
   5107  }
   5108 }
   5109 
// Record a new winning inter mode: update search_state's best-mode fields,
// copy the winning rd stats to best_rd_stats_dst, and save the per-4x4
// transform bookkeeping into the coding context ctx.
// txfm_search_done indicates whether rd_stats_y/uv came from an actual
// transform search (1) or only from an rd estimate (0).
static inline void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // skip_txfm is only honored for inter modes; intra winners never skip.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Fold the skip-txfm signaling cost into the stored luma rate.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Persist per-4x4 block skip flags and tx types into the coding context.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
   5144 
   5145 // Find the best RD for a reference frame (among single reference modes)
   5146 // and store +10% of it in the 0-th element in ref_frame_rd.
   5147 static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
   5148  assert(ref_frame_rd[0] == INT64_MAX);
   5149  int64_t ref_copy[REF_FRAMES - 1];
   5150  memcpy(ref_copy, ref_frame_rd + 1,
   5151         sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
   5152  qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
   5153 
   5154  int64_t cutoff = ref_copy[0];
   5155  // The cut-off is within 10% of the best.
   5156  if (cutoff != INT64_MAX) {
   5157    assert(cutoff < INT64_MAX / 200);
   5158    cutoff = (110 * cutoff) / 100;
   5159  }
   5160  ref_frame_rd[0] = cutoff;
   5161 }
   5162 
   5163 // Check if either frame is within the cutoff.
   5164 static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
   5165                                        MV_REFERENCE_FRAME frame1,
   5166                                        MV_REFERENCE_FRAME frame2) {
   5167  assert(frame2 > 0);
   5168  return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
   5169         ref_frame_rd[frame2] <= ref_frame_rd[0];
   5170 }
   5171 
// Re-run the motion-mode search (and, when do_tx_search is set, the full
// transform search) for the stored best single-reference candidates, and
// update search_state / rd_cost / ctx whenever a candidate beats the current
// best rd.
static inline void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore this candidate's mode info and stored rates.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    struct macroblockd_plane *pd = xd->plane;
    // Snapshot the destination buffers/strides for the callee to restore.
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point the prediction planes at the candidate's reference buffers.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);

      // Scale the rd values being compared via increase_warp_mode_rd()
      // (controlled by bias_warp_mode_rd_scale_pct) before deciding.
      int64_t best_scaled_rd = search_state->best_rd;
      int64_t this_scaled_rd = rd_stats.rdcost;
      if (search_state->best_mode_index != THR_INVALID)
        increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
                              &this_scaled_rd,
                              cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);

      if (this_scaled_rd < best_scaled_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
   5257 
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output: set by skip_inter_mode() when only the motion-mode search for
  // the current mode should be skipped (the mode itself is still tried).
  int *skip_motion_mode;
  // Masks of modes/ref frames to exclude, consulted by the
  // order-independent skip checks.
  mode_skip_mask_t *mode_skip_mask;
  // Running state of the inter mode search.
  InterModeSearchState *search_state;
  // Bit mask (indexed by ref frame type) of reference frames to skip.
  int skip_ref_frame_mask;
  // Set once the first compound mode has been reached, after which the
  // single-reference statistics are analyzed for pruning.
  int reach_first_comp_mode;
  // Multiplier (in MODE_THRESH_QBITS fixed point) applied to the mode
  // threshold when the current best mode is skippable.
  int mode_thresh_mul_fact;
  // Number of single-reference modes processed so far; compared against
  // NUM_SINGLE_REF_MODES before computing single-ref pruning stats.
  int num_single_modes_processed;
  // Set once find_top_ref() has prepared the single-ref cut-off stats.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
   5271 
// Apply the cascade of speed-feature checks that decide whether the inter
// mode at position midx in the default mode order should be skipped.
// Returns 1 to skip the mode entirely, 0 to evaluate it; additionally sets
// *args->skip_motion_mode when only the motion-mode search should be skipped.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled elsewhere.
  if (ref_frame == INTRA_FRAME) return 1;

  // Optionally skip compound modes on ARF update frames.
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  // ret == 2 means: keep the mode but skip its motion-mode search.
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  // Prune extended compound modes using neighbor ref-frame matches.
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  // Prune extended compound modes using the best single mode per ref frame.
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  // Prune NEARESTMV/NEARMV single-ref modes based on ref-mv weights.
  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  // Real-time pruning of GOLDEN_FRAME single-ref modes late in the sub-GOP,
  // when the best mode so far does not use GOLDEN_FRAME.
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
   5403 
   5404 static void record_best_compound(REFERENCE_MODE reference_mode,
   5405                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
   5406                                 InterModeSearchState *search_state,
   5407                                 int compmode_cost) {
   5408  int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
   5409 
   5410  if (reference_mode == REFERENCE_MODE_SELECT) {
   5411    single_rate = rd_stats->rate - compmode_cost;
   5412    hybrid_rate = rd_stats->rate;
   5413  } else {
   5414    single_rate = rd_stats->rate;
   5415    hybrid_rate = rd_stats->rate + compmode_cost;
   5416  }
   5417 
   5418  single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
   5419  hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
   5420 
   5421  if (!comp_pred) {
   5422    if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
   5423      search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
   5424  } else {
   5425    if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
   5426      search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
   5427  }
   5428  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
   5429    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
   5430 }
   5431 
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort the candidate list by estimated RD so the most promising modes are
  // evaluated first.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the number of candidates per the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  // Estimated RD of the best-ranked candidate; used below to early-terminate
  // candidates whose estimate is clearly inferior.
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  // Per-prediction-mode count of transform searches done, used with
  // num_mode_thresh to limit repeated evaluation of the same mode.
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once a candidate's estimated RD is too far above the best estimate
    // (equivalently: curr_est_rd exceeds top_est_rd / 0.80).
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Once the overall budget is exceeded, allow at most one more search per
    // prediction mode (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // NOTE(review): this flag is never set to true anywhere in this function,
    // so the predictor below is always (re)built -- confirm this is intended.
    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma-only RD including the skip-txfm flag cost; reported back through
      // *yrd for the best candidate in this partition.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    // Optionally bias the RD comparison against warp modes before deciding
    // whether this candidate becomes the new best.
    int64_t best_scaled_rd = search_state->best_rd;
    int64_t this_scaled_rd = rd_stats.rdcost;
    increase_warp_mode_rd(&search_state->best_mbmode, mbmi, &best_scaled_rd,
                          &this_scaled_rd,
                          cpi->sf.inter_sf.bias_warp_mode_rd_scale_pct);
    if (this_scaled_rd < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    if (this_scaled_rd < best_scaled_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
   5625 
// Indicates number of winner simple translation modes to be used
// (presumably indexed by the motion_mode_for_winner_cand speed-feature
// level, with 0 disabling the feature -- confirm at the use site).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
   5628 
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search. The list is kept sorted by ascending
// rd_cost and capped at max_winner_motion_mode_cand entries.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  // (first entry whose rd_cost exceeds this_rd; defaults to the list end)
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift worse candidates down one slot; the count of moved entries is
    // clamped so the last slot is simply overwritten when at capacity.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    // Fill in the scratch candidate, then copy it into the found slot.
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    // Grow the stored count, saturating at the list capacity.
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
   5671 
   5672 /*!\brief Search intra modes in interframes
   5673 *
   5674 * \ingroup intra_mode_search
   5675 *
   5676 * This function searches for the best intra mode when the current frame is an
   5677 * interframe. This function however does *not* handle luma palette mode.
   5678 * Palette mode is currently handled by \ref av1_search_palette_mode.
   5679 *
   5680 * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
   5682 * for the best chroma mode. Because palette mode is currently not handled by
   5683 * here, a cache of uv mode is stored in
   5684 * InterModeSearchState::intra_search_state so it can be reused later by \ref
   5685 * av1_search_palette_mode.
   5686 *
 * \param[in,out] search_state      Struct keeping track of the prediction mode
 *                                  search state in interframe.
   5689 *
   5690 * \param[in]     cpi               Top-level encoder structure.
   5691 * \param[in,out] x                 Pointer to struct holding all the data for
   5692 *                                  the current prediction block.
   5693 * \param[out]    rd_cost           Stores the best rd_cost among all the
   5694 *                                  prediction modes searched.
   5695 * \param[in]     bsize             Current block size.
   5696 * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
   5697 *                                  copy the tx_type and txfm_skip arrays.
   5698 *                                  for only the Y plane.
   5699 * \param[in]     sf_args           Stores the list of intra mode candidates
   5700 *                                  to be searched.
   5701 * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
   5702 *                                      current ref frame is an intra frame.
   5703 * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
   5704 *                                  terminate chroma intra mode search.
   5705 *
   5706 * \remark If a new best mode is found, search_state and rd_costs are updated
   5707 * correspondingly. While x is also modified, it is only used as a temporary
   5708 * buffer, and the final decisions are stored in search_state.
   5709 */
static inline void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Best luma intra mode found so far and its associated stats.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }

  // In sharpness mode, only blocks up to 4x4 mi units are searched here.
  if (cpi->oxcf.algo_cfg.sharpness) {
    int bh = mi_size_high[bsize];
    int bw = mi_size_wide[bsize];
    if (bh > 4 || bw > 4) return;
  }

  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // Honor an earlier decision (e.g. from the ML pruning model) to skip
    // intra search entirely.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip modes disabled by configuration or speed features.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    // Non-zero angle deltas are only valid when angle-delta coding is
    // enabled for this block size.
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);

    if (intra_rd_y < INT64_MAX) {
      adjust_cost(cpi, x, &intra_rd_y);
    }

    // Track the best luma mode among those that beat the inter-derived
    // luma RD threshold.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma mode beat the threshold: intra cannot win this block.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  adjust_rdcost(cpi, x, &intra_rd_stats);

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
   5904 
   5905 #if !CONFIG_REALTIME_ONLY
   5906 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
   5907 // features in intra mode pruning.
   5908 static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
   5909                                                MACROBLOCK *x, BLOCK_SIZE bsize,
   5910                                                int mi_row, int mi_col,
   5911                                                int64_t *inter_cost,
   5912                                                int64_t *intra_cost) {
   5913  const AV1_COMMON *const cm = &cpi->common;
   5914  // Only consider full SB.
   5915  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
   5916  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
   5917  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
   5918                  (block_size_high[sb_size] / tpl_bsize_1d);
   5919  SuperBlockEnc *sb_enc = &x->sb_enc;
   5920  if (sb_enc->tpl_data_count == len) {
   5921    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
   5922    const int tpl_stride = sb_enc->tpl_stride;
   5923    const int tplw = mi_size_wide[tpl_bsize];
   5924    const int tplh = mi_size_high[tpl_bsize];
   5925    const int nw = mi_size_wide[bsize] / tplw;
   5926    const int nh = mi_size_high[bsize] / tplh;
   5927    if (nw >= 1 && nh >= 1) {
   5928      const int of_h = mi_row % mi_size_high[sb_size];
   5929      const int of_w = mi_col % mi_size_wide[sb_size];
   5930      const int start = of_h / tplh * tpl_stride + of_w / tplw;
   5931 
   5932      for (int k = 0; k < nh; k++) {
   5933        for (int l = 0; l < nw; l++) {
   5934          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
   5935          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
   5936        }
   5937      }
   5938      *inter_cost /= nw * nh;
   5939      *intra_cost /= nw * nh;
   5940    }
   5941  }
   5942 }
   5943 #endif  // !CONFIG_REALTIME_ONLY
   5944 
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
// intra mode search. Sets intra_search_state.skip_intra_modes when intra
// evaluation is judged unlikely to beat the best inter mode found so far.
static inline void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
    int64_t inter_cost, int64_t intra_cost) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
  // Heuristic prune: small best-mode motion on a sufficiently textured block
  // (source_variance > 128) skips intra search for large single-ref blocks.
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
        x->source_variance > 128) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }

  // The remaining pruning only applies when the feature is on and the block
  // has at least minimal texture.
  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  if (!(skip_intra_in_interframe &&
        (x->source_variance > src_var_thresh_intra_skip)))
    return;

  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // Negative costs presumably mean TPL stats were unavailable -- confirm
      // against calculate_cost_from_tpl_data's caller.
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    // Separate network for sub-480p vs. larger resolutions.
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
   6013 
   6014 static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
   6015                                             int is_single_pred) {
   6016  const MODE encoding_mode = cpi->oxcf.mode;
   6017  if (encoding_mode == REALTIME) {
   6018    return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
   6019            (cpi->sf.interp_sf.skip_interp_filter_search ||
   6020             cpi->sf.winner_mode_sf.winner_mode_ifs));
   6021  } else if (encoding_mode == GOOD) {
   6022    // Skip interpolation filter search for single prediction modes.
   6023    return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
   6024  }
   6025  return false;
   6026 }
   6027 
   6028 static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
   6029                                     BLOCK_SIZE bsize) {
   6030  const AV1_COMMON *const cm = &cpi->common;
   6031  const SPEED_FEATURES *const sf = &cpi->sf;
   6032 
   6033  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
   6034      !sf->rt_sf.short_circuit_low_temp_var ||
   6035      !sf->rt_sf.prune_inter_modes_using_temp_var) {
   6036    return 0;
   6037  }
   6038 
   6039  const int mi_row = x->e_mbd.mi_row;
   6040  const int mi_col = x->e_mbd.mi_col;
   6041  int is_low_temp_var = 0;
   6042 
   6043  if (cm->seq_params->sb_size == BLOCK_64X64)
   6044    is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
   6045        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
   6046  else
   6047    is_low_temp_var = av1_get_force_skip_low_temp_var(
   6048        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
   6049 
   6050  return is_low_temp_var;
   6051 }
   6052 
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
//
// Top-level RD mode decision for an inter frame block: loops over every
// candidate inter mode via handle_inter_mode(), then (optionally) runs a
// deferred transform search on the best candidates, evaluates intra modes,
// palette, and skip mode, and finally writes the winning mode into *mbmi and
// *rd_cost. Returns with rd_cost->rate == INT_MAX when no mode beats
// best_rd_so_far.
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
                            struct macroblock *x, struct RD_STATS *rd_cost,
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                            int64_t best_rd_so_far) {
  AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  const int num_planes = av1_num_planes(cm);
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int i;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];

  InterModeSearchState search_state;
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
  // One slot per ref frame; INTERINTRA_MODES means "not yet evaluated".
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
  };
  // Positional initializer for HandleInterModeArgs; field order must match
  // the struct declaration — do not reorder.
  HandleInterModeArgs args = { { NULL },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
                               { NULL },
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                 MAX_SB_SIZE >> 1 },
                               NULL,
                               NULL,
                               NULL,
                               search_state.modelled_rd,
                               INT_MAX,
                               INT_MAX,
                               search_state.simple_rd,
                               0,
                               false,
                               interintra_modes,
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
                               { { 0, 0 } },
                               { 0 },
                               0,
                               0,
                               -1,
                               -1,
                               -1,
                               { 0 },
                               { 0 },
                               UINT_MAX };
  // Currently, is_low_temp_var is used in real time encoding.
  const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);

  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
  // Indicates the appropriate number of simple translation winner modes for
  // exhaustive motion mode evaluation
  const int max_winner_motion_mode_cand =
      num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
  motion_mode_candidate motion_mode_cand;
  motion_mode_best_st_candidate best_motion_mode_cands;
  // Initializing the number of motion mode candidates to zero.
  best_motion_mode_cands.num_motion_mode_cand = 0;
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;

  av1_invalid_rd_stats(rd_cost);

  // num = -1 marks the warp sample cache as stale for every ref frame.
  for (i = 0; i < REF_FRAMES; ++i) {
    x->warp_sample_info[i].num = -1;
  }

  // Ref frames that are selected by square partition blocks.
  int picked_ref_frames_mask = 0;
  if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
      mbmi->partition != PARTITION_NONE) {
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
    // implies prune for vert, horiz and extended partition blocks.
    if ((mbmi->partition != PARTITION_VERT &&
         mbmi->partition != PARTITION_HORZ) ||
        sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
      picked_ref_frames_mask =
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
#endif
  // Skip ref frames that never selected by square blocks.
  const int skip_ref_frame_mask =
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
  mode_skip_mask_t mode_skip_mask;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  // init params, set frame modes, speed features
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
                                skip_ref_frame_mask, ref_costs_single,
                                ref_costs_comp, yv12_mb);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
#endif

  int64_t best_est_rd = INT64_MAX;
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  // If do_tx_search is 0, only estimated RD should be computed.
  // If do_tx_search is 1, all modes have TX search performed.
  const int do_tx_search =
      !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
        (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
         num_pels_log2_lookup[bsize] > 8));
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  inter_modes_info->num = 0;

  // Temporary buffers used by handle_inter_mode().
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);

  // The best RD found for the reference frame, among single reference modes.
  // Note that the 0-th element will contain a cut-off that is later used
  // to determine if we should skip a compound mode.
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
                                       INT64_MAX, INT64_MAX, INT64_MAX,
                                       INT64_MAX, INT64_MAX };

  // Prepared stats used later to check if we could skip intra mode eval.
  // -1 means "not computed" (e.g. in realtime-only builds).
  int64_t inter_cost = -1;
  int64_t intra_cost = -1;
  // Need to tweak the threshold for hdres speed 0 & 1.
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;

  // Obtain the relevant tpl stats for pruning inter modes
  PruneInfoFromTpl inter_cost_info_from_tpl;
#if !CONFIG_REALTIME_ONLY
  if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
    // prune_ref_by_selective_ref_frame()
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
    // prune_ref_by_selective_ref_frame()
    // Populating valid_refs[idx] = 1 ensures that
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
    // pruned ref frame.
    int valid_refs[INTER_REFS_PER_FRAME];
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
      valid_refs[frame - 1] =
          x->tpl_keep_ref_frame[frame] ||
          !prune_ref_by_selective_ref_frame(
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
    }
    av1_zero(inter_cost_info_from_tpl);
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
                              &inter_cost_info_from_tpl);
  }

  // Disable the TPL-based intra gating for high-resolution, low-speed runs.
  const int do_pruning =
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
      cpi->oxcf.algo_cfg.enable_tpl_model)
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
                                 &intra_cost);
#endif  // !CONFIG_REALTIME_ONLY

  // Initialize best mode stats for winner mode processing.
  const int max_winner_mode_count =
      winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
  zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          sf->winner_mode_sf.multi_winner_mode_type, 0);

  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
    // Higher multiplication factor values for lower quantizers.
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
  }

  // Initialize arguments for mode loop speed features
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
                              &mode_skip_mask,
                              &search_state,
                              skip_ref_frame_mask,
                              0,
                              mode_thresh_mul_fact,
                              0,
                              0 };
  int64_t best_inter_yrd = INT64_MAX;

  // This is the main loop of this function. It loops over all possible inter
  // modes and calls handle_inter_mode() to compute the RD for each.
  // Here midx is just an iterator index that should not be used by itself
  // except to keep track of the number of modes searched. It should be used
  // with av1_default_mode_order to get the enum that defines the mode, which
  // can be used with av1_mode_defs to get the prediction mode and the ref
  // frames.
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
  // good speedup for real time case. If we decide to use compound mode in real
  // time, maybe we can modify av1_default_mode_order table.
  THR_MODES mode_start = THR_INTER_MODE_START;
  THR_MODES mode_end = THR_INTER_MODE_END;
  const CurrentFrame *const current_frame = &cm->current_frame;
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
    mode_start = SINGLE_REF_MODE_START;
    mode_end = SINGLE_REF_MODE_END;
  }

  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
    // Get the actual prediction mode we are trying in this iteration
    const THR_MODES mode_enum = av1_default_mode_order[midx];
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
    const PREDICTION_MODE this_mode = mode_def->mode;
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;

    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
    const int is_single_pred =
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    init_mbmi(mbmi, this_mode, ref_frames, cm);

    txfm_info->skip_txfm = 0;
    sf_args.num_single_modes_processed += is_single_pred;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, skip_inter_mode_time);
#endif
    // Apply speed features to decide if this inter mode can be skipped
    const int is_skip_inter_mode = skip_inter_mode(
        cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, skip_inter_mode_time);
#endif
    if (is_skip_inter_mode) continue;

    // Select prediction reference frames.
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    // Reset per-mode state that a previous iteration may have modified.
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->ref_mv_idx = 0;

    const int64_t ref_best_rd = search_state.best_rd;
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
    av1_init_rd_stats(&rd_stats);

    const int ref_frame_cost = comp_pred
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
                                   : ref_costs_single[ref_frame];
    const int compmode_cost =
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
    const int real_compmode_cost =
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
            ? compmode_cost
            : 0;
    // Point to variables that are maintained between loop iterations
    args.single_newmv = search_state.single_newmv;
    args.single_newmv_rate = search_state.single_newmv_rate;
    args.single_newmv_valid = search_state.single_newmv_valid;
    args.single_comp_cost = real_compmode_cost;
    args.ref_frame_cost = ref_frame_cost;
    args.best_pred_sse = search_state.best_pred_sse;
    args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
                           search_state.best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, handle_inter_mode_time);
#endif
    int64_t this_rd = handle_inter_mode(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
        &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, handle_inter_mode_time);
#endif
    // Collect single-ref statistics used later to prune compound modes.
    if (current_frame->reference_mode != SINGLE_REFERENCE) {
      if (!args.skip_ifs &&
          sf->inter_sf.prune_comp_search_by_single_result > 0 &&
          is_inter_singleref_mode(this_mode)) {
        collect_single_states(x, &search_state, mbmi);
      }

      if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
          is_inter_singleref_mode(this_mode))
        update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
    }

    if (this_rd == INT64_MAX) continue;

    if (mbmi->skip_txfm) {
      rd_stats_y.rate = 0;
      rd_stats_uv.rate = 0;
    }

    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
        this_rd < ref_frame_rd[ref_frame]) {
      ref_frame_rd[ref_frame] = this_rd;
    }

    adjust_cost(cpi, x, &this_rd);
    adjust_rdcost(cpi, x, &rd_stats);

    // Did this mode help, i.e., is it the new best mode
    if (this_rd < search_state.best_rd) {
      assert(IMPLIES(comp_pred,
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
      search_state.best_pred_sse = x->pred_sse[ref_frame];
      best_inter_yrd = this_yrd;
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, do_tx_search);
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
      // skip_rd[0] is the best total rd for a skip mode so far.
      // skip_rd[1] is the best total rd for a skip mode so far in luma.
      // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
      // When do_tx_search = 0, skip_rd[1] is updated.
      search_state.best_skip_rd[1] = skip_rd[1];
    }
    if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
      // Add this mode to motion mode candidate list for motion mode search
      // if using motion_mode_for_winner_cand speed feature
      handle_winner_cand(mbmi, &best_motion_mode_cands,
                         max_winner_motion_mode_cand, this_rd,
                         &motion_mode_cand, args.skip_motion_mode);
    }

    /* keep record of best compound/single-only prediction */
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
                         x->rdmult, &search_state, compmode_cost);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
#endif
  if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
    // For the single ref winner candidates, evaluate other motion modes (non
    // simple translation).
    evaluate_motion_mode_for_winner_candidates(
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
        &search_state, &best_inter_yrd);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, do_tx_search_time);
#endif
  if (do_tx_search != 1) {
    // A full tx search has not yet been done, do tx search for
    // top mode candidates
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
                                    yv12_mb, mi_row, mi_col, &search_state,
                                    rd_cost, ctx, &best_inter_yrd);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, do_tx_search_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, handle_intra_mode_time);
#endif
  // Gate intra mode evaluation if best of inter is skip except when source
  // variance is extremely low and also based on max intra bsize.
  skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
                                 intra_cost);

  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
                                   &sf_args, intra_ref_frame_cost,
                                   best_inter_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, handle_intra_mode_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, refine_winner_mode_tx_time);
#endif
  int winner_mode_count =
      sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
  // In effect only when fast tx search speed features are enabled.
  refine_winner_mode_tx(
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, refine_winner_mode_tx_time);
#endif

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  // Only try palette mode when the best mode so far is an intra mode.
  const int try_palette =
      cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
  RD_STATS this_rd_cost;
  int this_skippable = 0;
  if (try_palette) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_search_palette_mode_time);
#endif
    this_skippable = av1_search_palette_mode(
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
        ctx, &this_rd_cost, search_state.best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_search_palette_mode_time);
#endif
    // Adopt palette as the new best mode if it beats the current best RD.
    if (this_rd_cost.rdcost < search_state.best_rd) {
      search_state.best_mode_index = THR_DC;
      mbmi->mv[0].as_int = 0;
      rd_cost->rate = this_rd_cost.rate;
      rd_cost->dist = this_rd_cost.dist;
      rd_cost->rdcost = this_rd_cost.rdcost;
      search_state.best_rd = rd_cost->rdcost;
      search_state.best_mbmode = *mbmi;
      search_state.best_skip2 = 0;
      search_state.best_mode_skippable = this_skippable;
      memcpy(ctx->blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
    }
  }

  // Evaluate the dedicated skip mode when the frame allows it.
  search_state.best_mbmode.skip_mode = 0;
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
      cpi->oxcf.algo_cfg.sharpness != 3 && is_comp_ref_allowed(bsize)) {
    const struct segmentation *const seg = &cm->seg;
    unsigned char segment_id = mbmi->segment_id;
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
    }
  }

  // Make sure that the ref_mv_idx is only nonzero when we're
  // using a mode which can support ref_mv_idx
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
      !(search_state.best_mbmode.mode == NEWMV ||
        search_state.best_mbmode.mode == NEW_NEWMV ||
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
    search_state.best_mbmode.ref_mv_idx = 0;
  }

  // Nothing beat the incoming threshold: signal failure to the caller.
  if (search_state.best_mode_index == THR_INVALID ||
      search_state.best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  const InterpFilter interp_filter = features->interp_filter;
  assert((interp_filter == SWITCHABLE) ||
         (interp_filter ==
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
         !is_inter_block(&search_state.best_mbmode));
  assert((interp_filter == SWITCHABLE) ||
         (interp_filter ==
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
         !is_inter_block(&search_state.best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
  }

  // macroblock modes
  *mbmi = search_state.best_mbmode;
  txfm_info->skip_txfm |= search_state.best_skip2;

  // Note: this section is needed since the mode may have been forced to
  // GLOBALMV by the all-zero mode handling of ref-mv.
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
    // Correct the interp filters for GLOBALMV
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
      int_interpfilters filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      assert(mbmi->interp_filters.as_int == filters.as_int);
      (void)filters;
    }
  }

  txfm_info->skip_txfm |= search_state.best_mode_skippable;

  assert(search_state.best_mode_index != THR_INVALID);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, search_state.best_mode_index,
                       search_state.best_mode_skippable);
#else
  store_coding_context(x, ctx, search_state.best_mode_skippable);
#endif  // CONFIG_INTERNAL_STATS

  if (mbmi->palette_mode_info.palette_size[1] > 0) {
    assert(try_palette);
    av1_restore_uv_color_map(cpi, x);
  }
}
   6562 
// Mode decision for a block in a segment with the SEG_LVL_SKIP feature
// active: no mode search is performed. The block is forced to GLOBALMV with
// skipped transform, only the interpolation filter is (optionally) searched,
// and the rate consists purely of signaling costs. On failure to beat
// best_rd_so_far, rd_cost->rate is set to INT_MAX.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // Skip segments code zero residual, so distortion is fixed at 0.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the block to GLOBALMV / simple translation with skipped transform.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Use the segment's fixed reference frame if one is configured.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // Derive the motion vector from the frame-level global motion model.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  // Gather warp-model projection samples where warped motion is permitted.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    // Pick the switchable filter with the cheapest signaling rate
    // (distortion is fixed at 0, so only the rate matters).
    best_filter = EIGHTTAP_REGULAR;
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
   6694 
/*!\cond */
// Shared context for the calc_target_weighted_pred_{above,left} neighbor
// callbacks used while building OBMC weighted-source and mask buffers.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // destination wsrc/mask buffers
  const uint8_t *tmp;             // neighbor prediction samples (for HBD,
                                  // consumers apply CONVERT_TO_SHORTPTR)
  int tmp_stride;                 // row stride of 'tmp' in samples
  int overlap;                    // extent of the OBMC overlap region, in
                                  // pixels; also selects the 1-D blend mask
};
/*!\endcond */
   6703 
   6704 static inline void calc_target_weighted_pred_above(
   6705    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
   6706    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
   6707  (void)nb_mi;
   6708  (void)num_planes;
   6709  (void)rel_mi_row;
   6710  (void)dir;
   6711 
   6712  struct calc_target_weighted_pred_ctxt *ctxt =
   6713      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
   6714 
   6715  const int bw = xd->width << MI_SIZE_LOG2;
   6716  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
   6717 
   6718  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
   6719  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
   6720  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
   6721  const int is_hbd = is_cur_buf_hbd(xd);
   6722 
   6723  if (!is_hbd) {
   6724    for (int row = 0; row < ctxt->overlap; ++row) {
   6725      const uint8_t m0 = mask1d[row];
   6726      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
   6727      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
   6728        wsrc[col] = m1 * tmp[col];
   6729        mask[col] = m0;
   6730      }
   6731      wsrc += bw;
   6732      mask += bw;
   6733      tmp += ctxt->tmp_stride;
   6734    }
   6735  } else {
   6736    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
   6737 
   6738    for (int row = 0; row < ctxt->overlap; ++row) {
   6739      const uint8_t m0 = mask1d[row];
   6740      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
   6741      for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
   6742        wsrc[col] = m1 * tmp16[col];
   6743        mask[col] = m0;
   6744      }
   6745      wsrc += bw;
   6746      mask += bw;
   6747      tmp16 += ctxt->tmp_stride;
   6748    }
   6749  }
   6750 }
   6751 
   6752 static inline void calc_target_weighted_pred_left(
   6753    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
   6754    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
   6755  (void)nb_mi;
   6756  (void)num_planes;
   6757  (void)rel_mi_col;
   6758  (void)dir;
   6759 
   6760  struct calc_target_weighted_pred_ctxt *ctxt =
   6761      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
   6762 
   6763  const int bw = xd->width << MI_SIZE_LOG2;
   6764  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
   6765 
   6766  int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
   6767  int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
   6768  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
   6769  const int is_hbd = is_cur_buf_hbd(xd);
   6770 
   6771  if (!is_hbd) {
   6772    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
   6773      for (int col = 0; col < ctxt->overlap; ++col) {
   6774        const uint8_t m0 = mask1d[col];
   6775        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
   6776        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
   6777                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
   6778        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
   6779      }
   6780      wsrc += bw;
   6781      mask += bw;
   6782      tmp += ctxt->tmp_stride;
   6783    }
   6784  } else {
   6785    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
   6786 
   6787    for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
   6788      for (int col = 0; col < ctxt->overlap; ++col) {
   6789        const uint8_t m0 = mask1d[col];
   6790        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
   6791        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
   6792                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
   6793        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
   6794      }
   6795      wsrc += bw;
   6796      mask += bw;
   6797      tmp16 += ctxt->tmp_stride;
   6798    }
   6799  }
   6800 }
   6801 
   6802 // This function has a structure similar to av1_build_obmc_inter_prediction
   6803 //
   6804 // The OBMC predictor is computed as:
   6805 //
   6806 //  PObmc(x,y) =
   6807 //    AOM_BLEND_A64(Mh(x),
   6808 //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
   6809 //                  PLeft(x, y))
   6810 //
   6811 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
   6812 // rounding, this can be written as:
   6813 //
   6814 //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
   6815 //    Mh(x) * Mv(y) * P(x,y) +
   6816 //      Mh(x) * Cv(y) * Pabove(x,y) +
   6817 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
   6818 //
// Where :
//
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
//
   6824 // This function computes 'wsrc' and 'mask' as:
   6825 //
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      Mh(x) * Cv(y) * Pabove(x,y) -
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
   6830 //
   6831 //  mask(x, y) = Mh(x) * Mv(y)
   6832 //
   6833 // These can then be used to efficiently approximate the error for any
   6834 // predictor P in the context of the provided neighbouring predictors by
   6835 // computing:
   6836 //
   6837 //  error(x, y) =
   6838 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
   6839 //
   6840 static inline void calc_target_weighted_pred(
   6841    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
   6842    const uint8_t *above, int above_stride, const uint8_t *left,
   6843    int left_stride) {
   6844  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
   6845  const int bw = xd->width << MI_SIZE_LOG2;
   6846  const int bh = xd->height << MI_SIZE_LOG2;
   6847  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
   6848  int32_t *mask_buf = obmc_buffer->mask;
   6849  int32_t *wsrc_buf = obmc_buffer->wsrc;
   6850 
   6851  const int is_hbd = is_cur_buf_hbd(xd);
   6852  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
   6853 
   6854  // plane 0 should not be sub-sampled
   6855  assert(xd->plane[0].subsampling_x == 0);
   6856  assert(xd->plane[0].subsampling_y == 0);
   6857 
   6858  av1_zero_array(wsrc_buf, bw * bh);
   6859  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
   6860 
   6861  // handle above row
   6862  if (xd->up_available) {
   6863    const int overlap =
   6864        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
   6865    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
   6866                                                   above_stride, overlap };
   6867    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
   6868                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
   6869                                  calc_target_weighted_pred_above, &ctxt);
   6870  }
   6871 
   6872  for (int i = 0; i < bw * bh; ++i) {
   6873    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
   6874    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
   6875  }
   6876 
   6877  // handle left column
   6878  if (xd->left_available) {
   6879    const int overlap =
   6880        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
   6881    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
   6882                                                   left_stride, overlap };
   6883    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
   6884                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
   6885                                 calc_target_weighted_pred_left, &ctxt);
   6886  }
   6887 
   6888  if (!is_hbd) {
   6889    const uint8_t *src = x->plane[0].src.buf;
   6890 
   6891    for (int row = 0; row < bh; ++row) {
   6892      for (int col = 0; col < bw; ++col) {
   6893        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
   6894      }
   6895      wsrc_buf += bw;
   6896      src += x->plane[0].src.stride;
   6897    }
   6898  } else {
   6899    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
   6900 
   6901    for (int row = 0; row < bh; ++row) {
   6902      for (int col = 0; col < bw; ++col) {
   6903        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
   6904      }
   6905      wsrc_buf += bw;
   6906      src += x->plane[0].src.stride;
   6907    }
   6908  }
   6909 }