tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nonrd_opt.h (28710B)


      1 /*
      2 * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
     13 #define AOM_AV1_ENCODER_NONRD_OPT_H_
     14 
     15 #include "av1/encoder/context_tree.h"
     16 #include "av1/encoder/rdopt_utils.h"
     17 #include "av1/encoder/rdopt.h"
     18 
     19 #define RTC_INTER_MODES (4)
     20 #define RTC_INTRA_MODES (4)
     21 #define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
     22 #define CALC_BIASED_RDCOST(rdcost) (7 * (rdcost) >> 3)
     23 #define NUM_COMP_INTER_MODES_RT (6)
     24 #define NUM_INTER_MODES 12
     25 #define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
     26  (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? true : false)
     27 #define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
     28 #define FILTER_SEARCH_SIZE 2
     29 #if !CONFIG_REALTIME_ONLY
     30 #define MOTION_MODE_SEARCH_SIZE 2
     31 #endif
     32 
     // Declaration only: the variable is defined in a source file outside this
     // header. NOTE(review): presumably a debug counter of inter-mode picks -
     // confirm at the definition.
     33 extern int g_pick_inter_mode_cnt;
     34 /*!\cond */
     // One slot of a prediction-buffer pool, handed out by get_pred_buffer() and
     // released by free_pred_buffer().
     35 typedef struct {
     36  uint8_t *data;  // Sample storage; allocation/ownership is not visible here.
     37  int stride;     // Presumably the row stride of data - TODO confirm.
     38  int in_use;     // 1 while claimed via get_pred_buffer(), 0 once released.
     39 } PRED_BUFFER;
     40 
     // Record of the best mode candidate; embedded in InterModeSearchStateNonrd
     // as best_pickmode ("best inter mode data").
     // NOTE(review): the per-field notes below are read off field names and
     // types only; the code that fills this struct is outside this header.
     41 typedef struct {
     42  PRED_BUFFER *best_pred;  // Presumably the pool slot holding the best prediction.
     43  PREDICTION_MODE best_mode;
     44  TX_SIZE best_tx_size;
     45  TX_TYPE tx_type;
     46  MV_REFERENCE_FRAME best_ref_frame;
     47  MV_REFERENCE_FRAME best_second_ref_frame;  // Presumably for compound modes.
     48  uint8_t best_mode_skip_txfm;
     49  uint8_t best_mode_initial_skip_flag;
     50  int_interpfilters best_pred_filter;
     51  MOTION_MODE best_motion_mode;
     52  WarpedMotionParams wm_params;
     53  int num_proj_ref;
     54  PALETTE_MODE_INFO pmi;
     55  int64_t best_sse;
     56 } BEST_PICKMODE;
     57 
     // A (reference frame, prediction mode) pair; element type of ref_mode_set.
     58 typedef struct {
     59  MV_REFERENCE_FRAME ref_frame;
     60  PREDICTION_MODE pred_mode;
     61 } REF_MODE;
     62 
     // Two reference frames plus one prediction mode; element type of
     // comp_ref_mode_set.
     63 typedef struct {
     64  MV_REFERENCE_FRAME ref_frame[2];
     65  PREDICTION_MODE pred_mode;
     66 } COMP_REF_MODE;
     67 
     // Argument bundle for intra estimation. The values noted as "default" are
     // the ones written by init_estimate_block_intra_args().
     // NOTE(review): presumably passed as the `void *arg` of
     // av1_estimate_block_intra() - confirm at the definition.
     68 struct estimate_block_intra_args {
     69  AV1_COMP *cpi;
     70  MACROBLOCK *x;
     71  PREDICTION_MODE mode;          // Default: DC_PRED.
     72  int skippable;                 // Default: 1.
     73  RD_STATS *rdc;                 // Default: null; must be set before use.
     74  unsigned int best_sad;         // Default: UINT_MAX.
     75  bool prune_mode_based_on_sad;  // Default: false.
     76  bool prune_palette_sad;        // Default: false.
     77 };
     78 /*!\endcond */
     79 
     80 /*!\brief Structure to store parameters and statistics used in non-rd inter mode
     81 * evaluation.
     82 */
     83 typedef struct {
     84  //! Structure to hold best inter mode data
     85  BEST_PICKMODE best_pickmode;
     86  //! Structure to hold the RD cost of the current mode
     87  RD_STATS this_rdc;
     88  //! RD cost of the best mode found so far (stored by value, not a pointer)
     89  RD_STATS best_rdc;
     90  //! Distortion of chroma planes for all modes and reference frames
     91  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
     92  //! Buffer to hold predicted block for all reference frames and planes
     93  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
     94  //! Array to hold variance of all modes and reference frames
     95  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
     96  //! Array to hold ref cost of single reference mode for all ref frames
     97  unsigned int ref_costs_single[REF_FRAMES];
     98  //! Array to hold motion vector for all modes and reference frames
     99  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
    100  //! Array to hold best mv for all modes and reference frames
    101  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
    102  //! Array to hold inter mode cost of single ref mode for all ref frames
    103  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
    104  //! Array to hold the reference-frame usage mask for each reference frame
    105  int use_ref_frame_mask[REF_FRAMES];
    106  //! Array to hold flags of evaluated modes for each reference frame
    107  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
    108  //! Array to hold flag indicating if scaled reference frame is used.
    109  bool use_scaled_ref_frame[REF_FRAMES];
    110 } InterModeSearchStateNonrd;
    111 
    // Per-block-size lookup with BLOCK_SIZES (16) entries.
    // NOTE(review): values look like log2(block width / 4) for the sizes
    // BLOCK_4X4 .. BLOCK_128X128 - confirm against the BLOCK_SIZE enum order.
    112 static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
    113                                                          2, 2, 3, 3, 3, 4,
    114                                                          4, 4, 5, 5 };
    // Per-block-size lookup with BLOCK_SIZES (16) entries; companion of
    // b_width_log2_lookup.
    // NOTE(review): values look like log2(block height / 4) - confirm against
    // the BLOCK_SIZE enum order.
    115 static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
    116                                                           2, 3, 2, 3, 4, 3,
    117                                                           4, 5, 4, 5 };
    118 
    // The four intra modes listed for the real-time search; the order matches
    // the columns of row 0 of mode_idx (DC, V, H, SMOOTH).
    119 static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
    120                                                   SMOOTH_PRED };
    121 
    // The four single-reference inter modes listed for the real-time search; the
    // order matches the columns of rows 1..7 of mode_idx (NEAREST, NEAR, GLOBAL,
    // NEW).
    122 static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
    123                                                   NEWMV };
    124 
    // Maps [reference frame][mode slot] to a THR_MODES value. Row 0 holds the
    // intra entries (same order as intra_mode_list); each following row holds the
    // NEAREST/NEAR/GLOBAL/NEW entries for one reference (same order as
    // inter_mode_list).
    // NOTE(review): the row order (L, L2, L3, G, B, A2, A suffixes) is assumed to
    // follow the MV_REFERENCE_FRAME enum - confirm.
    125 static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
    126  { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
    127  { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
    128  { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
    129  { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
    130  { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
    131  { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
    132  { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
    133  { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
    134 };
    135 
    136 // GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
    137 // mode.
        // The set lists NUM_INTER_MODES (12) entries: the four single-reference
        // modes for each of LAST_FRAME, GOLDEN_FRAME and ALTREF_FRAME.
    138 static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
    139  { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
    140  { LAST_FRAME, GLOBALMV },    { LAST_FRAME, NEWMV },
    141  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
    142  { GOLDEN_FRAME, GLOBALMV },  { GOLDEN_FRAME, NEWMV },
    143  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
    144  { ALTREF_FRAME, GLOBALMV },  { ALTREF_FRAME, NEWMV },
    145 };
    146 
    // Compound candidates: NUM_COMP_INTER_MODES_RT (6) entries, pairing
    // LAST_FRAME with GOLDEN_FRAME, LAST2_FRAME and ALTREF_FRAME, each with
    // GLOBAL_GLOBALMV and NEAREST_NEARESTMV.
    147 static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
    148  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
    149  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
    150  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
    151  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
    152  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
    153  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
    154 };
    155 
    // All nine ordered pairs of { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH,
    // MULTITAP_SHARP }. Entries 0 and 1 are the regular/regular and
    // smooth/smooth pairs.
    // NOTE(review): which member of as_filters is the horizontal and which the
    // vertical filter is not visible in this header.
    156 static const int_interpfilters filters_ref_set[9] = {
    157  [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    158  [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    159  [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
    160  [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
    161  [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
    162  [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
    163  [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
    164  [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
    165  [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
    166 };
    167 
    // Bitmasks over inter prediction modes: bit (1 << MODE) is set for each
    // PREDICTION_MODE included in the mask.
    168 enum {
    169  //  INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
    170  INTER_NEAREST = (1 << NEARESTMV),
    171  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
    172  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
    173  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
    174 };
    175 
    176 // The original scan order (default_scan_8x8), modified to account for the extra
    177 // transpose in the hadamard C implementation, i.e., aom_hadamard_lp_8x8_c and
    178 // aom_hadamard_8x8_c.
        // 64 entries; paired with av1_default_iscan_8x8_transpose.
    179 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
    180  0,  8,  1,  2,  9,  16, 24, 17, 10, 3,  4,  11, 18, 25, 32, 40,
    181  33, 26, 19, 12, 5,  6,  13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
    182  28, 21, 14, 7,  15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
    183  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
    184 };
    185 
    186 // The original scan order (av1_default_iscan_8x8) is modified to match
    187 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
    188 // aom_hadamard_8x8_avx2. The hadamard AVX2 implementation changes the
    189 // order of coefficients, so the normal scan order is no longer
    190 // guaranteed to scan low coefficients first; the scan order is therefore
    191 // modified accordingly.
    192 // Note that this one has to be used together with default_scan_8x8_transpose.
    193 DECLARE_ALIGNED(16, static const int16_t,
    194                av1_default_iscan_8x8_transpose[64]) = {
    195  0,  2,  3,  9,  10, 20, 21, 35, 1,  4,  8,  11, 19, 22, 34, 36,
    196  5,  7,  12, 18, 23, 33, 37, 48, 6,  13, 17, 24, 32, 38, 47, 49,
    197  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
    198  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
    199 };
    200 
    201 // The original scan order (default_scan_16x16), modified to account for the
    202 // extra transpose in the hadamard C implementation in the lp case, i.e.,
    203 // aom_hadamard_lp_16x16_c.
        // 256 entries; paired with av1_default_iscan_lp_16x16_transpose.
    204 DECLARE_ALIGNED(16, static const int16_t,
    205                default_scan_lp_16x16_transpose[256]) = {
    206  0,   8,   2,   4,   10,  16,  24,  18,  12,  6,   64,  14,  20,  26,  32,
    207  40,  34,  28,  22,  72,  66,  68,  74,  80,  30,  36,  42,  48,  56,  50,
    208  44,  38,  88,  82,  76,  70,  128, 78,  84,  90,  96,  46,  52,  58,  1,
    209  9,   3,   60,  54,  104, 98,  92,  86,  136, 130, 132, 138, 144, 94,  100,
    210  106, 112, 62,  5,   11,  17,  25,  19,  13,  7,   120, 114, 108, 102, 152,
    211  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65,  15,  21,  27,
    212  33,  41,  35,  29,  23,  73,  67,  124, 118, 168, 162, 156, 150, 200, 194,
    213  196, 202, 208, 158, 164, 170, 176, 126, 69,  75,  81,  31,  37,  43,  49,
    214  57,  51,  45,  39,  89,  83,  77,  71,  184, 178, 172, 166, 216, 210, 204,
    215  198, 206, 212, 218, 224, 174, 180, 186, 129, 79,  85,  91,  97,  47,  53,
    216  59,  61,  55,  105, 99,  93,  87,  137, 131, 188, 182, 232, 226, 220, 214,
    217  222, 228, 234, 240, 190, 133, 139, 145, 95,  101, 107, 113, 63,  121, 115,
    218  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
    219  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
    220  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
    221  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
    222  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
    223  255
    224 };
    225 
    226 #if CONFIG_AV1_HIGHBITDEPTH
    227 // The original scan order (default_scan_16x16), modified to account for the
    228 // extra shift in the hadamard C implementation in the fp case, i.e.,
    229 // aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different
    230 // outputs, so we handle them separately.
        // 256 entries; paired with av1_default_iscan_fp_16x16_transpose. Only built
        // when CONFIG_AV1_HIGHBITDEPTH is set.
    231 DECLARE_ALIGNED(16, static const int16_t,
    232                default_scan_fp_16x16_transpose[256]) = {
    233  0,   4,   2,   8,   6,   16,  20,  18,  12,  10,  64,  14,  24,  22,  32,
    234  36,  34,  28,  26,  68,  66,  72,  70,  80,  30,  40,  38,  48,  52,  50,
    235  44,  42,  84,  82,  76,  74,  128, 78,  88,  86,  96,  46,  56,  54,  1,
    236  5,   3,   60,  58,  100, 98,  92,  90,  132, 130, 136, 134, 144, 94,  104,
    237  102, 112, 62,  9,   7,   17,  21,  19,  13,  11,  116, 114, 108, 106, 148,
    238  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65,  15,  25,  23,
    239  33,  37,  35,  29,  27,  69,  67,  124, 122, 164, 162, 156, 154, 196, 194,
    240  200, 198, 208, 158, 168, 166, 176, 126, 73,  71,  81,  31,  41,  39,  49,
    241  53,  51,  45,  43,  85,  83,  77,  75,  180, 178, 172, 170, 212, 210, 204,
    242  202, 206, 216, 214, 224, 174, 184, 182, 129, 79,  89,  87,  97,  47,  57,
    243  55,  61,  59,  101, 99,  93,  91,  133, 131, 188, 186, 228, 226, 220, 218,
    244  222, 232, 230, 240, 190, 137, 135, 145, 95,  105, 103, 113, 63,  117, 115,
    245  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
    246  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
    247  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
    248  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
    249  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
    250  255
    251 };
    252 #endif
    253 
    254 // The original scan order (av1_default_iscan_16x16) is modified to match
    255 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
    256 // The hadamard AVX2 implementation changes the order of coefficients,
    257 // so the normal scan order is no longer guaranteed to scan low
    258 // coefficients first; the scan order is therefore modified accordingly. Note that
    259 // this one has to be used together with default_scan_lp_16x16_transpose.
    260 DECLARE_ALIGNED(16, static const int16_t,
    261                av1_default_iscan_lp_16x16_transpose[256]) = {
    262  0,   44,  2,   46,  3,   63,  9,   69,  1,   45,  4,   64,  8,   68,  11,
    263  87,  5,   65,  7,   67,  12,  88,  18,  94,  6,   66,  13,  89,  17,  93,
    264  24,  116, 14,  90,  16,  92,  25,  117, 31,  123, 15,  91,  26,  118, 30,
    265  122, 41,  148, 27,  119, 29,  121, 42,  149, 48,  152, 28,  120, 43,  150,
    266  47,  151, 62,  177, 10,  86,  20,  96,  21,  113, 35,  127, 19,  95,  22,
    267  114, 34,  126, 37,  144, 23,  115, 33,  125, 38,  145, 52,  156, 32,  124,
    268  39,  146, 51,  155, 58,  173, 40,  147, 50,  154, 59,  174, 73,  181, 49,
    269  153, 60,  175, 72,  180, 83,  198, 61,  176, 71,  179, 84,  199, 98,  202,
    270  70,  178, 85,  200, 97,  201, 112, 219, 36,  143, 54,  158, 55,  170, 77,
    271  185, 53,  157, 56,  171, 76,  184, 79,  194, 57,  172, 75,  183, 80,  195,
    272  102, 206, 74,  182, 81,  196, 101, 205, 108, 215, 82,  197, 100, 204, 109,
    273  216, 131, 223, 99,  203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
    274  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78,  193, 104,
    275  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
    276  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
    277  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
    278  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
    279  255
    280 };
    281 
    282 #if CONFIG_AV1_HIGHBITDEPTH
    283 // The original scan order (av1_default_iscan_16x16) is modified to match
    284 // hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
    285 // The hadamard AVX2 implementation changes the order of coefficients,
    286 // so the normal scan order is no longer guaranteed to scan low
    287 // coefficients first; the scan order is therefore modified accordingly. Note that
    288 // this one has to be used together with default_scan_fp_16x16_transpose.
    289 DECLARE_ALIGNED(16, static const int16_t,
    290                av1_default_iscan_fp_16x16_transpose[256]) = {
    291  0,   44,  2,   46,  1,   45,  4,   64,  3,   63,  9,   69,  8,   68,  11,
    292  87,  5,   65,  7,   67,  6,   66,  13,  89,  12,  88,  18,  94,  17,  93,
    293  24,  116, 14,  90,  16,  92,  15,  91,  26,  118, 25,  117, 31,  123, 30,
    294  122, 41,  148, 27,  119, 29,  121, 28,  120, 43,  150, 42,  149, 48,  152,
    295  47,  151, 62,  177, 10,  86,  20,  96,  19,  95,  22,  114, 21,  113, 35,
    296  127, 34,  126, 37,  144, 23,  115, 33,  125, 32,  124, 39,  146, 38,  145,
    297  52,  156, 51,  155, 58,  173, 40,  147, 50,  154, 49,  153, 60,  175, 59,
    298  174, 73,  181, 72,  180, 83,  198, 61,  176, 71,  179, 70,  178, 85,  200,
    299  84,  199, 98,  202, 97,  201, 112, 219, 36,  143, 54,  158, 53,  157, 56,
    300  171, 55,  170, 77,  185, 76,  184, 79,  194, 57,  172, 75,  183, 74,  182,
    301  81,  196, 80,  195, 102, 206, 101, 205, 108, 215, 82,  197, 100, 204, 99,
    302  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
    303  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78,  193, 104,
    304  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
    305  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
    306  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
    307  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
    308  255
    309 };
    310 #endif
    311 
    312 // For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
    313 // but the fastest way to calculate the IDTX transform (i.e. no transposes)
    314 // results in coefficients that are a transposition of the entropy coding
    315 // versions. These tables are used as a substitute for the scan order for the
    316 // faster version of IDTX.
    317 
    318 // Scan table for the fast 4x4 IDTX path (16 entries).
        // Must be used together with av1_fast_idtx_iscan_4x4.
    319 DECLARE_ALIGNED(16, static const int16_t,
    320                av1_fast_idtx_scan_4x4[16]) = { 0, 1,  4,  8,  5, 2,  3,  6,
    321                                                9, 12, 13, 10, 7, 11, 14, 15 };
    322 
    323 // Inverse of av1_fast_idtx_scan_4x4: iscan[scan[i]] == i for all 16 entries.
        // Must be used together with av1_fast_idtx_scan_4x4.
    324 DECLARE_ALIGNED(16, static const int16_t,
    325                av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5,  6,  2, 4,  7,  12,
    326                                                 3, 8, 11, 13, 9, 10, 14, 15 };
    327 
    // Bundles the fast 4x4 IDTX tables into one SCAN_ORDER: the scan table first,
    // then its companion iscan table.
    328 static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
    329  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
    330 };
    331 
    332 // Scan table for the fast 8x8 IDTX path (64 entries).
        // Must be used together with av1_fast_idtx_iscan_8x8.
    333 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
    334  0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
    335  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
    336  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
    337  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
    338 };
    339 
    340 // Companion (position -> scan index) table for av1_fast_idtx_scan_8x8.
        // Must be used together with av1_fast_idtx_scan_8x8.
    341 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
    342  0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
    343  3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
    344  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
    345  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
    346 };
    347 
    // Bundles the fast 8x8 IDTX tables into one SCAN_ORDER: the scan table first,
    // then its companion iscan table.
    348 static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
    349  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
    350 };
    351 
    352 // Scan table for the fast 16x16 IDTX path (256 entries).
        // Must be used together with av1_fast_idtx_iscan_16x16.
    353 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
    354  0,   1,   16,  32,  17,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,
    355  5,   20,  35,  50,  65,  80,  96,  81,  66,  51,  36,  21,  6,   7,   22,
    356  37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,  8,
    357  9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100,
    358  85,  70,  55,  40,  25,  10,  11,  26,  41,  56,  71,  86,  101, 116, 131,
    359  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,  42,  27,
    360  12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208,
    361  224, 209, 194, 179, 164, 149, 134, 119, 104, 89,  74,  59,  44,  29,  14,
    362  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180, 195, 210, 225,
    363  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,
    364  31,  47,  62,  77,  92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
    365  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,  78,  63,  79,  94,
    366  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
    367  170, 155, 140, 125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231,
    368  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
    369  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
    370  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
    371  255
    372 };
    373 
    374 // Companion (position -> scan index) table for av1_fast_idtx_scan_16x16.
        // Must be used together with av1_fast_idtx_scan_16x16. Unlike the 4x4 and 8x8
        // tables, no SCAN_ORDER wrapper for the 16x16 pair is defined in this header.
    375 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
    376  0,   1,   5,   6,   14,  15,  27,  28,  44,  45,  65,  66,  90,  91,  119,
    377  120, 2,   4,   7,   13,  16,  26,  29,  43,  46,  64,  67,  89,  92,  118,
    378  121, 150, 3,   8,   12,  17,  25,  30,  42,  47,  63,  68,  88,  93,  117,
    379  122, 149, 151, 9,   11,  18,  24,  31,  41,  48,  62,  69,  87,  94,  116,
    380  123, 148, 152, 177, 10,  19,  23,  32,  40,  49,  61,  70,  86,  95,  115,
    381  124, 147, 153, 176, 178, 20,  22,  33,  39,  50,  60,  71,  85,  96,  114,
    382  125, 146, 154, 175, 179, 200, 21,  34,  38,  51,  59,  72,  84,  97,  113,
    383  126, 145, 155, 174, 180, 199, 201, 35,  37,  52,  58,  73,  83,  98,  112,
    384  127, 144, 156, 173, 181, 198, 202, 219, 36,  53,  57,  74,  82,  99,  111,
    385  128, 143, 157, 172, 182, 197, 203, 218, 220, 54,  56,  75,  81,  100, 110,
    386  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55,  76,  80,  101, 109,
    387  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77,  79,  102, 108,
    388  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78,  103, 107,
    389  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
    390  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
    391  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
    392  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
    393  255
    394 };
    395 
    396 // Indicates the blocks for which RD model should be based on special logic
    397 static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
    398                                    BLOCK_SIZE bsize) {
    399  const AV1_COMMON *const cm = &cpi->common;
    400  const int large_block = bsize >= BLOCK_32X32;
    401  // Only enable for low bitdepth to mitigate issue: b/303023614.
    402  return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
    403         !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
    404         cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
    405 }
    406 /*!\brief Finds predicted motion vectors for a block.
    407 *
    408 * \ingroup nonrd_mode_search
    409 * \callgraph
    410 * \callergraph
    411 * Finds predicted motion vectors for a block from a certain reference frame.
    412 * First, it fills reference MV stack, then picks the test from the stack and
    413 * predicts the final MV for a block for each mode.
    414 * \param[in]    cpi                      Top-level encoder structure
    415 * \param[in]    x                        Pointer to structure holding all the
    416 *                                        data for the current macroblock
    417 * \param[in]    ref_frame                Reference frame for which to find
    418 *                                        ref MVs
    419 * \param[out]   frame_mv                 Predicted MVs for a block
    420 * \param[in]    yv12_mb                  Buffer to hold predicted block
    421 * \param[in]    bsize                    Current block size
    422 * \param[in]    force_skip_low_temp_var  Flag indicating possible mode search
    423 *                                        prune for low temporal variance block
    424 * \param[in]    skip_pred_mv             Flag indicating to skip av1_mv_pred
    425 * \param[out]   use_scaled_ref_frame     Flag to indicate if scaled reference
    426 *                                        frame is used.
    427 *
    428 * \remark Nothing is returned. Instead, predicted MVs are placed into
    429 * \c frame_mv array, and use_scaled_ref_frame is set.
    430 */
    431 static inline void find_predictors(
    432    AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    433    int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
    434    struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
    435    int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
    436  AV1_COMMON *const cm = &cpi->common;
    437  MACROBLOCKD *const xd = &x->e_mbd;
    438  MB_MODE_INFO *const mbmi = xd->mi[0];
    439  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
    440  const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
    441  const bool ref_is_scaled =
    442      ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
    443  const YV12_BUFFER_CONFIG *scaled_ref =
    444      av1_get_scaled_ref_frame(cpi, ref_frame);
    445  const YV12_BUFFER_CONFIG *yv12 =
    446      ref_is_scaled && scaled_ref ? scaled_ref : ref;
    447  const int num_planes = av1_num_planes(cm);
    448  x->pred_mv_sad[ref_frame] = INT_MAX;
    449  x->pred_mv0_sad[ref_frame] = INT_MAX;
    450  x->pred_mv1_sad[ref_frame] = INT_MAX;
    451  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    452  // TODO(kyslov) this needs various further optimizations. to be continued..
    453  assert(yv12 != NULL);
    454  if (yv12 != NULL) {
    455    struct scale_factors *const sf =
    456        scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
    457    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
    458    av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
    459                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
    460                     mbmi_ext->mode_context);
    461    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
    462    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
    463    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    464    av1_find_best_ref_mvs_from_stack(
    465        cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
    466        &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
    467    frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
    468    // Early exit for non-LAST frame if force_skip_low_temp_var is set.
    469    if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
    470        !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
    471      av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
    472                  bsize);
    473    }
    474  }
    475  if (cm->features.switchable_motion_mode) {
    476    av1_count_overlappable_neighbors(cm, xd);
    477  }
    478  mbmi->num_proj_ref = 1;
    479  *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
    480 }
    481 
    482 static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
    483                                   PREDICTION_MODE pred_mode,
    484                                   MV_REFERENCE_FRAME ref_frame0,
    485                                   MV_REFERENCE_FRAME ref_frame1,
    486                                   const AV1_COMMON *cm) {
    487  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
    488  mbmi->ref_mv_idx = 0;
    489  mbmi->mode = pred_mode;
    490  mbmi->uv_mode = UV_DC_PRED;
    491  mbmi->ref_frame[0] = ref_frame0;
    492  mbmi->ref_frame[1] = ref_frame1;
    493  pmi->palette_size[PLANE_TYPE_Y] = 0;
    494  pmi->palette_size[PLANE_TYPE_UV] = 0;
    495  mbmi->filter_intra_mode_info.use_filter_intra = 0;
    496  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
    497  mbmi->motion_mode = SIMPLE_TRANSLATION;
    498  mbmi->num_proj_ref = 1;
    499  mbmi->interintra_mode = 0;
    500  set_default_interp_filters(mbmi, cm->features.interp_filter);
    501 }
    502 
    503 static inline void init_estimate_block_intra_args(
    504    struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
    505  args->cpi = cpi;
    506  args->x = x;
    507  args->mode = DC_PRED;
    508  args->skippable = 1;
    509  args->rdc = 0;
    510  args->best_sad = UINT_MAX;
    511  args->prune_mode_based_on_sad = false;
    512  args->prune_palette_sad = false;
    513 }
    514 
    515 static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
    516  for (int buf_idx = 0; buf_idx < len; buf_idx++) {
    517    if (!p[buf_idx].in_use) {
    518      p[buf_idx].in_use = 1;
    519      return buf_idx;
    520    }
    521  }
    522  return -1;
    523 }
    524 
    525 static inline bool prune_palette_testing_inter(AV1_COMP *cpi,
    526                                               unsigned int source_variance) {
    527  return (
    528      cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
    529      cpi->oxcf.speed >= 11 && cpi->rc.high_source_sad &&
    530      ((cpi->sf.rt_sf.prune_palette_search_nonrd > 2) ||
    531       (cpi->sf.rt_sf.rc_compute_spatial_var_sc &&
    532        cpi->rc.frame_spatial_variance < 1200 &&
    533        cpi->rc.perc_spatial_flat_blocks < 5 &&
    534        cpi->rc.percent_blocks_with_motion > 98 && source_variance < 4000)));
    535 }
    536 
    537 static inline void free_pred_buffer(PRED_BUFFER *p) {
    538  if (p != NULL) p->in_use = 0;
    539 }
    540 
    541 #if CONFIG_INTERNAL_STATS
    542 static inline void store_coding_context_nonrd(MACROBLOCK *x,
    543                                              PICK_MODE_CONTEXT *ctx,
    544                                              int mode_index) {
    545 #else
    546 static inline void store_coding_context_nonrd(MACROBLOCK *x,
    547                                              PICK_MODE_CONTEXT *ctx) {
    548 #endif  // CONFIG_INTERNAL_STATS
    549  MACROBLOCKD *const xd = &x->e_mbd;
    550  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
    551 
    552  // Take a snapshot of the coding context so it can be
    553  // restored if we decide to encode this way
    554  ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
    555 
    556  ctx->skippable = txfm_info->skip_txfm;
    557 #if CONFIG_INTERNAL_STATS
    558  ctx->best_mode_index = mode_index;
    559 #endif  // CONFIG_INTERNAL_STATS
    560  ctx->mic = *xd->mi[0];
    561  ctx->skippable = txfm_info->skip_txfm;
    562  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
    563                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
    564 }
    565 
    // Declared here, defined outside this header. Results are returned through
    // the non-const this_rdc and skippable pointers.
    // NOTE(review): presumably the luma rate-distortion estimate for a block of
    // size bsize using transform size tx_size - confirm at the definition.
    566 void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
    567                   BLOCK_SIZE bsize, TX_SIZE tx_size);
    568 
    // Declared here, defined outside this header. Takes a read-only prediction
    // buffer (pred_buf with pred_stride) in addition to the av1_block_yrd()
    // arguments.
    // NOTE(review): presumably the IDTX variant of av1_block_yrd() that uses the
    // fast IDTX scan tables above - confirm at the definition.
    569 void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
    570                        int pred_stride, RD_STATS *this_rdc, int *skippable,
    571                        BLOCK_SIZE bsize, TX_SIZE tx_size);
    572 
    // Declared here, defined outside this header. Operates on the plane range
    // given by start_plane and stop_plane and returns an int64_t.
    // NOTE(review): the meaning of the return value and whether stop_plane is
    // inclusive are not visible here - confirm at the definition.
    573 int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
    574                               MACROBLOCK *x, MACROBLOCKD *xd,
    575                               RD_STATS *this_rdc, int start_plane,
    576                               int stop_plane);
    577 
    // Declared here, defined outside this header.
    // NOTE(review): the (plane, block, row, col, plane_bsize, tx_size, arg) shape
    // looks like a per-transform-block visitor callback, with arg presumably
    // pointing to a struct estimate_block_intra_args - confirm at the definition.
    578 void av1_estimate_block_intra(int plane, int block, int row, int col,
    579                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
    580                              void *arg);
    581 
    // Declared here, defined outside this header. The non-const pointer
    // arguments (this_mode_pred, best_rdc, best_pickmode, ctx, best_sad_norm)
    // can be written by the callee.
    // NOTE(review): presumably evaluates intra modes for the block and updates
    // the best-mode bookkeeping when one wins - confirm at the definition.
    582 void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    583                             int best_early_term, unsigned int ref_cost_intra,
    584                             int reuse_prediction, struct buf_2d *orig_dst,
    585                             PRED_BUFFER *tmp_buffers,
    586                             PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
    587                             BEST_PICKMODE *best_pickmode,
    588                             PICK_MODE_CONTEXT *ctx,
    589                             unsigned int *best_sad_norm);
    590 
    591 #endif  // AOM_AV1_ENCODER_NONRD_OPT_H_