tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

intra_mode_search_utils.h (28672B)


      1 /*
      2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 /*!\file
     13 * \brief Defines utility functions used in intra mode search.
     14 *
     15 * This includes rdcost estimations, histogram based pruning, etc.
     16 */
     17 #ifndef AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_
     18 #define AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_
     19 
     20 #include "av1/common/enums.h"
     21 #include "av1/common/pred_common.h"
     22 #include "av1/common/reconintra.h"
     23 
     24 #include "av1/encoder/encoder.h"
     25 #include "av1/encoder/encodeframe.h"
     26 #include "av1/encoder/model_rd.h"
     27 #include "av1/encoder/palette.h"
     28 #include "av1/encoder/hybrid_fwd_txfm.h"
     29 
     30 #ifdef __cplusplus
     31 extern "C" {
     32 #endif
     33 
     34 /*!\cond */
// Macro for computing the speed-preset dependent threshold which is used for
// deciding whether to enable/disable variance calculations in
// intra_rd_variance_factor().
// The threshold is 1.0 - 0.25 * X, where X is the speed preset; it is a
// floating-point expression that reaches zero at X == 4 and is negative above.
#define INTRA_RD_VAR_THRESH(X) (1.0 - (0.25 * (X)))
     39 
// Number of bins in the gradient histogram (HOG) built by the helpers below.
#define BINS 32
// Bias terms of the HOG based pruning model: one value per directional mode.
// Referenced by av1_intra_hog_model_nnconfig.
static const float av1_intra_hog_model_bias[DIRECTIONAL_MODES] = {
 0.450578f,  0.695518f,  -0.717944f, -0.639894f,
 -0.602019f, -0.453454f, 0.055857f,  -0.465480f,
};
     45 
// Weights of the HOG based pruning model: BINS * DIRECTIONAL_MODES values.
// Referenced by av1_intra_hog_model_nnconfig.
// NOTE(review): presumably laid out as BINS consecutive weights per output
// mode -- confirm against av1_nn_predict().
static const float av1_intra_hog_model_weights[BINS * DIRECTIONAL_MODES] = {
 -3.076402f, -3.757063f, -3.275266f, -3.180665f, -3.452105f, -3.216593f,
 -2.871212f, -3.134296f, -1.822324f, -2.401411f, -1.541016f, -1.195322f,
 -0.434156f, 0.322868f,  2.260546f,  3.368715f,  3.989290f,  3.308487f,
 2.277893f,  0.923793f,  0.026412f,  -0.385174f, -0.718622f, -1.408867f,
 -1.050558f, -2.323941f, -2.225827f, -2.585453f, -3.054283f, -2.875087f,
 -2.985709f, -3.447155f, 3.758139f,  3.204353f,  2.170998f,  0.826587f,
 -0.269665f, -0.702068f, -1.085776f, -2.175249f, -1.623180f, -2.975142f,
 -2.779629f, -3.190799f, -3.521900f, -3.375480f, -3.319355f, -3.897389f,
 -3.172334f, -3.594528f, -2.879132f, -2.547777f, -2.921023f, -2.281844f,
 -1.818988f, -2.041771f, -0.618268f, -1.396458f, -0.567153f, -0.285868f,
 -0.088058f, 0.753494f,  2.092413f,  3.215266f,  -3.300277f, -2.748658f,
 -2.315784f, -2.423671f, -2.257283f, -2.269583f, -2.196660f, -2.301076f,
 -2.646516f, -2.271319f, -2.254366f, -2.300102f, -2.217960f, -2.473300f,
 -2.116866f, -2.528246f, -3.314712f, -1.701010f, -0.589040f, -0.088077f,
 0.813112f,  1.702213f,  2.653045f,  3.351749f,  3.243554f,  3.199409f,
 2.437856f,  1.468854f,  0.533039f,  -0.099065f, -0.622643f, -2.200732f,
 -4.228861f, -2.875263f, -1.273956f, -0.433280f, 0.803771f,  1.975043f,
 3.179528f,  3.939064f,  3.454379f,  3.689386f,  3.116411f,  1.970991f,
 0.798406f,  -0.628514f, -1.252546f, -2.825176f, -4.090178f, -3.777448f,
 -3.227314f, -3.479403f, -3.320569f, -3.159372f, -2.729202f, -2.722341f,
 -3.054913f, -2.742923f, -2.612703f, -2.662632f, -2.907314f, -3.117794f,
 -3.102660f, -3.970972f, -4.891357f, -3.935582f, -3.347758f, -2.721924f,
 -2.219011f, -1.702391f, -0.866529f, -0.153743f, 0.107733f,  1.416882f,
 2.572884f,  3.607755f,  3.974820f,  3.997783f,  2.970459f,  0.791687f,
 -1.478921f, -1.228154f, -1.216955f, -1.765932f, -1.951003f, -1.985301f,
 -1.975881f, -1.985593f, -2.422371f, -2.419978f, -2.531288f, -2.951853f,
 -3.071380f, -3.277027f, -3.373539f, -4.462010f, -0.967888f, 0.805524f,
 2.794130f,  3.685984f,  3.745195f,  3.252444f,  2.316108f,  1.399146f,
 -0.136519f, -0.162811f, -1.004357f, -1.667911f, -1.964662f, -2.937579f,
 -3.019533f, -3.942766f, -5.102767f, -3.882073f, -3.532027f, -3.451956f,
 -2.944015f, -2.643064f, -2.529872f, -2.077290f, -2.809965f, -1.803734f,
 -1.783593f, -1.662585f, -1.415484f, -1.392673f, -0.788794f, -1.204819f,
 -1.998864f, -1.182102f, -0.892110f, -1.317415f, -1.359112f, -1.522867f,
 -1.468552f, -1.779072f, -2.332959f, -2.160346f, -2.329387f, -2.631259f,
 -2.744936f, -3.052494f, -2.787363f, -3.442548f, -4.245075f, -3.032172f,
 -2.061609f, -1.768116f, -1.286072f, -0.706587f, -0.192413f, 0.386938f,
 0.716997f,  1.481393f,  2.216702f,  2.737986f,  3.109809f,  3.226084f,
 2.490098f,  -0.095827f, -3.864816f, -3.507248f, -3.128925f, -2.908251f,
 -2.883836f, -2.881411f, -2.524377f, -2.624478f, -2.399573f, -2.367718f,
 -1.918255f, -1.926277f, -1.694584f, -1.723790f, -0.966491f, -1.183115f,
 -1.430687f, 0.872896f,  2.766550f,  3.610080f,  3.578041f,  3.334928f,
 2.586680f,  1.895721f,  1.122195f,  0.488519f,  -0.140689f, -0.799076f,
 -1.222860f, -1.502437f, -1.900969f, -3.206816f,
};
     91 
// Network description handed to av1_nn_predict() by
// prune_intra_mode_with_hog(): BINS histogram inputs, DIRECTIONAL_MODES score
// outputs and no hidden layer, using the weight and bias tables above.
// NOTE(review): the bare { 0 } entry is presumably the per-hidden-layer node
// count list (empty here) -- confirm against the NN_CONFIG definition.
static const NN_CONFIG av1_intra_hog_model_nnconfig = {
 BINS,               // num_inputs
 DIRECTIONAL_MODES,  // num_outputs
 0,                  // num_hidden_layers
 { 0 },
 {
     av1_intra_hog_model_weights,
 },
 {
     av1_intra_hog_model_bias,
 },
};
    104 
    105 #define FIX_PREC_BITS (16)
    106 static inline int get_hist_bin_idx(int dx, int dy) {
    107  const int32_t ratio = (dy * (1 << FIX_PREC_BITS)) / dx;
    108 
    109  // Find index by bisection
    110  static const int thresholds[BINS] = {
    111    -1334015, -441798, -261605, -183158, -138560, -109331, -88359, -72303,
    112    -59392,   -48579,  -39272,  -30982,  -23445,  -16400,  -9715,  -3194,
    113    3227,     9748,    16433,   23478,   31015,   39305,   48611,  59425,
    114    72336,    88392,   109364,  138593,  183191,  261638,  441831, INT32_MAX
    115  };
    116 
    117  int lo_idx = 0, hi_idx = BINS - 1;
    118  // Divide into segments of size 8 gives better performance than binary search
    119  // here.
    120  if (ratio <= thresholds[7]) {
    121    lo_idx = 0;
    122    hi_idx = 7;
    123  } else if (ratio <= thresholds[15]) {
    124    lo_idx = 8;
    125    hi_idx = 15;
    126  } else if (ratio <= thresholds[23]) {
    127    lo_idx = 16;
    128    hi_idx = 23;
    129  } else {
    130    lo_idx = 24;
    131    hi_idx = 31;
    132  }
    133 
    134  for (int idx = lo_idx; idx <= hi_idx; idx++) {
    135    if (ratio <= thresholds[idx]) {
    136      return idx;
    137    }
    138  }
    139  assert(0 && "No valid histogram bin found!");
    140  return BINS - 1;
    141 }
    142 #undef FIX_PREC_BITS
    143 
    144 // Normalizes the hog data.
    145 static inline void normalize_hog(float total, float *hist) {
    146  for (int i = 0; i < BINS; ++i) hist[i] /= total;
    147 }
    148 
    149 static inline void lowbd_generate_hog(const uint8_t *src, int stride, int rows,
    150                                      int cols, float *hist) {
    151  float total = 0.1f;
    152  src += stride;
    153  for (int r = 1; r < rows - 1; ++r) {
    154    for (int c = 1; c < cols - 1; ++c) {
    155      const uint8_t *above = &src[c - stride];
    156      const uint8_t *below = &src[c + stride];
    157      const uint8_t *left = &src[c - 1];
    158      const uint8_t *right = &src[c + 1];
    159      // Calculate gradient using Sobel filters.
    160      const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
    161                     (left[-stride] + 2 * left[0] + left[stride]);
    162      const int dy = (below[-1] + 2 * below[0] + below[1]) -
    163                     (above[-1] + 2 * above[0] + above[1]);
    164      if (dx == 0 && dy == 0) continue;
    165      const int temp = abs(dx) + abs(dy);
    166      if (!temp) continue;
    167      total += temp;
    168      if (dx == 0) {
    169        hist[0] += temp / 2;
    170        hist[BINS - 1] += temp / 2;
    171      } else {
    172        const int idx = get_hist_bin_idx(dx, dy);
    173        assert(idx >= 0 && idx < BINS);
    174        hist[idx] += temp;
    175      }
    176    }
    177    src += stride;
    178  }
    179 
    180  normalize_hog(total, hist);
    181 }
    182 
    183 // Computes and stores pixel level gradient information of a given superblock
    184 // for LBD encode.
    185 static inline void lowbd_compute_gradient_info_sb(MACROBLOCK *const x,
    186                                                  BLOCK_SIZE sb_size,
    187                                                  PLANE_TYPE plane) {
    188  PixelLevelGradientInfo *const grad_info_sb =
    189      x->pixel_gradient_info + plane * MAX_SB_SQUARE;
    190  const uint8_t *src = x->plane[plane].src.buf;
    191  const int stride = x->plane[plane].src.stride;
    192  const int ss_x = x->e_mbd.plane[plane].subsampling_x;
    193  const int ss_y = x->e_mbd.plane[plane].subsampling_y;
    194  const int sb_height = block_size_high[sb_size] >> ss_y;
    195  const int sb_width = block_size_wide[sb_size] >> ss_x;
    196  src += stride;
    197  for (int r = 1; r < sb_height - 1; ++r) {
    198    for (int c = 1; c < sb_width - 1; ++c) {
    199      const uint8_t *above = &src[c - stride];
    200      const uint8_t *below = &src[c + stride];
    201      const uint8_t *left = &src[c - 1];
    202      const uint8_t *right = &src[c + 1];
    203      // Calculate gradient using Sobel filters.
    204      const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
    205                     (left[-stride] + 2 * left[0] + left[stride]);
    206      const int dy = (below[-1] + 2 * below[0] + below[1]) -
    207                     (above[-1] + 2 * above[0] + above[1]);
    208      grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0);
    209      grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum =
    210          (uint16_t)(abs(dx) + abs(dy));
    211      grad_info_sb[r * sb_width + c].hist_bin_idx =
    212          (dx != 0) ? get_hist_bin_idx(dx, dy) : -1;
    213    }
    214    src += stride;
    215  }
    216 }
    217 
    218 #if CONFIG_AV1_HIGHBITDEPTH
    219 static inline void highbd_generate_hog(const uint8_t *src8, int stride,
    220                                       int rows, int cols, float *hist) {
    221  float total = 0.1f;
    222  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
    223  src += stride;
    224  for (int r = 1; r < rows - 1; ++r) {
    225    for (int c = 1; c < cols - 1; ++c) {
    226      const uint16_t *above = &src[c - stride];
    227      const uint16_t *below = &src[c + stride];
    228      const uint16_t *left = &src[c - 1];
    229      const uint16_t *right = &src[c + 1];
    230      // Calculate gradient using Sobel filters.
    231      const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
    232                     (left[-stride] + 2 * left[0] + left[stride]);
    233      const int dy = (below[-1] + 2 * below[0] + below[1]) -
    234                     (above[-1] + 2 * above[0] + above[1]);
    235      if (dx == 0 && dy == 0) continue;
    236      const int temp = abs(dx) + abs(dy);
    237      if (!temp) continue;
    238      total += temp;
    239      if (dx == 0) {
    240        hist[0] += temp / 2;
    241        hist[BINS - 1] += temp / 2;
    242      } else {
    243        const int idx = get_hist_bin_idx(dx, dy);
    244        assert(idx >= 0 && idx < BINS);
    245        hist[idx] += temp;
    246      }
    247    }
    248    src += stride;
    249  }
    250 
    251  normalize_hog(total, hist);
    252 }
    253 
    254 // Computes and stores pixel level gradient information of a given superblock
    255 // for HBD encode.
    256 static inline void highbd_compute_gradient_info_sb(MACROBLOCK *const x,
    257                                                   BLOCK_SIZE sb_size,
    258                                                   PLANE_TYPE plane) {
    259  PixelLevelGradientInfo *const grad_info_sb =
    260      x->pixel_gradient_info + plane * MAX_SB_SQUARE;
    261  const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[plane].src.buf);
    262  const int stride = x->plane[plane].src.stride;
    263  const int ss_x = x->e_mbd.plane[plane].subsampling_x;
    264  const int ss_y = x->e_mbd.plane[plane].subsampling_y;
    265  const int sb_height = block_size_high[sb_size] >> ss_y;
    266  const int sb_width = block_size_wide[sb_size] >> ss_x;
    267  src += stride;
    268  for (int r = 1; r < sb_height - 1; ++r) {
    269    for (int c = 1; c < sb_width - 1; ++c) {
    270      const uint16_t *above = &src[c - stride];
    271      const uint16_t *below = &src[c + stride];
    272      const uint16_t *left = &src[c - 1];
    273      const uint16_t *right = &src[c + 1];
    274      // Calculate gradient using Sobel filters.
    275      const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
    276                     (left[-stride] + 2 * left[0] + left[stride]);
    277      const int dy = (below[-1] + 2 * below[0] + below[1]) -
    278                     (above[-1] + 2 * above[0] + above[1]);
    279      grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0);
    280      grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum =
    281          (uint16_t)(abs(dx) + abs(dy));
    282      grad_info_sb[r * sb_width + c].hist_bin_idx =
    283          (dx != 0) ? get_hist_bin_idx(dx, dy) : -1;
    284    }
    285    src += stride;
    286  }
    287 }
    288 #endif  // CONFIG_AV1_HIGHBITDEPTH
    289 
    290 static inline void generate_hog(const uint8_t *src8, int stride, int rows,
    291                                int cols, float *hist, int highbd) {
    292 #if CONFIG_AV1_HIGHBITDEPTH
    293  if (highbd) {
    294    highbd_generate_hog(src8, stride, rows, cols, hist);
    295    return;
    296  }
    297 #else
    298  (void)highbd;
    299 #endif  // CONFIG_AV1_HIGHBITDEPTH
    300  lowbd_generate_hog(src8, stride, rows, cols, hist);
    301 }
    302 
    303 static inline void compute_gradient_info_sb(MACROBLOCK *const x,
    304                                            BLOCK_SIZE sb_size,
    305                                            PLANE_TYPE plane) {
    306 #if CONFIG_AV1_HIGHBITDEPTH
    307  if (is_cur_buf_hbd(&x->e_mbd)) {
    308    highbd_compute_gradient_info_sb(x, sb_size, plane);
    309    return;
    310  }
    311 #endif  // CONFIG_AV1_HIGHBITDEPTH
    312  lowbd_compute_gradient_info_sb(x, sb_size, plane);
    313 }
    314 
    315 // Gradient caching at superblock level is allowed only if all of the following
    316 // conditions are satisfied:
    317 // (1) The current frame is an intra only frame
    318 // (2) Non-RD mode decisions are not enabled
    319 // (3) The sf partition_search_type is set to SEARCH_PARTITION
    320 // (4) Either intra_pruning_with_hog or chroma_intra_pruning_with_hog is enabled
    321 //
    322 // SB level caching of gradient data may not help in speedup for the following
    323 // cases:
    324 // (1) Inter frames (due to early intra gating)
    325 // (2) When partition_search_type is not SEARCH_PARTITION
    326 // Hence, gradient data is computed at block level in such cases.
    327 static inline bool is_gradient_caching_for_hog_enabled(
    328    const AV1_COMP *const cpi) {
    329  const SPEED_FEATURES *const sf = &cpi->sf;
    330  return frame_is_intra_only(&cpi->common) && !sf->rt_sf.use_nonrd_pick_mode &&
    331         (sf->part_sf.partition_search_type == SEARCH_PARTITION) &&
    332         (sf->intra_sf.intra_pruning_with_hog ||
    333          sf->intra_sf.chroma_intra_pruning_with_hog);
    334 }
    335 
    336 // Function to generate pixel level gradient information for a given superblock.
    337 // Sets the flags 'is_sb_gradient_cached' for the specific plane-type if
    338 // gradient info is generated for the same.
    339 static inline void produce_gradients_for_sb(AV1_COMP *cpi, MACROBLOCK *x,
    340                                            BLOCK_SIZE sb_size, int mi_row,
    341                                            int mi_col) {
    342  // Initialise flags related to hog data caching.
    343  x->is_sb_gradient_cached[PLANE_TYPE_Y] = false;
    344  x->is_sb_gradient_cached[PLANE_TYPE_UV] = false;
    345  if (!is_gradient_caching_for_hog_enabled(cpi)) return;
    346 
    347  const SPEED_FEATURES *sf = &cpi->sf;
    348  const int num_planes = av1_num_planes(&cpi->common);
    349 
    350  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
    351 
    352  if (sf->intra_sf.intra_pruning_with_hog) {
    353    compute_gradient_info_sb(x, sb_size, PLANE_TYPE_Y);
    354    x->is_sb_gradient_cached[PLANE_TYPE_Y] = true;
    355  }
    356  if (sf->intra_sf.chroma_intra_pruning_with_hog && num_planes > 1) {
    357    compute_gradient_info_sb(x, sb_size, PLANE_TYPE_UV);
    358    x->is_sb_gradient_cached[PLANE_TYPE_UV] = true;
    359  }
    360 }
    361 
    362 // Reuses the pixel level gradient data generated at superblock level for block
    363 // level histogram computation.
    364 static inline void generate_hog_using_gradient_cache(const MACROBLOCK *x,
    365                                                     int rows, int cols,
    366                                                     BLOCK_SIZE sb_size,
    367                                                     PLANE_TYPE plane,
    368                                                     float *hist) {
    369  float total = 0.1f;
    370  const int ss_x = x->e_mbd.plane[plane].subsampling_x;
    371  const int ss_y = x->e_mbd.plane[plane].subsampling_y;
    372  const int sb_width = block_size_wide[sb_size] >> ss_x;
    373 
    374  // Derive the offset from the starting of the superblock in order to locate
    375  // the block level gradient data in the cache.
    376  const int mi_row_in_sb = x->e_mbd.mi_row & (mi_size_high[sb_size] - 1);
    377  const int mi_col_in_sb = x->e_mbd.mi_col & (mi_size_wide[sb_size] - 1);
    378  const int block_offset_in_grad_cache =
    379      sb_width * (mi_row_in_sb << (MI_SIZE_LOG2 - ss_y)) +
    380      (mi_col_in_sb << (MI_SIZE_LOG2 - ss_x));
    381  const PixelLevelGradientInfo *grad_info_blk = x->pixel_gradient_info +
    382                                                plane * MAX_SB_SQUARE +
    383                                                block_offset_in_grad_cache;
    384 
    385  // Retrieve the cached gradient information and generate the histogram.
    386  for (int r = 1; r < rows - 1; ++r) {
    387    for (int c = 1; c < cols - 1; ++c) {
    388      const uint16_t abs_dx_abs_dy_sum =
    389          grad_info_blk[r * sb_width + c].abs_dx_abs_dy_sum;
    390      if (!abs_dx_abs_dy_sum) continue;
    391      total += abs_dx_abs_dy_sum;
    392      const bool is_dx_zero = grad_info_blk[r * sb_width + c].is_dx_zero;
    393      if (is_dx_zero) {
    394        hist[0] += abs_dx_abs_dy_sum >> 1;
    395        hist[BINS - 1] += abs_dx_abs_dy_sum >> 1;
    396      } else {
    397        const int8_t idx = grad_info_blk[r * sb_width + c].hist_bin_idx;
    398        assert(idx >= 0 && idx < BINS);
    399        hist[idx] += abs_dx_abs_dy_sum;
    400      }
    401    }
    402  }
    403  normalize_hog(total, hist);
    404 }
    405 
    406 static inline void collect_hog_data(const MACROBLOCK *x, BLOCK_SIZE bsize,
    407                                    BLOCK_SIZE sb_size, int plane, float *hog) {
    408  const MACROBLOCKD *xd = &x->e_mbd;
    409  const struct macroblockd_plane *const pd = &xd->plane[plane];
    410  const int ss_x = pd->subsampling_x;
    411  const int ss_y = pd->subsampling_y;
    412  const int bh = block_size_high[bsize];
    413  const int bw = block_size_wide[bsize];
    414  const int rows =
    415      ((xd->mb_to_bottom_edge >= 0) ? bh : (xd->mb_to_bottom_edge >> 3) + bh) >>
    416      ss_y;
    417  const int cols =
    418      ((xd->mb_to_right_edge >= 0) ? bw : (xd->mb_to_right_edge >> 3) + bw) >>
    419      ss_x;
    420 
    421  // If gradient data is already generated at SB level, reuse the cached data.
    422  // Otherwise, compute the data.
    423  if (x->is_sb_gradient_cached[plane]) {
    424    generate_hog_using_gradient_cache(x, rows, cols, sb_size, plane, hog);
    425  } else {
    426    const uint8_t *src = x->plane[plane].src.buf;
    427    const int src_stride = x->plane[plane].src.stride;
    428    generate_hog(src, src_stride, rows, cols, hog, is_cur_buf_hbd(xd));
    429  }
    430 
    431  // Scale the hog so the luma and chroma are on the same scale
    432  for (int b = 0; b < BINS; ++b) {
    433    hog[b] *= (1 + ss_x) * (1 + ss_y);
    434  }
    435 }
    436 
    437 static inline void prune_intra_mode_with_hog(
    438    const MACROBLOCK *x, BLOCK_SIZE bsize, BLOCK_SIZE sb_size, float th,
    439    uint8_t *directional_mode_skip_mask, int is_chroma) {
    440  const int plane = is_chroma ? AOM_PLANE_U : AOM_PLANE_Y;
    441  float hist[BINS] = { 0.0f };
    442  collect_hog_data(x, bsize, sb_size, plane, hist);
    443 
    444  // Make prediction for each of the mode
    445  float scores[DIRECTIONAL_MODES] = { 0.0f };
    446  av1_nn_predict(hist, &av1_intra_hog_model_nnconfig, 1, scores);
    447  for (UV_PREDICTION_MODE uv_mode = UV_V_PRED; uv_mode <= UV_D67_PRED;
    448       uv_mode++) {
    449    if (scores[uv_mode - UV_V_PRED] <= th) {
    450      directional_mode_skip_mask[uv_mode] = 1;
    451    }
    452  }
    453 }
    454 #undef BINS
    455 
    456 int av1_calc_normalized_variance(aom_variance_fn_t vf, const uint8_t *const buf,
    457                                 const int stride, const int is_hbd);
    458 
    459 // Returns whether caching of source variance for 4x4 sub-blocks is allowed.
    460 static inline bool is_src_var_for_4x4_sub_blocks_caching_enabled(
    461    const AV1_COMP *const cpi) {
    462  const SPEED_FEATURES *const sf = &cpi->sf;
    463  if (cpi->oxcf.mode != ALLINTRA) return false;
    464 
    465  if (sf->part_sf.partition_search_type == SEARCH_PARTITION) return true;
    466 
    467  if (INTRA_RD_VAR_THRESH(cpi->oxcf.speed) <= 0 ||
    468      (sf->rt_sf.use_nonrd_pick_mode && !sf->rt_sf.hybrid_intra_pickmode))
    469    return false;
    470 
    471  return true;
    472 }
    473 
    474 // Initialize the members of Block4x4VarInfo structure to -1 at the start
    475 // of every superblock.
    476 static inline void init_src_var_info_of_4x4_sub_blocks(
    477    const AV1_COMP *const cpi, Block4x4VarInfo *src_var_info_of_4x4_sub_blocks,
    478    const BLOCK_SIZE sb_size) {
    479  if (!is_src_var_for_4x4_sub_blocks_caching_enabled(cpi)) return;
    480 
    481  const int mi_count_in_sb = mi_size_wide[sb_size] * mi_size_high[sb_size];
    482  for (int i = 0; i < mi_count_in_sb; i++) {
    483    src_var_info_of_4x4_sub_blocks[i].var = -1;
    484    src_var_info_of_4x4_sub_blocks[i].log_var = -1.0;
    485  }
    486 }
    487 
    488 // Returns the cost needed to send a uniformly distributed r.v.
    489 static inline int write_uniform_cost(int n, int v) {
    490  const int l = get_unsigned_bits(n);
    491  const int m = (1 << l) - n;
    492  if (l == 0) return 0;
    493  if (v < m)
    494    return av1_cost_literal(l - 1);
    495  else
    496    return av1_cost_literal(l);
    497 }
    498 /*!\endcond */
    499 
    500 /*!\brief Returns the rate cost for luma prediction mode info of intra blocks.
    501 *
    502 * \callergraph
    503 */
    504 static inline int intra_mode_info_cost_y(const AV1_COMP *cpi,
    505                                         const MACROBLOCK *x,
    506                                         const MB_MODE_INFO *mbmi,
    507                                         BLOCK_SIZE bsize, int mode_cost,
    508                                         int discount_color_cost) {
    509  int total_rate = mode_cost;
    510  const ModeCosts *mode_costs = &x->mode_costs;
    511  const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0;
    512  const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;
    513  const int use_intrabc = mbmi->use_intrabc;
    514  // Can only activate one mode.
    515  assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc +
    516          use_filter_intra) <= 1);
    517  const int try_palette = av1_allow_palette(
    518      cpi->common.features.allow_screen_content_tools, mbmi->bsize);
    519  if (try_palette && mbmi->mode == DC_PRED) {
    520    const MACROBLOCKD *xd = &x->e_mbd;
    521    const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
    522    const int mode_ctx = av1_get_palette_mode_ctx(xd);
    523    total_rate +=
    524        mode_costs->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette];
    525    if (use_palette) {
    526      const uint8_t *const color_map = xd->plane[0].color_index_map;
    527      int block_width, block_height, rows, cols;
    528      av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
    529                               &cols);
    530      const int plt_size = mbmi->palette_mode_info.palette_size[0];
    531      int palette_mode_cost =
    532          mode_costs
    533              ->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
    534          write_uniform_cost(plt_size, color_map[0]);
    535      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
    536      const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
    537      palette_mode_cost +=
    538          av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
    539                                   n_cache, cpi->common.seq_params->bit_depth);
    540      if (!discount_color_cost)
    541        palette_mode_cost +=
    542            av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
    543 
    544      total_rate += palette_mode_cost;
    545    }
    546  }
    547  if (av1_filter_intra_allowed(&cpi->common, mbmi)) {
    548    total_rate += mode_costs->filter_intra_cost[mbmi->bsize][use_filter_intra];
    549    if (use_filter_intra) {
    550      total_rate +=
    551          mode_costs->filter_intra_mode_cost[mbmi->filter_intra_mode_info
    552                                                 .filter_intra_mode];
    553    }
    554  }
    555  if (av1_is_directional_mode(mbmi->mode)) {
    556    if (av1_use_angle_delta(bsize)) {
    557      total_rate +=
    558          mode_costs->angle_delta_cost[mbmi->mode - V_PRED]
    559                                      [MAX_ANGLE_DELTA +
    560                                       mbmi->angle_delta[PLANE_TYPE_Y]];
    561    }
    562  }
    563  if (av1_allow_intrabc(&cpi->common))
    564    total_rate += mode_costs->intrabc_cost[use_intrabc];
    565  return total_rate;
    566 }
    567 
    568 /*!\brief Return the rate cost for chroma prediction mode info of intra blocks.
    569 *
    570 * \callergraph
    571 */
    572 static inline int intra_mode_info_cost_uv(const AV1_COMP *cpi,
    573                                          const MACROBLOCK *x,
    574                                          const MB_MODE_INFO *mbmi,
    575                                          BLOCK_SIZE bsize, int mode_cost) {
    576  int total_rate = mode_cost;
    577  const ModeCosts *mode_costs = &x->mode_costs;
    578  const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0;
    579  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
    580  // Can only activate one mode.
    581  assert(((uv_mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1);
    582 
    583  const int try_palette = av1_allow_palette(
    584      cpi->common.features.allow_screen_content_tools, mbmi->bsize);
    585  if (try_palette && uv_mode == UV_DC_PRED) {
    586    const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
    587    total_rate +=
    588        mode_costs->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette];
    589    if (use_palette) {
    590      const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
    591      const int plt_size = pmi->palette_size[1];
    592      const MACROBLOCKD *xd = &x->e_mbd;
    593      const uint8_t *const color_map = xd->plane[1].color_index_map;
    594      int palette_mode_cost =
    595          mode_costs
    596              ->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
    597          write_uniform_cost(plt_size, color_map[0]);
    598      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
    599      const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
    600      palette_mode_cost += av1_palette_color_cost_uv(
    601          pmi, color_cache, n_cache, cpi->common.seq_params->bit_depth);
    602      palette_mode_cost +=
    603          av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP);
    604      total_rate += palette_mode_cost;
    605    }
    606  }
    607  const PREDICTION_MODE intra_mode = get_uv_mode(uv_mode);
    608  if (av1_is_directional_mode(intra_mode)) {
    609    if (av1_use_angle_delta(bsize)) {
    610      total_rate +=
    611          mode_costs->angle_delta_cost[intra_mode - V_PRED]
    612                                      [mbmi->angle_delta[PLANE_TYPE_UV] +
    613                                       MAX_ANGLE_DELTA];
    614    }
    615  }
    616  return total_rate;
    617 }
    618 
    619 /*!\cond */
    620 // Makes a quick intra prediction and estimate the rdcost with a model without
    621 // going through the whole txfm/quantize/itxfm process.
    622 static int64_t intra_model_rd(const AV1_COMMON *cm, MACROBLOCK *const x,
    623                              int plane, BLOCK_SIZE plane_bsize,
    624                              TX_SIZE tx_size, int use_hadamard) {
    625  MACROBLOCKD *const xd = &x->e_mbd;
    626  const BitDepthInfo bd_info = get_bit_depth_info(xd);
    627  int row, col;
    628  assert(!is_inter_block(xd->mi[0]));
    629  const int stepr = tx_size_high_unit[tx_size];
    630  const int stepc = tx_size_wide_unit[tx_size];
    631  const int txbw = tx_size_wide[tx_size];
    632  const int txbh = tx_size_high[tx_size];
    633  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
    634  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
    635  int64_t satd_cost = 0;
    636  struct macroblock_plane *p = &x->plane[plane];
    637  struct macroblockd_plane *pd = &xd->plane[plane];
    638  // Prediction.
    639  for (row = 0; row < max_blocks_high; row += stepr) {
    640    for (col = 0; col < max_blocks_wide; col += stepc) {
    641      av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
    642      // Here we use p->src_diff and p->coeff as temporary buffers for
    643      // prediction residue and transform coefficients. The buffers are only
    644      // used in this for loop, therefore we don't need to properly add offset
    645      // to the buffers.
    646      av1_subtract_block(
    647          bd_info, txbh, txbw, p->src_diff, block_size_wide[plane_bsize],
    648          p->src.buf + (((row * p->src.stride) + col) << 2), p->src.stride,
    649          pd->dst.buf + (((row * pd->dst.stride) + col) << 2), pd->dst.stride);
    650      av1_quick_txfm(use_hadamard, tx_size, bd_info, p->src_diff,
    651                     block_size_wide[plane_bsize], p->coeff);
    652      satd_cost += aom_satd(p->coeff, tx_size_2d[tx_size]);
    653    }
    654  }
    655  return satd_cost;
    656 }
    657 /*!\endcond */
    658 
    659 /*!\brief Estimate the luma rdcost of a given intra mode and try to prune it.
    660 *
    661 * \ingroup intra_mode_search
    662 * \callergraph
    663 * This function first makes a quick luma prediction and estimates the rdcost
    664 * with a model without going through the txfm, then try to prune the current
    665 * mode if the new estimate y_rd > 1.25 * best_model_rd.
    666 *
    667 * \return Returns 1 if the given mode is prune; 0 otherwise.
    668 */
    669 static inline int model_intra_yrd_and_prune(const AV1_COMP *const cpi,
    670                                            MACROBLOCK *x, BLOCK_SIZE bsize,
    671                                            int64_t *best_model_rd) {
    672  const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
    673  const int plane = 0;
    674  const AV1_COMMON *cm = &cpi->common;
    675  const int64_t this_model_rd =
    676      intra_model_rd(cm, x, plane, bsize, tx_size, /*use_hadamard=*/1);
    677  if (*best_model_rd != INT64_MAX &&
    678      this_model_rd > *best_model_rd + (*best_model_rd >> 2)) {
    679    return 1;
    680  } else if (this_model_rd < *best_model_rd) {
    681    *best_model_rd = this_model_rd;
    682  }
    683  return 0;
    684 }
    685 
    686 #ifdef __cplusplus
    687 }  // extern "C"
    688 #endif
    689 
    690 #endif  // AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_