tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

var_based_part.c (83658B)


      1 /*
      2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <limits.h>
     13 #include <math.h>
     14 #include <stdbool.h>
     15 #include <stdio.h>
     16 
     17 #include "config/aom_config.h"
     18 #include "config/aom_dsp_rtcd.h"
     19 #include "config/av1_rtcd.h"
     20 
     21 #include "aom_dsp/aom_dsp_common.h"
     22 #include "aom_dsp/binary_codes_writer.h"
     23 #include "aom_ports/mem.h"
     24 #include "aom_ports/aom_timer.h"
     25 
     26 #include "av1/common/reconinter.h"
     27 #include "av1/common/blockd.h"
     28 #include "av1/common/quant_common.h"
     29 
     30 #include "av1/encoder/encodeframe.h"
     31 #include "av1/encoder/encodeframe_utils.h"
     32 #include "av1/encoder/var_based_part.h"
     33 #include "av1/encoder/reconinter_enc.h"
     34 #include "av1/encoder/rdopt_utils.h"
     35 
// Possible values for the force_split variable while evaluating variance based
// partitioning. Controls whether set_vt_partitioning may pick a partition at
// the current block size or must recurse/stop.
enum {
  // Evaluate all partition types
  PART_EVAL_ALL = 0,
  // Force PARTITION_SPLIT
  PART_EVAL_ONLY_SPLIT = 1,
  // Force PARTITION_NONE
  PART_EVAL_ONLY_NONE = 2
} UENUM1BYTE(PART_EVAL_STATUS);
     46 
// Generic view of one level of the variance-partition tree: the variance
// records of this block plus pointers to the statistics of its 4 quadrants.
typedef struct {
  // Variance records (none/horz/vert) for the block at this level.
  VPVariance *part_variances;
  // "none" variance entry of each of the four quadrants.
  VPartVar *split[4];
} variance_node;
     51 
     52 static inline void tree_to_node(void *data, BLOCK_SIZE bsize,
     53                                variance_node *node) {
     54  node->part_variances = NULL;
     55  switch (bsize) {
     56    case BLOCK_128X128: {
     57      VP128x128 *vt = (VP128x128 *)data;
     58      node->part_variances = &vt->part_variances;
     59      for (int split_idx = 0; split_idx < 4; split_idx++)
     60        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
     61      break;
     62    }
     63    case BLOCK_64X64: {
     64      VP64x64 *vt = (VP64x64 *)data;
     65      node->part_variances = &vt->part_variances;
     66      for (int split_idx = 0; split_idx < 4; split_idx++)
     67        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
     68      break;
     69    }
     70    case BLOCK_32X32: {
     71      VP32x32 *vt = (VP32x32 *)data;
     72      node->part_variances = &vt->part_variances;
     73      for (int split_idx = 0; split_idx < 4; split_idx++)
     74        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
     75      break;
     76    }
     77    case BLOCK_16X16: {
     78      VP16x16 *vt = (VP16x16 *)data;
     79      node->part_variances = &vt->part_variances;
     80      for (int split_idx = 0; split_idx < 4; split_idx++)
     81        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
     82      break;
     83    }
     84    case BLOCK_8X8: {
     85      VP8x8 *vt = (VP8x8 *)data;
     86      node->part_variances = &vt->part_variances;
     87      for (int split_idx = 0; split_idx < 4; split_idx++)
     88        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
     89      break;
     90    }
     91    default: {
     92      VP4x4 *vt = (VP4x4 *)data;
     93      assert(bsize == BLOCK_4X4);
     94      node->part_variances = &vt->part_variances;
     95      for (int split_idx = 0; split_idx < 4; split_idx++)
     96        node->split[split_idx] = &vt->split[split_idx];
     97      break;
     98    }
     99  }
    100 }
    101 
    102 // Set variance values given sum square error, sum error, count.
    103 static inline void fill_variance(uint32_t s2, int32_t s, int c, VPartVar *v) {
    104  v->sum_square_error = s2;
    105  v->sum_error = s;
    106  v->log2_count = c;
    107 }
    108 
// Finalize v->variance from the accumulated statistics:
//   variance = 256 * (sse - sum^2 / count) / count, count = 2^log2_count.
// The factor 256 preserves precision for small blocks; the sum^2 term is
// computed in 64 bits before being shifted back down.
// NOTE(review): the multiply by 256 happens in 32-bit unsigned arithmetic
// and could wrap for very large accumulated SSE — presumably callers bound
// the block size/error so this does not occur; verify against upstream.
static inline void get_variance(VPartVar *v) {
  v->variance =
      (int)(256 * (v->sum_square_error -
                   (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
                              v->log2_count)) >>
            v->log2_count);
}
    116 
    117 static inline void sum_2_variances(const VPartVar *a, const VPartVar *b,
    118                                   VPartVar *r) {
    119  assert(a->log2_count == b->log2_count);
    120  fill_variance(a->sum_square_error + b->sum_square_error,
    121                a->sum_error + b->sum_error, a->log2_count + 1, r);
    122 }
    123 
    124 static inline void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
    125  variance_node node;
    126  memset(&node, 0, sizeof(node));
    127  tree_to_node(data, bsize, &node);
    128  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
    129  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
    130  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
    131  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
    132  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
    133                  &node.part_variances->none);
    134 }
    135 
    136 static inline void set_block_size(AV1_COMP *const cpi, int mi_row, int mi_col,
    137                                  BLOCK_SIZE bsize) {
    138  if (cpi->common.mi_params.mi_cols > mi_col &&
    139      cpi->common.mi_params.mi_rows > mi_row) {
    140    CommonModeInfoParams *mi_params = &cpi->common.mi_params;
    141    const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col);
    142    const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col);
    143    MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] =
    144        &mi_params->mi_alloc[mi_alloc_idx];
    145    mi->bsize = bsize;
    146  }
    147 }
    148 
// Try to finalize the partition for the block of size `bsize` at
// (mi_row, mi_col) from the precomputed variance tree node `data`.
// Returns 1 if a partition (NONE, VERT, or HORZ) was committed via
// set_block_size(), which stops the recursion; returns 0 to request a
// further PARTITION_SPLIT.
// `threshold` is the variance bound for accepting bsize without a split;
// `bsize_min` is the smallest block size the tree was built down to;
// `force_split` can short-circuit the decision either way.
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               PART_EVAL_STATUS force_split) {
  AV1_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];
  int bs_width_check = block_width;
  int bs_height_check = block_height;
  int bs_width_vert_check = block_width >> 1;
  int bs_height_horiz_check = block_height >> 1;
  // On the right and bottom boundary we only need to check
  // if half the bsize fits, because boundary is extended
  // up to 64. So do this check only for sb_size = 64X64.
  if (cm->seq_params->sb_size == BLOCK_64X64) {
    if (tile->mi_col_end == cm->mi_params.mi_cols) {
      bs_width_check = (block_width >> 1) + 1;
      bs_width_vert_check = (block_width >> 2) + 1;
    }
    if (tile->mi_row_end == cm->mi_params.mi_rows) {
      bs_height_check = (block_height >> 1) + 1;
      bs_height_horiz_check = (block_height >> 2) + 1;
    }
  }

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  // Forced PARTITION_NONE: accept bsize outright as long as it fits the tile.
  if (mi_col + bs_width_check <= tile->mi_col_end &&
      mi_row + bs_height_check <= tile->mi_row_end &&
      force_split == PART_EVAL_ONLY_NONE) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    return 1;
  }
  // Forced PARTITION_SPLIT: ask the caller to recurse.
  if (force_split == PART_EVAL_ONLY_SPLIT) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_check <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_check <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, mi_row, mi_col, bsize);
      return 1;
    }
    // Check vertical split: both halves must be low-variance and the
    // resulting chroma block size must be codable (plane_bsize valid).
    if (mi_row + bs_height_check <= tile->mi_row_end &&
        mi_col + bs_width_vert_check <= tile->mi_col_end) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
      BLOCK_SIZE plane_bsize =
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                               xd->plane[AOM_PLANE_U].subsampling_y);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          plane_bsize < BLOCK_INVALID) {
        set_block_size(cpi, mi_row, mi_col, subsize);
        set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split, same conditions as the vertical case.
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_horiz_check <= tile->mi_row_end) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
      BLOCK_SIZE plane_bsize =
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                               xd->plane[AOM_PLANE_U].subsampling_y);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          plane_bsize < BLOCK_INVALID) {
        set_block_size(cpi, mi_row, mi_col, subsize);
        set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }
    return 0;
  }
  return 0;
}
    254 
    255 static inline int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide,
    256                                  int pixels_high) {
    257  int all_inside = 1;
    258  for (int idx = 0; idx < 4; idx++) {
    259    all_inside &= ((x16_idx + GET_BLK_IDX_X(idx, 3)) < pixels_wide);
    260    all_inside &= ((y16_idx + GET_BLK_IDX_Y(idx, 3)) < pixels_high);
    261  }
    262  return all_inside;
    263 }
    264 
    265 #if CONFIG_AV1_HIGHBITDEPTH
    266 // TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd
    267 static inline void fill_variance_8x8avg_highbd(
    268    const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
    269    int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
    270    int pixels_high) {
    271  for (int idx = 0; idx < 4; idx++) {
    272    const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3);
    273    const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3);
    274    unsigned int sse = 0;
    275    int sum = 0;
    276    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
    277      int src_avg = aom_highbd_avg_8x8(src_buf + y8_idx * src_stride + x8_idx,
    278                                       src_stride);
    279      int dst_avg = aom_highbd_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx,
    280                                       dst_stride);
    281 
    282      sum = src_avg - dst_avg;
    283      sse = sum * sum;
    284    }
    285    fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none);
    286  }
    287 }
    288 #endif
    289 
    290 static inline void fill_variance_8x8avg_lowbd(
    291    const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
    292    int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
    293    int pixels_high) {
    294  unsigned int sse[4] = { 0 };
    295  int sum[4] = { 0 };
    296 
    297  if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) {
    298    int src_avg[4];
    299    int dst_avg[4];
    300    aom_avg_8x8_quad(src_buf, src_stride, x16_idx, y16_idx, src_avg);
    301    aom_avg_8x8_quad(dst_buf, dst_stride, x16_idx, y16_idx, dst_avg);
    302    for (int idx = 0; idx < 4; idx++) {
    303      sum[idx] = src_avg[idx] - dst_avg[idx];
    304      sse[idx] = sum[idx] * sum[idx];
    305    }
    306  } else {
    307    for (int idx = 0; idx < 4; idx++) {
    308      const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3);
    309      const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3);
    310      if (x8_idx < pixels_wide && y8_idx < pixels_high) {
    311        int src_avg =
    312            aom_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride);
    313        int dst_avg =
    314            aom_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, dst_stride);
    315        sum[idx] = src_avg - dst_avg;
    316        sse[idx] = sum[idx] * sum[idx];
    317      }
    318    }
    319  }
    320 
    321  for (int idx = 0; idx < 4; idx++) {
    322    fill_variance(sse[idx], sum[idx], 0, &vst->split[idx].part_variances.none);
    323  }
    324 }
    325 
    326 // Obtain parameters required to calculate variance (such as sum, sse, etc,.)
    327 // at 8x8 sub-block level for a given 16x16 block.
    328 // The function can be called only when is_key_frame is false since sum is
    329 // computed between source and reference frames.
    330 static inline void fill_variance_8x8avg(const uint8_t *src_buf, int src_stride,
    331                                        const uint8_t *dst_buf, int dst_stride,
    332                                        int x16_idx, int y16_idx, VP16x16 *vst,
    333                                        int highbd_flag, int pixels_wide,
    334                                        int pixels_high) {
    335 #if CONFIG_AV1_HIGHBITDEPTH
    336  if (highbd_flag) {
    337    fill_variance_8x8avg_highbd(src_buf, src_stride, dst_buf, dst_stride,
    338                                x16_idx, y16_idx, vst, pixels_wide,
    339                                pixels_high);
    340    return;
    341  }
    342 #else
    343  (void)highbd_flag;
    344 #endif  // CONFIG_AV1_HIGHBITDEPTH
    345  fill_variance_8x8avg_lowbd(src_buf, src_stride, dst_buf, dst_stride, x16_idx,
    346                             y16_idx, vst, pixels_wide, pixels_high);
    347 }
    348 
    349 static int compute_minmax_8x8(const uint8_t *src_buf, int src_stride,
    350                              const uint8_t *dst_buf, int dst_stride,
    351                              int x16_idx, int y16_idx,
    352 #if CONFIG_AV1_HIGHBITDEPTH
    353                              int highbd_flag,
    354 #endif
    355                              int pixels_wide, int pixels_high) {
    356  int minmax_max = 0;
    357  int minmax_min = 255;
    358  // Loop over the 4 8x8 subblocks.
    359  for (int idx = 0; idx < 4; idx++) {
    360    const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3);
    361    const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3);
    362    int min = 0;
    363    int max = 0;
    364    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
    365 #if CONFIG_AV1_HIGHBITDEPTH
    366      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
    367        aom_highbd_minmax_8x8(
    368            src_buf + y8_idx * src_stride + x8_idx, src_stride,
    369            dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, &max);
    370      } else {
    371        aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride,
    372                       dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min,
    373                       &max);
    374      }
    375 #else
    376      aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride,
    377                     dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min,
    378                     &max);
    379 #endif
    380      if ((max - min) > minmax_max) minmax_max = (max - min);
    381      if ((max - min) < minmax_min) minmax_min = (max - min);
    382    }
    383  }
    384  return (minmax_max - minmax_min);
    385 }
    386 
// Function to compute average and variance of 4x4 sub-block.
// The function can be called only when is_key_frame is true since sum is
// computed using source frame only.
// For each of the four 4x4 sub-blocks of the 8x8 block at (x8_idx, y8_idx),
// the "reference" is the flat mid-gray value 128, so the recorded sum is
// avg(src) - 128. Sub-blocks within `border_offset_4x4` of the right/bottom
// frame edge are skipped and left with zero statistics.
static inline void fill_variance_4x4avg(const uint8_t *src_buf, int src_stride,
                                        int x8_idx, int y8_idx, VP8x8 *vst,
#if CONFIG_AV1_HIGHBITDEPTH
                                        int highbd_flag,
#endif
                                        int pixels_wide, int pixels_high,
                                        int border_offset_4x4) {
  for (int idx = 0; idx < 4; idx++) {
    const int x4_idx = x8_idx + GET_BLK_IDX_X(idx, 2);
    const int y4_idx = y8_idx + GET_BLK_IDX_Y(idx, 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide - border_offset_4x4 &&
        y4_idx < pixels_high - border_offset_4x4) {
      int src_avg;
      int dst_avg = 128;  // Flat mid-gray stand-in for the predictor.
#if CONFIG_AV1_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        src_avg = aom_highbd_avg_4x4(src_buf + y4_idx * src_stride + x4_idx,
                                     src_stride);
      } else {
        src_avg =
            aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
      }
#else
      src_avg = aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
#endif

      sum = src_avg - dst_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none);
  }
}
    424 
    425 static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
    426                                         int non_reference_frame,
    427                                         int is_static) {
    428  int64_t threshold = threshold_base;
    429  if (non_reference_frame && !is_static) threshold = (3 * threshold) >> 1;
    430  if (speed >= 8) {
    431    return (5 * threshold) >> 2;
    432  }
    433  return threshold;
    434 }
    435 
    436 // Tune thresholds less or more aggressively to prefer larger partitions
    437 static inline void tune_thresh_based_on_qindex(
    438    AV1_COMP *cpi, int64_t thresholds[], uint64_t block_sad, int current_qindex,
    439    int num_pixels, bool is_segment_id_boosted, int source_sad_nonrd,
    440    int lighting_change) {
    441  double weight;
    442  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
    443    const int win = 20;
    444    if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
    445      weight = 1.0;
    446    else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
    447      weight = 0.0;
    448    else
    449      weight =
    450          1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win);
    451    if (num_pixels > RESOLUTION_480P) {
    452      for (int i = 0; i < 4; i++) {
    453        thresholds[i] <<= 1;
    454      }
    455    }
    456    if (num_pixels <= RESOLUTION_288P) {
    457      thresholds[3] = INT64_MAX;
    458      if (is_segment_id_boosted == false) {
    459        thresholds[1] <<= 2;
    460        thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
    461      } else {
    462        thresholds[1] <<= 1;
    463        thresholds[2] <<= 3;
    464      }
    465      // Allow for split to 8x8 for superblocks where part of it has
    466      // moving boundary. So allow for sb with source_sad above threshold,
    467      // and avoid very large source_sad or high source content, to avoid
    468      // too many 8x8 within superblock.
    469      uint64_t avg_source_sad_thresh = 25000;
    470      uint64_t block_sad_low = 25000;
    471      uint64_t block_sad_high = 50000;
    472      if (cpi->svc.temporal_layer_id == 0 &&
    473          cpi->svc.number_temporal_layers > 1) {
    474        // Increase the sad thresholds for base TL0, as reference/LAST is
    475        // 2/4 frames behind (for 2/3 #TL).
    476        avg_source_sad_thresh = 40000;
    477        block_sad_high = 70000;
    478      }
    479      if (is_segment_id_boosted == false &&
    480          cpi->rc.avg_source_sad < avg_source_sad_thresh &&
    481          block_sad > block_sad_low && block_sad < block_sad_high &&
    482          !lighting_change) {
    483        thresholds[2] = (3 * thresholds[2]) >> 2;
    484        thresholds[3] = thresholds[2] << 3;
    485      }
    486      // Condition the increase of partition thresholds on the segment
    487      // and the content. Avoid the increase for superblocks which have
    488      // high source sad, unless the whole frame has very high motion
    489      // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
    490      // have high source sad).
    491    } else if (num_pixels > RESOLUTION_480P && is_segment_id_boosted == false &&
    492               (source_sad_nonrd != kHighSad ||
    493                cpi->rc.avg_source_sad > 50000)) {
    494      thresholds[0] = (3 * thresholds[0]) >> 1;
    495      thresholds[3] = INT64_MAX;
    496      if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
    497        thresholds[1] =
    498            (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
    499        thresholds[2] =
    500            (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
    501      }
    502    } else if (current_qindex > QINDEX_LARGE_BLOCK_THR &&
    503               is_segment_id_boosted == false &&
    504               (source_sad_nonrd != kHighSad ||
    505                cpi->rc.avg_source_sad > 50000)) {
    506      thresholds[1] =
    507          (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
    508      thresholds[2] =
    509          (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
    510      thresholds[3] = INT64_MAX;
    511    }
    512  } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
    513    thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0;
    514    thresholds[2] =
    515        (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2];
    516  } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
    517    const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1;
    518    if (current_qindex < QINDEX_LARGE_BLOCK_THR - 45)
    519      weight = 1.0;
    520    else if (current_qindex > QINDEX_LARGE_BLOCK_THR + 45)
    521      weight = 0.0;
    522    else
    523      weight = 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + 45) / (2 * 45);
    524    thresholds[1] =
    525        (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
    526    thresholds[2] =
    527        (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
    528    thresholds[3] =
    529        (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]);
    530  }
    531  if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
    532    thresholds[3] = INT64_MAX;
    533 }
    534 
    535 static void set_vbp_thresholds_key_frame(AV1_COMP *cpi, int64_t thresholds[],
    536                                         int64_t threshold_base,
    537                                         int threshold_left_shift,
    538                                         int num_pixels) {
    539  if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
    540    const int shift_steps =
    541        threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8);
    542    assert(shift_steps >= 0);
    543    threshold_base <<= shift_steps;
    544  }
    545  thresholds[0] = threshold_base;
    546  thresholds[1] = threshold_base;
    547  if (num_pixels < RESOLUTION_720P) {
    548    thresholds[2] = threshold_base / 3;
    549    thresholds[3] = threshold_base >> 1;
    550  } else {
    551    int shift_val = 2;
    552    if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
    553      shift_val = (cpi->oxcf.mode == ALLINTRA ? 1 : 0);
    554    }
    555 
    556    thresholds[2] = threshold_base >> shift_val;
    557    thresholds[3] = threshold_base >> shift_val;
    558  }
    559  thresholds[4] = threshold_base << 2;
    560 }
    561 
// Adjust the partition thresholds for the frame resolution. Small frames
// (<= 288p) additionally interpolate the thresholds over a qindex window
// selected by the var_part_based_on_qidx speed feature; larger frames use
// fixed scalings of threshold_base.
static inline void tune_thresh_based_on_resolution(
    AV1_COMP *cpi, int64_t thresholds[], int64_t threshold_base,
    int current_qindex, int source_sad_rd, int num_pixels) {
  if (num_pixels >= RESOLUTION_720P) thresholds[3] = thresholds[3] << 1;
  if (num_pixels <= RESOLUTION_288P) {
    // {low, high} qindex breakpoints, indexed by var_part_based_on_qidx.
    const int qindex_thr[5][2] = {
      { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 },
    };
    int th_idx = 0;
    if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1)
      th_idx =
          (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0;
    if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3)
      th_idx = cpi->sf.rt_sf.var_part_based_on_qidx;
    const int qindex_low_thr = qindex_thr[th_idx][0];
    const int qindex_high_thr = qindex_thr[th_idx][1];
    if (current_qindex >= qindex_high_thr) {
      // High qindex: scale the base up and strongly prefer large blocks.
      threshold_base = (5 * threshold_base) >> 1;
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base << 2;
      thresholds[3] = threshold_base << 5;
    } else if (current_qindex < qindex_low_thr) {
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base >> 1;
      thresholds[3] = threshold_base << 3;
    } else {
      // Between the breakpoints: interpolate linearly in qindex between the
      // low- and high-qindex threshold settings.
      int64_t qi_diff_low = current_qindex - qindex_low_thr;
      int64_t qi_diff_high = qindex_high_thr - current_qindex;
      int64_t threshold_diff = qindex_high_thr - qindex_low_thr;
      int64_t threshold_base_high = (5 * threshold_base) >> 1;

      // Guard against a zero-width window (division by zero below).
      threshold_diff = threshold_diff > 0 ? threshold_diff : 1;
      threshold_base =
          (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) /
          threshold_diff;
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = ((qi_diff_low * threshold_base) +
                       qi_diff_high * (threshold_base >> 1)) /
                      threshold_diff;
      thresholds[3] = ((qi_diff_low * (threshold_base << 5)) +
                       qi_diff_high * (threshold_base << 3)) /
                      threshold_diff;
    }
  } else if (num_pixels < RESOLUTION_720P) {
    thresholds[2] = (5 * threshold_base) >> 2;
  } else if (num_pixels < RESOLUTION_1080P) {
    thresholds[2] = threshold_base << 1;
  } else {
    // num_pixels >= RESOLUTION_1080P
    if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
      if (num_pixels < RESOLUTION_1440P) {
        thresholds[2] = (5 * threshold_base) >> 1;
      } else {
        thresholds[2] = (7 * threshold_base) >> 1;
      }
    } else {
      if (cpi->oxcf.speed > 7) {
        thresholds[2] = 6 * threshold_base;
      } else {
        thresholds[2] = 3 * threshold_base;
      }
    }
  }
}
    626 
    627 // Increase the base partition threshold, based on content and noise level.
    628 static inline int64_t tune_base_thresh_content(AV1_COMP *cpi,
    629                                               int64_t threshold_base,
    630                                               int content_lowsumdiff,
    631                                               int source_sad_nonrd,
    632                                               int num_pixels) {
    633  AV1_COMMON *const cm = &cpi->common;
    634  int64_t updated_thresh_base = threshold_base;
    635  if (cpi->noise_estimate.enabled && content_lowsumdiff &&
    636      num_pixels > RESOLUTION_480P && cm->current_frame.frame_number > 60) {
    637    NOISE_LEVEL noise_level =
    638        av1_noise_estimate_extract_level(&cpi->noise_estimate);
    639    if (noise_level == kHigh)
    640      updated_thresh_base = (5 * updated_thresh_base) >> 1;
    641    else if (noise_level == kMedium &&
    642             !cpi->sf.rt_sf.prefer_large_partition_blocks)
    643      updated_thresh_base = (5 * updated_thresh_base) >> 2;
    644  }
    645  updated_thresh_base = scale_part_thresh_content(
    646      updated_thresh_base, cpi->oxcf.speed,
    647      cpi->ppi->rtc_ref.non_reference_frame, cpi->rc.frame_source_sad == 0);
    648  if (cpi->oxcf.speed >= 11 && source_sad_nonrd > kLowSad &&
    649      cpi->rc.high_motion_content_screen_rtc)
    650    updated_thresh_base = updated_thresh_base << 4;
    651  return updated_thresh_base;
    652 }
    653 
    654 static inline void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
    655                                      uint64_t blk_sad, int qindex,
    656                                      int content_lowsumdiff,
    657                                      int source_sad_nonrd, int source_sad_rd,
    658                                      bool is_segment_id_boosted,
    659                                      int lighting_change) {
    660  AV1_COMMON *const cm = &cpi->common;
    661  const int is_key_frame = frame_is_intra_only(cm);
    662  const int threshold_multiplier = is_key_frame ? 120 : 1;
    663  const int ac_q = av1_ac_quant_QTX(qindex, 0, cm->seq_params->bit_depth);
    664  int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q);
    665  const int current_qindex = cm->quant_params.base_qindex;
    666  const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift;
    667  const int num_pixels = cm->width * cm->height;
    668 
    669  if (is_key_frame) {
    670    set_vbp_thresholds_key_frame(cpi, thresholds, threshold_base,
    671                                 threshold_left_shift, num_pixels);
    672    return;
    673  }
    674 
    675  threshold_base = tune_base_thresh_content(
    676      cpi, threshold_base, content_lowsumdiff, source_sad_nonrd, num_pixels);
    677  thresholds[0] = threshold_base >> 1;
    678  thresholds[1] = threshold_base;
    679  thresholds[3] = threshold_base << threshold_left_shift;
    680 
    681  tune_thresh_based_on_resolution(cpi, thresholds, threshold_base,
    682                                  current_qindex, source_sad_rd, num_pixels);
    683 
    684  tune_thresh_based_on_qindex(cpi, thresholds, blk_sad, current_qindex,
    685                              num_pixels, is_segment_id_boosted,
    686                              source_sad_nonrd, lighting_change);
    687 }
    688 
// Set temporal variance low flag for superblock 64x64.
// Only first 25 in the array are used in this case.
// variance_low[] layout for the 64x64 tree: [0] 64x64, [1..2] 64x32 halves,
// [3..4] 32x64 halves, [5..8] 32x32 quadrants, [9..24] 16x16 sub-blocks.
static inline void set_low_temp_var_flag_64x64(CommonModeInfoParams *mi_params,
                                              PartitionSearchInfo *part_info,
                                              MACROBLOCKD *xd, VP64x64 *vt,
                                              const int64_t thresholds[],
                                              int mi_col, int mi_row) {
  if (xd->mi[0]->bsize == BLOCK_64X64) {
    // Whole superblock coded as one block: use half of thresholds[0].
    if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
      part_info->variance_low[0] = 1;
  } else if (xd->mi[0]->bsize == BLOCK_64X32) {
    // Horizontal halves use a stricter (quarter) threshold.
    for (int part_idx = 0; part_idx < 2; part_idx++) {
      if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
        part_info->variance_low[part_idx + 1] = 1;
    }
  } else if (xd->mi[0]->bsize == BLOCK_32X64) {
    // Vertical halves, same quarter threshold.
    for (int part_idx = 0; part_idx < 2; part_idx++) {
      if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
        part_info->variance_low[part_idx + 3] = 1;
    }
  } else {
    // Superblock was split further: visit the four 32x32 quadrants.
    // Offsets are in mi (4-pel) units: 8 mi == 32 pixels.
    static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int idx_str = mi_params->mi_stride * (mi_row + idx[lvl1_idx][0]) +
                          mi_col + idx[lvl1_idx][1];
      MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;

      // Skip quadrants that fall outside the frame border.
      if (mi_params->mi_cols <= mi_col + idx[lvl1_idx][1] ||
          mi_params->mi_rows <= mi_row + idx[lvl1_idx][0])
        continue;

      if (*this_mi == NULL) continue;

      if ((*this_mi)->bsize == BLOCK_32X32) {
        // Scaled threshold: 5/8 of thresholds[1].
        int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
        if (vt->split[lvl1_idx].part_variances.none.variance < threshold_32x32)
          part_info->variance_low[lvl1_idx + 5] = 1;
      } else {
        // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
        // inside.
        if ((*this_mi)->bsize == BLOCK_16X16 ||
            (*this_mi)->bsize == BLOCK_32X16 ||
            (*this_mi)->bsize == BLOCK_16X32) {
          for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
            if (vt->split[lvl1_idx]
                    .split[lvl2_idx]
                    .part_variances.none.variance < (thresholds[2] >> 8))
              part_info->variance_low[(lvl1_idx << 2) + lvl2_idx + 9] = 1;
          }
        }
      }
    }
  }
}
    743 
    744 static inline void set_low_temp_var_flag_128x128(
    745    CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
    746    MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col,
    747    int mi_row) {
    748  if (xd->mi[0]->bsize == BLOCK_128X128) {
    749    if (vt->part_variances.none.variance < (thresholds[0] >> 1))
    750      part_info->variance_low[0] = 1;
    751  } else if (xd->mi[0]->bsize == BLOCK_128X64) {
    752    for (int part_idx = 0; part_idx < 2; part_idx++) {
    753      if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
    754        part_info->variance_low[part_idx + 1] = 1;
    755    }
    756  } else if (xd->mi[0]->bsize == BLOCK_64X128) {
    757    for (int part_idx = 0; part_idx < 2; part_idx++) {
    758      if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
    759        part_info->variance_low[part_idx + 3] = 1;
    760    }
    761  } else {
    762    static const int idx64[4][2] = {
    763      { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 }
    764    };
    765    static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
    766    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
    767      const int idx_str = mi_params->mi_stride * (mi_row + idx64[lvl1_idx][0]) +
    768                          mi_col + idx64[lvl1_idx][1];
    769      MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str;
    770      if (*mi_64 == NULL) continue;
    771      if (mi_params->mi_cols <= mi_col + idx64[lvl1_idx][1] ||
    772          mi_params->mi_rows <= mi_row + idx64[lvl1_idx][0])
    773        continue;
    774      const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3;
    775      if ((*mi_64)->bsize == BLOCK_64X64) {
    776        if (vt->split[lvl1_idx].part_variances.none.variance < threshold_64x64)
    777          part_info->variance_low[5 + lvl1_idx] = 1;
    778      } else if ((*mi_64)->bsize == BLOCK_64X32) {
    779        for (int part_idx = 0; part_idx < 2; part_idx++)
    780          if (vt->split[lvl1_idx].part_variances.horz[part_idx].variance <
    781              (threshold_64x64 >> 1))
    782            part_info->variance_low[9 + (lvl1_idx << 1) + part_idx] = 1;
    783      } else if ((*mi_64)->bsize == BLOCK_32X64) {
    784        for (int part_idx = 0; part_idx < 2; part_idx++)
    785          if (vt->split[lvl1_idx].part_variances.vert[part_idx].variance <
    786              (threshold_64x64 >> 1))
    787            part_info->variance_low[17 + (lvl1_idx << 1) + part_idx] = 1;
    788      } else {
    789        for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
    790          const int idx_str1 =
    791              mi_params->mi_stride * idx32[lvl2_idx][0] + idx32[lvl2_idx][1];
    792          MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1;
    793          if (*mi_32 == NULL) continue;
    794 
    795          if (mi_params->mi_cols <=
    796                  mi_col + idx64[lvl1_idx][1] + idx32[lvl2_idx][1] ||
    797              mi_params->mi_rows <=
    798                  mi_row + idx64[lvl1_idx][0] + idx32[lvl2_idx][0])
    799            continue;
    800          const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3;
    801          if ((*mi_32)->bsize == BLOCK_32X32) {
    802            if (vt->split[lvl1_idx]
    803                    .split[lvl2_idx]
    804                    .part_variances.none.variance < threshold_32x32)
    805              part_info->variance_low[25 + (lvl1_idx << 2) + lvl2_idx] = 1;
    806          } else {
    807            // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
    808            // inside.
    809            if ((*mi_32)->bsize == BLOCK_16X16 ||
    810                (*mi_32)->bsize == BLOCK_32X16 ||
    811                (*mi_32)->bsize == BLOCK_16X32) {
    812              for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
    813                VPartVar *none_var = &vt->split[lvl1_idx]
    814                                          .split[lvl2_idx]
    815                                          .split[lvl3_idx]
    816                                          .part_variances.none;
    817                if (none_var->variance < (thresholds[3] >> 8))
    818                  part_info->variance_low[41 + (lvl1_idx << 4) +
    819                                          (lvl2_idx << 2) + lvl3_idx] = 1;
    820              }
    821            }
    822          }
    823        }
    824      }
    825    }
    826  }
    827 }
    828 
    829 static inline void set_low_temp_var_flag(
    830    AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd,
    831    VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition,
    832    int mi_col, int mi_row, const bool is_small_sb) {
    833  AV1_COMMON *const cm = &cpi->common;
    834  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected.
    835  // If the temporal variance is small set the flag
    836  // variance_low for the block. The variance threshold can be adjusted, the
    837  // higher the more aggressive.
    838  if (ref_frame_partition == LAST_FRAME) {
    839    if (is_small_sb)
    840      set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd,
    841                                  &(vt->split[0]), thresholds, mi_col, mi_row);
    842    else
    843      set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt,
    844                                    thresholds, mi_col, mi_row);
    845  }
    846 }
    847 
// Maps the (row, col) position of a 16x16 block inside a 64x64 superblock
// (each coordinate in 16x16-block units, 0..3) to its index in the
// variance_low[] array (entries 9..24 of the 64x64 layout).
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};
    851 
    852 int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low,
    853                                             int mi_row, int mi_col,
    854                                             BLOCK_SIZE bsize) {
    855  // Relative indices of MB inside the superblock.
    856  const int mi_x = mi_row & 0xF;
    857  const int mi_y = mi_col & 0xF;
    858  // Relative indices of 16x16 block inside the superblock.
    859  const int i = mi_x >> 2;
    860  const int j = mi_y >> 2;
    861  int force_skip_low_temp_var = 0;
    862  // Set force_skip_low_temp_var based on the block size and block offset.
    863  switch (bsize) {
    864    case BLOCK_64X64: force_skip_low_temp_var = variance_low[0]; break;
    865    case BLOCK_64X32:
    866      if (!mi_y && !mi_x) {
    867        force_skip_low_temp_var = variance_low[1];
    868      } else if (!mi_y && mi_x) {
    869        force_skip_low_temp_var = variance_low[2];
    870      }
    871      break;
    872    case BLOCK_32X64:
    873      if (!mi_y && !mi_x) {
    874        force_skip_low_temp_var = variance_low[3];
    875      } else if (mi_y && !mi_x) {
    876        force_skip_low_temp_var = variance_low[4];
    877      }
    878      break;
    879    case BLOCK_32X32:
    880      if (!mi_y && !mi_x) {
    881        force_skip_low_temp_var = variance_low[5];
    882      } else if (mi_y && !mi_x) {
    883        force_skip_low_temp_var = variance_low[6];
    884      } else if (!mi_y && mi_x) {
    885        force_skip_low_temp_var = variance_low[7];
    886      } else if (mi_y && mi_x) {
    887        force_skip_low_temp_var = variance_low[8];
    888      }
    889      break;
    890    case BLOCK_32X16:
    891    case BLOCK_16X32:
    892    case BLOCK_16X16:
    893      force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
    894      break;
    895    default: break;
    896  }
    897 
    898  return force_skip_low_temp_var;
    899 }
    900 
    901 int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row,
    902                                    int mi_col, BLOCK_SIZE bsize) {
    903  int force_skip_low_temp_var = 0;
    904  int x, y;
    905  x = (mi_col & 0x1F) >> 4;
    906  // y = (mi_row & 0x1F) >> 4;
    907  // const int idx64 = (y << 1) + x;
    908  y = (mi_row & 0x17) >> 3;
    909  const int idx64 = y + x;
    910 
    911  x = (mi_col & 0xF) >> 3;
    912  // y = (mi_row & 0xF) >> 3;
    913  // const int idx32 = (y << 1) + x;
    914  y = (mi_row & 0xB) >> 2;
    915  const int idx32 = y + x;
    916 
    917  x = (mi_col & 0x7) >> 2;
    918  // y = (mi_row & 0x7) >> 2;
    919  // const int idx16 = (y << 1) + x;
    920  y = (mi_row & 0x5) >> 1;
    921  const int idx16 = y + x;
    922  // Set force_skip_low_temp_var based on the block size and block offset.
    923  switch (bsize) {
    924    case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break;
    925    case BLOCK_128X64:
    926      assert((mi_col & 0x1F) == 0);
    927      force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)];
    928      break;
    929    case BLOCK_64X128:
    930      assert((mi_row & 0x1F) == 0);
    931      force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)];
    932      break;
    933    case BLOCK_64X64:
    934      // Location of this 64x64 block inside the 128x128 superblock
    935      force_skip_low_temp_var = variance_low[5 + idx64];
    936      break;
    937    case BLOCK_64X32:
    938      x = (mi_col & 0x1F) >> 4;
    939      y = (mi_row & 0x1F) >> 3;
    940      /*
    941      .---------------.---------------.
    942      | x=0,y=0,idx=0 | x=0,y=0,idx=2 |
    943      :---------------+---------------:
    944      | x=0,y=1,idx=1 | x=1,y=1,idx=3 |
    945      :---------------+---------------:
    946      | x=0,y=2,idx=4 | x=1,y=2,idx=6 |
    947      :---------------+---------------:
    948      | x=0,y=3,idx=5 | x=1,y=3,idx=7 |
    949      '---------------'---------------'
    950      */
    951      const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2);
    952      force_skip_low_temp_var = variance_low[9 + idx64x32];
    953      break;
    954    case BLOCK_32X64:
    955      x = (mi_col & 0x1F) >> 3;
    956      y = (mi_row & 0x1F) >> 4;
    957      const int idx32x64 = (y << 2) + x;
    958      force_skip_low_temp_var = variance_low[17 + idx32x64];
    959      break;
    960    case BLOCK_32X32:
    961      force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32];
    962      break;
    963    case BLOCK_32X16:
    964    case BLOCK_16X32:
    965    case BLOCK_16X16:
    966      force_skip_low_temp_var =
    967          variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
    968      break;
    969    default: break;
    970  }
    971  return force_skip_low_temp_var;
    972 }
    973 
    974 void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int qindex,
    975                                           int content_lowsumdiff) {
    976  SPEED_FEATURES *const sf = &cpi->sf;
    977  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION) {
    978    return;
    979  } else {
    980    set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, 0, qindex,
    981                       content_lowsumdiff, 0, 0, 0, 0);
    982    // The threshold below is not changed locally.
    983    cpi->vbp_info.threshold_minmax = 15 + (qindex >> 3);
    984  }
    985 }
    986 
// Computes the chroma (U/V) SADs for the superblock against the LAST
// reference (into uv_sad[]), and against GOLDEN/ALTREF when their luma SADs
// were computed, then derives per-plane color-sensitivity flags used later
// by nonrd pickmode: 1 = color sensitive, 0 = not sensitive, 2 = borderline
// (refined at coding-block level). No-op for key frames and monochrome.
// NOTE(review): uv_sad[] is assumed to be initialized by the caller; when
// bs == BLOCK_INVALID it is read below without being written here — confirm.
static inline void chroma_check(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                               unsigned int y_sad, unsigned int y_sad_g,
                               unsigned int y_sad_alt, bool is_key_frame,
                               bool zero_motion, unsigned int *uv_sad) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // uv_sad is compared against (y_sad >> shift_upper_limit) and
  // (y_sad >> shift_lower_limit) to classify color sensitivity; the shifts
  // are tuned below per content type.
  int shift_upper_limit = 1;
  int shift_lower_limit = 3;
  // Divisor applied to the GOLDEN/ALTREF luma SAD for their color flags.
  int fac_uv = 6;
  if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;

  // Use lower threshold (more conservative in setting color flag) for
  // higher resolutions non-screen, which tend to have more camera noise.
  // Since this may be used to skip compound mode in nonrd pickmode, which
  // is generally more effective for higher resolutions, better to be more
  // conservative.
  if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
    if (cpi->common.width * cpi->common.height >= RESOLUTION_1080P)
      fac_uv = 3;
    else
      fac_uv = 5;
  }
  // Screen content with high motion/SAD: loosen the "not sensitive" bound.
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
      cpi->rc.high_source_sad) {
    shift_lower_limit = 7;
  } else if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
             cpi->rc.percent_blocks_with_motion > 90 &&
             cpi->rc.frame_source_sad > 10000 && source_sad_nonrd > kLowSad) {
    shift_lower_limit = 8;
    shift_upper_limit = 3;
  } else if (source_sad_nonrd >= kMedSad && x->source_variance > 500 &&
             cpi->common.width * cpi->common.height >= 640 * 360) {
    shift_upper_limit = 2;
    shift_lower_limit = source_sad_nonrd > kMedSad ? 5 : 4;
  }

  MB_MODE_INFO *mi = xd->mi[0];
  const AV1_COMMON *const cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
  const YV12_BUFFER_CONFIG *yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, LAST_FRAME);
  struct buf_2d dst;
  unsigned int uv_sad_g = 0;
  unsigned int uv_sad_alt = 0;

  // Compute SADs for both chroma planes (U then V).
  for (int plane = AOM_PLANE_U; plane < MAX_MB_PLANE; ++plane) {
    struct macroblock_plane *p = &x->plane[plane];
    struct macroblockd_plane *pd = &xd->plane[plane];
    const BLOCK_SIZE bs =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    if (bs != BLOCK_INVALID) {
      // For last:
      if (zero_motion) {
        if (mi->ref_frame[0] == LAST_FRAME) {
          // Zero motion with LAST already set up: SAD against pre[0].
          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
        } else {
          // Zero motion but a different reference was selected: point dst at
          // the co-located chroma in the LAST buffer explicitly.
          uint8_t *src = (plane == 1) ? yv12->u_buffer : yv12->v_buffer;
          setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width,
                           yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
                           xd->mi_col, sf, xd->plane[plane].subsampling_x,
                           xd->plane[plane].subsampling_y);

          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, dst.buf, dst.stride);
        }
      } else {
        // Non-zero motion: SAD against the current chroma prediction.
        uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
            p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride);
      }

      // For golden: only when the golden luma SAD was computed.
      if (y_sad_g != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
                         yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
                         xd->mi_col, sf, xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
                                            dst.stride);
      }

      // For altref: only when the altref luma SAD was computed.
      if (y_sad_alt != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_alt->u_buffer : yv12_alt->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_alt->uv_crop_width,
                         yv12_alt->uv_crop_height, yv12_alt->uv_stride,
                         xd->mi_row, xd->mi_col, sf,
                         xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_alt = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                              dst.buf, dst.stride);
      }
    }

    // Classify: high chroma SAD relative to luma -> color sensitive (1),
    // low -> not sensitive (0), in between -> borderline (2).
    if (uv_sad[plane - 1] > (y_sad >> shift_upper_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 1;
    else if (uv_sad[plane - 1] < (y_sad >> shift_lower_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 0;
    // Borderline case: to be refined at coding block level in nonrd_pickmode,
    // for coding block size < sb_size.
    else
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 2;

    x->color_sensitivity_sb_g[COLOR_SENS_IDX(plane)] =
        uv_sad_g > y_sad_g / fac_uv;
    x->color_sensitivity_sb_alt[COLOR_SENS_IDX(plane)] =
        uv_sad_alt > y_sad_alt / fac_uv;
  }
}
   1100 
// Fills the leaves of the variance tree (vt) for the current superblock and
// seeds the force_split[] decisions. For key frames variance is computed on
// 4x4-averaged source samples; for inter frames on 8x8-averaged
// source-vs-prediction differences. Also accumulates the per-32x32 average,
// min and max of the 16x16 variances, and forces PARTITION_SPLIT up the tree
// when a 16x16 variance exceeds thresholds[3].
// force_split[] indexing: [0] whole SB, [1..4] 64x64 blocks,
// [5..20] 32x32 blocks, [21..84] 16x16 blocks.
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, PART_EVAL_STATUS *force_split,
    int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4],
    int64_t *thresholds, const uint8_t *src_buf, int src_stride,
    const uint8_t *dst_buf, int dst_stride, bool is_key_frame,
    const bool is_small_sb) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;
  int border_offset_4x4 = 0;
  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
  // dst_buf pointer is not used for is_key_frame, so it should be NULL.
  assert(IMPLIES(is_key_frame, dst_buf == NULL));
  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Clamp the usable extent at the right/bottom frame border
  // (mb_to_*_edge is in 1/8-pel units, hence >> 3).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
#if CONFIG_AV1_TEMPORAL_DENOISING
  temporal_denoising |= cpi->oxcf.noise_sensitivity;
#endif
  // For temporal filtering or temporal denoiser enabled: since the source
  // is modified we need to avoid 4x4 avg along superblock boundary, since
  // simd code will load 8 pixels for 4x4 avg and so can access source
  // data outside superblock (while its being modified by temporal filter).
  // Temporal filtering is never done on key frames.
  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; blk64_idx++) {
    // Pixel offsets of this 64x64 block within the superblock.
    const int x64_idx = GET_BLK_IDX_X(blk64_idx, 6);
    const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 6);
    const int blk64_scale_idx = blk64_idx << 2;
    force_split[blk64_idx + 1] = PART_EVAL_ALL;

    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      // Pixel offsets of this 32x32 block.
      const int x32_idx = x64_idx + GET_BLK_IDX_X(lvl1_idx, 5);
      const int y32_idx = y64_idx + GET_BLK_IDX_Y(lvl1_idx, 5);
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ALL;
      // Reset the 16x16 variance statistics for this 32x32 block.
      avg_16x16[blk64_idx][lvl1_idx] = 0;
      maxvar_16x16[blk64_idx][lvl1_idx] = 0;
      minvar_16x16[blk64_idx][lvl1_idx] = INT_MAX;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        // Pixel offsets of this 16x16 block.
        const int x16_idx = x32_idx + GET_BLK_IDX_X(lvl2_idx, 4);
        const int y16_idx = y32_idx + GET_BLK_IDX_Y(lvl2_idx, 4);
        const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
        VP16x16 *vst = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        force_split[split_index] = PART_EVAL_ALL;
        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
            const int x8_idx = x16_idx + GET_BLK_IDX_X(lvl3_idx, 3);
            const int y8_idx = y16_idx + GET_BLK_IDX_Y(lvl3_idx, 3);
            VP8x8 *vst2 = &vst->split[lvl3_idx];
            fill_variance_4x4avg(src_buf, src_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, border_offset_4x4);
          }
        } else {
          // Inter frame: variance of 8x8-averaged source-vs-prediction.
          fill_variance_8x8avg(src_buf, src_stride, dst_buf, dst_stride,
                               x16_idx, y16_idx, vst, is_cur_buf_hbd(xd),
                               pixels_wide, pixels_high);

          fill_variance_tree(vst, BLOCK_16X16);
          VPartVar *none_var = &vt->split[blk64_idx]
                                    .split[lvl1_idx]
                                    .split[lvl2_idx]
                                    .part_variances.none;
          get_variance(none_var);
          const int val_none_var = none_var->variance;
          // Track average / min / max of the 16x16 variances per 32x32.
          avg_16x16[blk64_idx][lvl1_idx] += val_none_var;
          minvar_16x16[blk64_idx][lvl1_idx] =
              AOMMIN(minvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          maxvar_16x16[blk64_idx][lvl1_idx] =
              AOMMAX(maxvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          if (val_none_var > thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = PART_EVAL_ONLY_SPLIT;
            force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
            force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
            force_split[0] = PART_EVAL_ONLY_SPLIT;
          } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                     compute_minmax_variance && val_none_var > thresholds[2]) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            int minmax = compute_minmax_8x8(src_buf, src_stride, dst_buf,
                                            dst_stride, x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                            xd->cur_buf->flags,
#endif
                                            pixels_wide, pixels_high);
            const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = PART_EVAL_ONLY_SPLIT;
              force_split[5 + blk64_scale_idx + lvl1_idx] =
                  PART_EVAL_ONLY_SPLIT;
              force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
              force_split[0] = PART_EVAL_ONLY_SPLIT;
            }
          }
        }
      }
    }
  }
}
   1214 
   1215 static inline void set_ref_frame_for_partition(
   1216    AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
   1217    MV_REFERENCE_FRAME *ref_frame_partition, MB_MODE_INFO *mi,
   1218    unsigned int *y_sad, unsigned int *y_sad_g, unsigned int *y_sad_alt,
   1219    const YV12_BUFFER_CONFIG *yv12_g, const YV12_BUFFER_CONFIG *yv12_alt,
   1220    int mi_row, int mi_col, int num_planes) {
   1221  AV1_COMMON *const cm = &cpi->common;
   1222  const double fac =
   1223      (cpi->svc.spatial_layer_id > 0 && cpi->svc.has_lower_quality_layer) ? 1.0
   1224                                                                          : 0.9;
   1225  const bool is_set_golden_ref_frame =
   1226      *y_sad_g < fac * *y_sad && *y_sad_g < *y_sad_alt;
   1227  const bool is_set_altref_ref_frame =
   1228      *y_sad_alt < fac * *y_sad && *y_sad_alt < *y_sad_g;
   1229 
   1230  if (is_set_golden_ref_frame) {
   1231    av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
   1232                         get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
   1233    mi->ref_frame[0] = GOLDEN_FRAME;
   1234    mi->mv[0].as_int = 0;
   1235    *y_sad = *y_sad_g;
   1236    *ref_frame_partition = GOLDEN_FRAME;
   1237    x->nonrd_prune_ref_frame_search = 0;
   1238    x->sb_me_partition = 0;
   1239  } else if (is_set_altref_ref_frame) {
   1240    av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
   1241                         get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
   1242    mi->ref_frame[0] = ALTREF_FRAME;
   1243    mi->mv[0].as_int = 0;
   1244    *y_sad = *y_sad_alt;
   1245    *ref_frame_partition = ALTREF_FRAME;
   1246    x->nonrd_prune_ref_frame_search = 0;
   1247    x->sb_me_partition = 0;
   1248  } else {
   1249    *ref_frame_partition = LAST_FRAME;
   1250    x->nonrd_prune_ref_frame_search =
   1251        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
   1252  }
   1253 }
   1254 
   1255 static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0,
   1256                                        const FULLPEL_MV *mv1) {
   1257  return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col);
   1258 }
   1259 
// Tests whether reusing the above or left neighbour's LAST_FRAME MV gives a
// meaningfully lower superblock SAD than the current best MV; if so, adopts
// that MV (and its SAD) into xd->mi[0] / *y_sad. Neighbour MVs are clamped
// to the subpel search range before use. Skipped for high-motion content at
// higher estimation speeds.
static inline void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
                                         unsigned int *y_sad, bool is_small_sb,
                                         int est_motion) {
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // TODO(yunqingwang@google.com): test if this condition works with other
  // speeds.
  if (est_motion > 2 && source_sad_nonrd > kMedSad) return;

  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];

  unsigned int above_y_sad = UINT_MAX;
  unsigned int left_y_sad = UINT_MAX;
  FULLPEL_MV above_mv = kZeroFullMv;
  FULLPEL_MV left_mv = kZeroFullMv;
  SubpelMvLimits subpel_mv_limits;
  const MV dummy_mv = { 0, 0 };
  av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, &dummy_mv);

  // Current best MV
  FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv);
  // Acceptance factor: a neighbour MV is adopted only if its SAD is below
  // (multi/8) of the current best SAD (7/8 for faster/high-SAD settings).
  const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 7 : 8;

  // Above neighbour: only consider inter blocks predicted from LAST_FRAME.
  if (xd->up_available) {
    const MB_MODE_INFO *above_mbmi = xd->above_mbmi;
    if (above_mbmi->mode >= INTRA_MODE_END &&
        above_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = above_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      above_mv = get_fullmv_from_mv(&temp);

      // Only compute the SAD if the MV actually differs from the best MV.
      if (mv_distance(&best_mv, &above_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv);
        above_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }
  // Left neighbour: same conditions; also skip if identical to the above MV.
  if (xd->left_available) {
    const MB_MODE_INFO *left_mbmi = xd->left_mbmi;
    if (left_mbmi->mode >= INTRA_MODE_END &&
        left_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = left_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      left_mv = get_fullmv_from_mv(&temp);

      if (mv_distance(&best_mv, &left_mv) > 0 &&
          mv_distance(&above_mv, &left_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv);
        left_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }

  // Adopt whichever neighbour MV clears the margin and beats the other.
  if (above_y_sad < ((multi * *y_sad) >> 3) && above_y_sad < left_y_sad) {
    *y_sad = above_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
  if (left_y_sad < ((multi * *y_sad) >> 3) && left_y_sad < above_y_sad) {
    *y_sad = left_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
}
   1331 
   1332 static void do_int_pro_motion_estimation(AV1_COMP *cpi, MACROBLOCK *x,
   1333                                         unsigned int *y_sad, int mi_row,
   1334                                         int mi_col, int source_sad_nonrd) {
   1335  AV1_COMMON *const cm = &cpi->common;
   1336  MACROBLOCKD *xd = &x->e_mbd;
   1337  MB_MODE_INFO *mi = xd->mi[0];
   1338  const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
   1339  const int increase_col_sw = source_sad_nonrd > kMedSad &&
   1340                              !cpi->rc.high_motion_content_screen_rtc &&
   1341                              (cpi->svc.temporal_layer_id == 0 ||
   1342                               cpi->rc.num_col_blscroll_last_tl0 > 2);
   1343  int me_search_size_col = is_screen
   1344                               ? increase_col_sw ? 512 : 96
   1345                               : block_size_wide[cm->seq_params->sb_size] >> 1;
   1346  // For screen use larger search size row motion to capture
   1347  // vertical scroll, which can be larger motion.
   1348  int me_search_size_row = is_screen
   1349                               ? source_sad_nonrd > kMedSad ? 512 : 192
   1350                               : block_size_high[cm->seq_params->sb_size] >> 1;
   1351  if (cm->width * cm->height >= 3840 * 2160 &&
   1352      cpi->svc.temporal_layer_id == 0 && cpi->svc.number_temporal_layers > 1) {
   1353    me_search_size_row = me_search_size_row << 1;
   1354    me_search_size_col = me_search_size_col << 1;
   1355  }
   1356  unsigned int y_sad_zero;
   1357  *y_sad = av1_int_pro_motion_estimation(
   1358      cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, &y_sad_zero,
   1359      me_search_size_col, me_search_size_row);
   1360  // The logic below selects whether the motion estimated in the
   1361  // int_pro_motion() will be used in nonrd_pickmode. Only do this
   1362  // for screen for now.
   1363  if (is_screen) {
   1364    unsigned int thresh_sad =
   1365        (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
   1366    if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
   1367      x->sb_me_partition = 1;
   1368      x->sb_me_mv.as_int = mi->mv[0].as_int;
   1369      if (cpi->svc.temporal_layer_id == 0) {
   1370        if (abs(mi->mv[0].as_mv.col) > 16 && abs(mi->mv[0].as_mv.row) == 0)
   1371          x->sb_col_scroll++;
   1372        else if (abs(mi->mv[0].as_mv.row) > 16 && abs(mi->mv[0].as_mv.col) == 0)
   1373          x->sb_row_scroll++;
   1374      }
   1375    } else {
   1376      x->sb_me_partition = 0;
   1377      // Fall back to using zero motion.
   1378      *y_sad = y_sad_zero;
   1379      mi->mv[0].as_int = 0;
   1380    }
   1381  }
   1382 }
   1383 
// Sets up the luma prediction planes for the candidate reference frames
// (LAST, and optionally GOLDEN/ALTREF), computes the corresponding superblock
// luma SADs (*y_sad, *y_sad_g, *y_sad_alt, *y_sad_last), and selects the
// reference frame used for variance-based partitioning
// (*ref_frame_partition). For the LAST reference it may additionally run
// int-pro motion estimation and evaluate neighbour MVs to refine *y_sad.
// If the winning MV is non-zero, the luma inter predictor is built.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                        unsigned int *y_sad_g, unsigned int *y_sad_alt,
                        unsigned int *y_sad_last,
                        MV_REFERENCE_FRAME *ref_frame_partition,
                        struct scale_factors *sf_no_scale, int mi_row,
                        int mi_col, bool is_small_sb, bool scaled_ref_last) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  bool scaled_ref_golden = false;
  bool scaled_ref_alt = false;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];
  // Caller has already verified LAST (or its scaled version) is available.
  const YV12_BUFFER_CONFIG *yv12 =
      scaled_ref_last ? av1_get_scaled_ref_frame(cpi, LAST_FRAME)
                      : get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;
  const YV12_BUFFER_CONFIG *yv12_alt = NULL;
  // Check if LAST is a reference. For spatial layers always use it as
  // reference scaling.
  int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
                     cpi->svc.number_spatial_layers > 1;
  int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
  int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
                    cpi->sf.rt_sf.use_nonrd_altref_frame ||
                    (cpi->sf.rt_sf.use_comp_ref_nonrd &&
                     cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 1);

  // Check if GOLDEN should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_golden_ref &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    // If GOLDEN has a different resolution, switch to its scaled version.
    if (yv12_g && (yv12_g->y_crop_height != cm->height ||
                   yv12_g->y_crop_width != cm->width)) {
      yv12_g = av1_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
      scaled_ref_golden = true;
    }
    if (yv12_g && (yv12_g != yv12 || !use_last_ref)) {
      // A scaled reference buffer is already at frame resolution, so no
      // scale factors are needed in that case.
      av1_setup_pre_planes(
          xd, 0, yv12_g, mi_row, mi_col,
          scaled_ref_golden ? NULL : get_ref_scale_factors(cm, GOLDEN_FRAME),
          num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  // Check if ALTREF should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_alt_ref && (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
    // If ALTREF has a different resolution, switch to its scaled version.
    if (yv12_alt && (yv12_alt->y_crop_height != cm->height ||
                     yv12_alt->y_crop_width != cm->width)) {
      yv12_alt = av1_get_scaled_ref_frame(cpi, ALTREF_FRAME);
      scaled_ref_alt = true;
    }
    if (yv12_alt && (yv12_alt != yv12 || !use_last_ref)) {
      av1_setup_pre_planes(
          xd, 0, yv12_alt, mi_row, mi_col,
          scaled_ref_alt ? NULL : get_ref_scale_factors(cm, ALTREF_FRAME),
          num_planes);
      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  // LAST is set up after GOLDEN/ALTREF so that xd->plane[].pre[0] refers to
  // the LAST reference for the SAD/ME computations below.
  if (use_last_ref) {
    const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
    av1_setup_pre_planes(
        xd, 0, yv12, mi_row, mi_col,
        scaled_ref_last ? NULL : get_ref_scale_factors(cm, LAST_FRAME),
        num_planes);
    // Initialize the superblock's mode info for zero-MV LAST prediction
    // with a bilinear filter.
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = cm->seq_params->sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    int est_motion = cpi->sf.rt_sf.estimate_motion_for_var_based_partition;
    // TODO(b/290596301): Look into adjusting this condition.
    // There is regression on color content when
    // estimate_motion_for_var_based_partition = 3 and high motion,
    // so for now force it to 2 based on superblock sad.
    if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2;

    // Only run int-pro ME for full (non-edge) superblocks with enough
    // texture and motion.
    if ((est_motion == 1 || est_motion == 2) && xd->mb_to_right_edge >= 0 &&
        xd->mb_to_bottom_edge >= 0 && x->source_variance > 100 &&
        source_sad_nonrd > kLowSad) {
      do_int_pro_motion_estimation(cpi, x, y_sad, mi_row, mi_col,
                                   source_sad_nonrd);
    }

    // If ME was skipped (or did not run), fall back to zero-MV SAD.
    if (*y_sad == UINT_MAX) {
      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }

    // Evaluate if neighbours' MVs give better predictions. Zero MV is tested
    // already, so only non-zero MVs are tested here. Here the neighbour blocks
    // are the first block above or left to this superblock.
    if (est_motion >= 2 && (xd->up_available || xd->left_available))
      evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion);

    *y_sad_last = *y_sad;
  }

  // Pick the ref frame for partitioning, use golden or altref frame only if
  // its lower sad, bias to LAST with factor 0.9.
  set_ref_frame_for_partition(cpi, x, xd, ref_frame_partition, mi, y_sad,
                              y_sad_g, y_sad_alt, yv12_g, yv12_alt, mi_row,
                              mi_col, num_planes);

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    if (!scaled_ref_last) {
      set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    } else {
      // Scaled references are already at frame resolution: use identity
      // scale factors for prediction.
      xd->block_ref_scale_factors[0] = sf_no_scale;
      xd->block_ref_scale_factors[1] = sf_no_scale;
    }
    // Build predictors for planes [AOM_PLANE_Y, num_planes - 1].
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  num_planes - 1);
  }
}
   1522 
   1523 // Decides whether to split or merge a 16x16 partition block in variance based
   1524 // partitioning based on the 8x8 sub-block variances.
   1525 static inline PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
   1526    VP16x16 *var_16x16_info, int64_t threshold16) {
   1527  int max_8x8_var = 0, min_8x8_var = INT_MAX;
   1528  for (int split_idx = 0; split_idx < 4; split_idx++) {
   1529    get_variance(&var_16x16_info->split[split_idx].part_variances.none);
   1530    int this_8x8_var =
   1531        var_16x16_info->split[split_idx].part_variances.none.variance;
   1532    max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
   1533    min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
   1534  }
   1535  // If the difference between maximum and minimum sub-block variances is high,
   1536  // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
   1537  // only PARTITION_NONE. The shift factor for threshold16 has been derived
   1538  // empirically.
   1539  return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
   1540             ? PART_EVAL_ONLY_SPLIT
   1541             : PART_EVAL_ONLY_NONE;
   1542 }
   1543 
   1544 static inline bool is_set_force_zeromv_skip_based_on_src_sad(
   1545    int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
   1546  if (set_zeromv_skip_based_on_source_sad == 0) return false;
   1547 
   1548  if (set_zeromv_skip_based_on_source_sad >= 3)
   1549    return source_sad_nonrd <= kLowSad;
   1550  else if (set_zeromv_skip_based_on_source_sad >= 2)
   1551    return source_sad_nonrd <= kVeryLowSad;
   1552  else if (set_zeromv_skip_based_on_source_sad >= 1)
   1553    return source_sad_nonrd == kZeroSad;
   1554 
   1555  return false;
   1556 }
   1557 
   1558 static inline bool set_force_zeromv_skip_for_sb(
   1559    AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt,
   1560    unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad,
   1561    BLOCK_SIZE bsize) {
   1562  AV1_COMMON *const cm = &cpi->common;
   1563  if (!is_set_force_zeromv_skip_based_on_src_sad(
   1564          cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
   1565          x->content_state_sb.source_sad_nonrd))
   1566    return false;
   1567  int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
   1568  const int block_width = mi_size_wide[cm->seq_params->sb_size];
   1569  const int block_height = mi_size_high[cm->seq_params->sb_size];
   1570  const unsigned int thresh_exit_part_y =
   1571      cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
   1572  unsigned int thresh_exit_part_uv =
   1573      CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
   1574  // Be more aggressive in UV threshold if source_sad >= VeryLowSad
   1575  // to suppreess visual artifact caused by the speed feature:
   1576  // set_zeromv_skip_based_on_source_sad = 2. For now only for
   1577  // part_early_exit_zeromv = 1.
   1578  if (x->content_state_sb.source_sad_nonrd >= kVeryLowSad &&
   1579      cpi->sf.rt_sf.part_early_exit_zeromv == 1)
   1580    thresh_exit_part_uv = thresh_exit_part_uv >> 3;
   1581  if (mi_col + block_width <= tile->mi_col_end &&
   1582      mi_row + block_height <= tile->mi_row_end && y_sad < thresh_exit_part_y &&
   1583      uv_sad[0] < thresh_exit_part_uv && uv_sad[1] < thresh_exit_part_uv) {
   1584    set_block_size(cpi, mi_row, mi_col, bsize);
   1585    x->force_zeromv_skip_for_sb = 1;
   1586    aom_free(vt);
   1587    // Partition shape is set here at SB level.
   1588    // Exit needs to happen from av1_choose_var_based_partitioning().
   1589    return true;
   1590  } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
   1591             cpi->sf.rt_sf.part_early_exit_zeromv >= 2)
   1592    x->force_zeromv_skip_for_sb = 2;
   1593  return false;
   1594 }
   1595 
// Chooses the partitioning of a superblock based on the variance of pixel
// differences between the source and a (possibly motion-compensated)
// reference: variance trees are filled from 8x8 (inter) or 4x4 (key frame)
// leaves and blocks are recursively split where the variance exceeds
// per-level thresholds. May instead take an early exit that forces the
// sb_size partition with zero-MV skip. Always returns 0.
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                     ThreadData *td, MACROBLOCK *x, int mi_row,
                                     int mi_col) {
#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, choose_var_based_partitioning_time);
#endif
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;
  // Per-block split decisions: index 0 is the full SB, then one entry per
  // 64x64/32x32/16x16 sub-block (85 = 1 + 4 + 16 + 64).
  PART_EVAL_STATUS force_split[85];
  int avg_64x64;
  int max_var_32x32[4];
  int min_var_32x32[4];
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4][4];
  int maxvar_16x16[4][4];
  int minvar_16x16[4][4];
  const uint8_t *src_buf;
  const uint8_t *dst_buf;
  int dst_stride;
  unsigned int uv_sad[MAX_MB_PLANE - 1];
  NOISE_LEVEL noise_level = kLow;
  bool is_zero_motion = true;
  bool scaled_ref_last = false;
  // Identity scale factors, used when prediction is done from an
  // already-scaled reference.
  struct scale_factors sf_no_scale;
  av1_setup_scale_factors_for_frame(&sf_no_scale, cm->width, cm->height,
                                    cm->width, cm->height);

  bool is_key_frame =
      (frame_is_intra_only(cm) ||
       (cpi->ppi->use_svc &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));

  assert(cm->seq_params->sb_size == BLOCK_64X64 ||
         cm->seq_params->sb_size == BLOCK_128X128);
  const bool is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  unsigned int y_sad = UINT_MAX;
  unsigned int y_sad_g = UINT_MAX;
  unsigned int y_sad_alt = UINT_MAX;
  unsigned int y_sad_last = UINT_MAX;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  // Force skip encoding for all superblocks on slide change for
  // non_reference_frames.
  if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
      cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
    MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                        get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
    av1_set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    return 0;
  }

  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;

  // Local copy so the thresholds can be adapted per superblock below.
  int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
                            vbp_thresholds[2], vbp_thresholds[3],
                            vbp_thresholds[4] };

  const int segment_id = xd->mi[0]->segment_id;
  uint64_t blk_sad = 0;
  // Look up the precomputed 64x64 source SAD for this superblock (top
  // spatial layer only), used to adapt the thresholds.
  if (cpi->src_sad_blk_64x64 != NULL &&
      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
    const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
                                  ? (cm->seq_params->mib_size >> 1)
                                  : cm->seq_params->mib_size;
    const int sb_cols =
        (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
    const int sbi_col = mi_col / sb_size_by_mb;
    const int sbi_row = mi_row / sb_size_by_mb;
    blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
  }

  const bool is_segment_id_boosted =
      cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id);
  // Effective qindex for this superblock, accounting for delta-q and
  // segment-based quantization.
  const int sb_qindex =
      clamp(cm->delta_q_info.delta_q_present_flag
                ? cm->quant_params.base_qindex + x->delta_qindex
                : cm->quant_params.base_qindex,
            0, QINDEX_RANGE - 1);
  const int qindex = is_segment_id_boosted || cpi->roi.delta_qp_enabled
                         ? av1_get_qindex(&cm->seg, segment_id, sb_qindex)
                         : sb_qindex;
  set_vbp_thresholds(
      cpi, thresholds, blk_sad, qindex, x->content_state_sb.low_sumdiff,
      x->content_state_sb.source_sad_nonrd, x->content_state_sb.source_sad_rd,
      is_segment_id_boosted, x->content_state_sb.lighting_change);

  src_buf = x->plane[AOM_PLANE_Y].src.buf;
  int src_stride = x->plane[AOM_PLANE_Y].src.stride;

  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  force_split[0] = PART_EVAL_ALL;
  memset(x->part_search_info.variance_low, 0,
         sizeof(x->part_search_info.variance_low));

  // Check if LAST frame is NULL, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in cases where enhancement spatial layers are
  // enabled dynamically and the only reference is the spatial(GOLDEN).
  // If LAST frame has a different resolution: set the scaled_ref_last flag
  // and check if ref_scaled is NULL.
  if (!frame_is_intra_only(cm)) {
    const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, LAST_FRAME);
    if (ref == NULL) {
      is_key_frame = true;
    } else if (ref->y_crop_height != cm->height ||
               ref->y_crop_width != cm->width) {
      scaled_ref_last = true;
      const YV12_BUFFER_CONFIG *ref_scaled =
          av1_get_scaled_ref_frame(cpi, LAST_FRAME);
      if (ref_scaled == NULL) is_key_frame = true;
    }
  }

  x->source_variance = UINT_MAX;
  // For nonrd_pickmode: compute source_variance, only for superblocks with
  // some motion for now. This input can then be used to bias the partitioning
  // or the chroma_check.
  if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
      x->content_state_sb.source_sad_nonrd > kLowSad)
    x->source_variance = av1_get_perpixel_variance_facade(
        cpi, xd, &x->plane[0].src, cm->seq_params->sb_size, AOM_PLANE_Y);

  if (!is_key_frame) {
    setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
                 &ref_frame_partition, &sf_no_scale, mi_row, mi_col,
                 is_small_sb, scaled_ref_last);

    MB_MODE_INFO *mi = xd->mi[0];
    // Use reference SB directly for zero mv.
    if (mi->mv[0].as_int != 0) {
      // Non-zero MV: variances are computed against the built predictor.
      dst_buf = xd->plane[AOM_PLANE_Y].dst.buf;
      dst_stride = xd->plane[AOM_PLANE_Y].dst.stride;
      is_zero_motion = false;
    } else {
      dst_buf = xd->plane[AOM_PLANE_Y].pre[0].buf;
      dst_stride = xd->plane[AOM_PLANE_Y].pre[0].stride;
    }
  } else {
    dst_buf = NULL;
    dst_stride = 0;
  }

  // check and set the color sensitivity of sb.
  av1_zero(uv_sad);
  chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, y_sad_alt, is_key_frame,
               is_zero_motion, uv_sad);

  x->force_zeromv_skip_for_sb = 0;

  // The variance tree itself lives in thread-local storage (td->vt64x64);
  // only the small VP128x128 root is heap-allocated here and freed before
  // every return below.
  VP128x128 *vt;
  AOM_CHECK_MEM_ERROR(xd->error_info, vt, aom_malloc(sizeof(*vt)));
  vt->split = td->vt64x64;

  // If the superblock is completely static (zero source sad) and
  // the y_sad (relative to LAST ref) is very small, take the sb_size partition
  // and exit, and force zeromv_last skip mode for nonrd_pickmode.
  // Only do this on the base segment (so the QP-boosted segment, if applied,
  // can still continue cleaning/ramping up the quality).
  // Condition on color uv_sad is also added.
  if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
      cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
      ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) {
    // Exit here, if zero mv skip flag is set at SB level.
    // Note: vt has already been freed inside the callee when it returns true.
    if (set_force_zeromv_skip_for_sb(cpi, x, tile, vt, uv_sad, mi_row, mi_col,
                                     y_sad, bsize))
      return 0;
  }

  if (cpi->noise_estimate.enabled)
    noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);

  // Fill in the entire tree of 8x8 (for inter frames) or 4x4 (for key frames)
  // variances for splits.
  fill_variance_tree_leaves(cpi, x, vt, force_split, avg_16x16, maxvar_16x16,
                            minvar_16x16, thresholds, src_buf, src_stride,
                            dst_buf, dst_stride, is_key_frame, is_small_sb);

  avg_64x64 = 0;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
    max_var_32x32[blk64_idx] = 0;
    min_var_32x32[blk64_idx] = INT_MAX;
    const int blk64_scale_idx = blk64_idx << 2;
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        // Inter frames skip the 16x16 aggregation here; presumably it is
        // handled in fill_variance_tree_leaves() — TODO(review): confirm.
        if (!is_key_frame) continue;
        VP16x16 *vtemp = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++)
          fill_variance_tree(&vtemp->split[lvl3_idx], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[3]) {
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          force_split[split_index] =
              cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
                  ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
                  : PART_EVAL_ONLY_SPLIT;
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
      fill_variance_tree(&vt->split[blk64_idx].split[lvl1_idx], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if its above
      // (some threshold of) the average variance over the sub-16x16 blocks,
      // then force this block to split. This also forces a split on the upper
      // (64x64) level.
      uint64_t frame_sad_thresh = 20000;
      const int is_360p_or_smaller = cm->width * cm->height <= RESOLUTION_360P;
      if (cpi->svc.number_temporal_layers > 2 &&
          cpi->svc.temporal_layer_id == 0)
        frame_sad_thresh = frame_sad_thresh << 1;
      if (force_split[5 + blk64_scale_idx + lvl1_idx] == PART_EVAL_ALL) {
        get_variance(&vt->split[blk64_idx].split[lvl1_idx].part_variances.none);
        var_32x32 =
            vt->split[blk64_idx].split[lvl1_idx].part_variances.none.variance;
        max_var_32x32[blk64_idx] = AOMMAX(var_32x32, max_var_32x32[blk64_idx]);
        min_var_32x32[blk64_idx] = AOMMIN(var_32x32, min_var_32x32[blk64_idx]);
        const int max_min_var_16X16_diff = (maxvar_16x16[blk64_idx][lvl1_idx] -
                                            minvar_16x16[blk64_idx][lvl1_idx]);

        if (var_32x32 > thresholds[2] ||
            (!is_key_frame && var_32x32 > (thresholds[2] >> 1) &&
             var_32x32 > (avg_16x16[blk64_idx][lvl1_idx] >> 1))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        } else if (!is_key_frame && is_360p_or_smaller &&
                   ((max_min_var_16X16_diff > (thresholds[2] >> 1) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > thresholds[2]) ||
                    (cpi->sf.rt_sf.prefer_large_partition_blocks &&
                     x->content_state_sb.source_sad_nonrd > kLowSad &&
                     cpi->rc.frame_source_sad < frame_sad_thresh &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > (thresholds[2] >> 4) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] >
                         (minvar_16x16[blk64_idx][lvl1_idx] << 2)))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
    }
    if (force_split[1 + blk64_idx] == PART_EVAL_ALL) {
      fill_variance_tree(&vt->split[blk64_idx], BLOCK_64X64);
      get_variance(&vt->split[blk64_idx].part_variances.none);
      var_64x64 = vt->split[blk64_idx].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the difference of the max-min variances of sub-blocks or max
      // variance of a sub-block is above some threshold of then force this
      // block to split. Only checking this for noise level >= medium, if
      // encoder is in SVC or if we already forced large blocks.
      const int max_min_var_32x32_diff =
          max_var_32x32[blk64_idx] - min_var_32x32[blk64_idx];
      const int check_max_var = max_var_32x32[blk64_idx] > thresholds[1] >> 1;
      const bool check_noise_lvl = noise_level >= kMedium ||
                                   cpi->ppi->use_svc ||
                                   cpi->sf.rt_sf.prefer_large_partition_blocks;
      const int64_t set_threshold = 3 * (thresholds[1] >> 3);

      if (!is_key_frame && max_min_var_32x32_diff > set_threshold &&
          check_max_var && check_noise_lvl) {
        force_split[1 + blk64_idx] = PART_EVAL_ONLY_SPLIT;
        force_split[0] = PART_EVAL_ONLY_SPLIT;
      }
      avg_64x64 += var_64x64;
    }
    // For 64x64 superblocks there is no 128x128 partition: always split the
    // (notional) top level.
    if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  if (force_split[0] == PART_EVAL_ALL) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    const int set_avg_64x64 = (9 * avg_64x64) >> 5;
    if (!is_key_frame && vt->part_variances.none.variance > set_avg_64x64)
      force_split[0] = PART_EVAL_ONLY_SPLIT;

    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  // Try the 128x128 partition first (only possible when the full SB fits in
  // the tile); on failure descend level by level.
  if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
      !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
      const int x64_idx = GET_BLK_IDX_X(blk64_idx, 4);
      const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 4);
      const int blk64_scale_idx = blk64_idx << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (set_vt_partitioning(cpi, xd, tile, &vt->split[blk64_idx], BLOCK_64X64,
                              mi_row + y64_idx, mi_col + x64_idx, thresholds[1],
                              BLOCK_16X16, force_split[1 + blk64_idx]))
        continue;
      for (int lvl1_idx = 0; lvl1_idx < 4; ++lvl1_idx) {
        const int x32_idx = GET_BLK_IDX_X(lvl1_idx, 3);
        const int y32_idx = GET_BLK_IDX_Y(lvl1_idx, 3);
        const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
        if (set_vt_partitioning(
                cpi, xd, tile, &vt->split[blk64_idx].split[lvl1_idx],
                BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                (mi_col + x64_idx + x32_idx), thresholds[2], BLOCK_16X16,
                force_split[5 + blk64_scale_idx + lvl1_idx]))
          continue;
        for (int lvl2_idx = 0; lvl2_idx < 4; ++lvl2_idx) {
          const int x16_idx = GET_BLK_IDX_X(lvl2_idx, 2);
          const int y16_idx = GET_BLK_IDX_Y(lvl2_idx, 2);
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          VP16x16 *vtemp =
              &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
          if (set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
                                  mi_row + y64_idx + y32_idx + y16_idx,
                                  mi_col + x64_idx + x32_idx + x16_idx,
                                  thresholds[3], BLOCK_8X8,
                                  force_split[split_index]))
            continue;
          // 8x8 is the smallest partition produced here: set it directly.
          for (int lvl3_idx = 0; lvl3_idx < 4; ++lvl3_idx) {
            const int x8_idx = GET_BLK_IDX_X(lvl3_idx, 1);
            const int y8_idx = GET_BLK_IDX_Y(lvl3_idx, 1);
            set_block_size(cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                           (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                           BLOCK_8X8);
          }
        }
      }
    }
  }

  if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
                          ref_frame_partition, mi_col, mi_row, is_small_sb);
  }

  aom_free(vt);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, choose_var_based_partitioning_time);
#endif
  return 0;
}