tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pickcdef.c (42787B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <math.h>
     13 #include <stdbool.h>
     14 #include <string.h>
     15 
     16 #include "config/aom_dsp_rtcd.h"
     17 #include "config/aom_scale_rtcd.h"
     18 
     19 #include "aom/aom_integer.h"
     20 #include "av1/common/av1_common_int.h"
     21 #include "av1/common/reconinter.h"
     22 #include "av1/encoder/encoder.h"
     23 #include "av1/encoder/ethread.h"
     24 #include "av1/encoder/pickcdef.h"
     25 #include "av1/encoder/mcomp.h"
     26 
     27 // Get primary and secondary filter strength for the given strength index and
     28 // search method
     29 static inline void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
     30                                             int *pri_strength,
     31                                             int *sec_strength,
     32                                             int strength_idx) {
     33  const int tot_sec_filter =
     34      (pick_method == CDEF_FAST_SEARCH_LVL5)
     35          ? REDUCED_SEC_STRENGTHS_LVL5
     36          : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
     37                                                    : CDEF_SEC_STRENGTHS);
     38  const int pri_idx = strength_idx / tot_sec_filter;
     39  const int sec_idx = strength_idx % tot_sec_filter;
     40  *pri_strength = pri_idx;
     41  *sec_strength = sec_idx;
     42  if (pick_method == CDEF_FULL_SEARCH) return;
     43 
     44  switch (pick_method) {
     45    case CDEF_FAST_SEARCH_LVL1:
     46      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
     47      *pri_strength = priconv_lvl1[pri_idx];
     48      break;
     49    case CDEF_FAST_SEARCH_LVL2:
     50      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
     51      *pri_strength = priconv_lvl2[pri_idx];
     52      break;
     53    case CDEF_FAST_SEARCH_LVL3:
     54      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
     55      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
     56      *pri_strength = priconv_lvl2[pri_idx];
     57      *sec_strength = secconv_lvl3[sec_idx];
     58      break;
     59    case CDEF_FAST_SEARCH_LVL4:
     60      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
     61      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
     62      *pri_strength = priconv_lvl4[pri_idx];
     63      *sec_strength = secconv_lvl3[sec_idx];
     64      break;
     65    case CDEF_FAST_SEARCH_LVL5:
     66      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
     67      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
     68      *pri_strength = priconv_lvl5[pri_idx];
     69      *sec_strength = secconv_lvl5[sec_idx];
     70      break;
     71    default: assert(0 && "Invalid CDEF search method");
     72  }
     73 }
     74 
     75 // Store CDEF filter strength calculated from strength index for given search
     76 // method
     77 #define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
     78  do {                                                                       \
     79    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
     80                              (strength_idx));                               \
     81    cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;        \
     82  } while (0)
     83 
     84 /* Search for the best strength to add as an option, knowing we
     85   already selected nb_strengths options. */
     86 static uint64_t search_one(int *lev, int nb_strengths,
     87                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
     88                           CDEF_PICK_METHOD pick_method) {
     89  uint64_t tot_mse[TOTAL_STRENGTHS];
     90  const int total_strengths = nb_cdef_strengths[pick_method];
     91  int i, j;
     92  uint64_t best_tot_mse = (uint64_t)1 << 63;
     93  int best_id = 0;
     94  memset(tot_mse, 0, sizeof(tot_mse));
     95  for (i = 0; i < sb_count; i++) {
     96    int gi;
     97    uint64_t best_mse = (uint64_t)1 << 63;
     98    /* Find best mse among already selected options. */
     99    for (gi = 0; gi < nb_strengths; gi++) {
    100      if (mse[i][lev[gi]] < best_mse) {
    101        best_mse = mse[i][lev[gi]];
    102      }
    103    }
    104    /* Find best mse when adding each possible new option. */
    105    for (j = 0; j < total_strengths; j++) {
    106      uint64_t best = best_mse;
    107      if (mse[i][j] < best) best = mse[i][j];
    108      tot_mse[j] += best;
    109    }
    110  }
    111  for (j = 0; j < total_strengths; j++) {
    112    if (tot_mse[j] < best_tot_mse) {
    113      best_tot_mse = tot_mse[j];
    114      best_id = j;
    115    }
    116  }
    117  lev[nb_strengths] = best_id;
    118  return best_tot_mse;
    119 }
    120 
    121 /* Search for the best luma+chroma strength to add as an option, knowing we
    122   already selected nb_strengths options. */
    123 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
    124                                uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
    125                                CDEF_PICK_METHOD pick_method) {
    126  uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
    127  int i, j;
    128  uint64_t best_tot_mse = (uint64_t)1 << 63;
    129  int best_id0 = 0;
    130  int best_id1 = 0;
    131  const int total_strengths = nb_cdef_strengths[pick_method];
    132  memset(tot_mse, 0, sizeof(tot_mse));
    133  for (i = 0; i < sb_count; i++) {
    134    int gi;
    135    uint64_t best_mse = (uint64_t)1 << 63;
    136    /* Find best mse among already selected options. */
    137    for (gi = 0; gi < nb_strengths; gi++) {
    138      uint64_t curr = mse[0][i][lev0[gi]];
    139      curr += mse[1][i][lev1[gi]];
    140      if (curr < best_mse) {
    141        best_mse = curr;
    142      }
    143    }
    144    /* Find best mse when adding each possible new option. */
    145    for (j = 0; j < total_strengths; j++) {
    146      int k;
    147      for (k = 0; k < total_strengths; k++) {
    148        uint64_t best = best_mse;
    149        uint64_t curr = mse[0][i][j];
    150        curr += mse[1][i][k];
    151        if (curr < best) best = curr;
    152        tot_mse[j][k] += best;
    153      }
    154    }
    155  }
    156  for (j = 0; j < total_strengths; j++) {
    157    int k;
    158    for (k = 0; k < total_strengths; k++) {
    159      if (tot_mse[j][k] < best_tot_mse) {
    160        best_tot_mse = tot_mse[j][k];
    161        best_id0 = j;
    162        best_id1 = k;
    163      }
    164    }
    165  }
    166  lev0[nb_strengths] = best_id0;
    167  lev1[nb_strengths] = best_id1;
    168  return best_tot_mse;
    169 }
    170 
    171 /* Search for the set of strengths that minimizes mse. */
    172 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
    173                                      uint64_t mse[][TOTAL_STRENGTHS],
    174                                      int sb_count,
    175                                      CDEF_PICK_METHOD pick_method) {
    176  uint64_t best_tot_mse;
    177  int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
    178              pick_method <= CDEF_FAST_SEARCH_LVL5);
    179  int i;
    180  best_tot_mse = (uint64_t)1 << 63;
    181  /* Greedy search: add one strength options at a time. */
    182  for (i = 0; i < nb_strengths; i++) {
    183    best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
    184  }
    185  /* Trying to refine the greedy search by reconsidering each
    186     already-selected option. */
    187  if (!fast) {
    188    for (i = 0; i < 4 * nb_strengths; i++) {
    189      int j;
    190      for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
    191      best_tot_mse =
    192          search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
    193    }
    194  }
    195  return best_tot_mse;
    196 }
    197 
    198 /* Search for the set of luma+chroma strengths that minimizes mse. */
    199 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
    200                                           int nb_strengths,
    201                                           uint64_t (**mse)[TOTAL_STRENGTHS],
    202                                           int sb_count,
    203                                           CDEF_PICK_METHOD pick_method) {
    204  uint64_t best_tot_mse;
    205  int i;
    206  best_tot_mse = (uint64_t)1 << 63;
    207  /* Greedy search: add one strength options at a time. */
    208  for (i = 0; i < nb_strengths; i++) {
    209    best_tot_mse =
    210        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
    211  }
    212  /* Trying to refine the greedy search by reconsidering each
    213     already-selected option. */
    214  for (i = 0; i < 4 * nb_strengths; i++) {
    215    int j;
    216    for (j = 0; j < nb_strengths - 1; j++) {
    217      best_lev0[j] = best_lev0[j + 1];
    218      best_lev1[j] = best_lev1[j + 1];
    219    }
    220    best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
    221                                   sb_count, pick_method);
    222  }
    223  return best_tot_mse;
    224 }
    225 
    226 static inline void init_src_params(int *src_stride, int *width, int *height,
    227                                   int *width_log2, int *height_log2,
    228                                   BLOCK_SIZE bsize) {
    229  *src_stride = block_size_wide[bsize];
    230  *width = block_size_wide[bsize];
    231  *height = block_size_high[bsize];
    232  *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
    233  *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
    234 }
    235 #if CONFIG_AV1_HIGHBITDEPTH
    236 /* Compute MSE only on the blocks we filtered. */
    237 static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
    238                                         cdef_list *dlist, int cdef_count,
    239                                         BLOCK_SIZE bsize, int coeff_shift,
    240                                         int row, int col) {
    241  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
    242         bsize == BLOCK_8X8);
    243  uint64_t sum = 0;
    244  int bi, bx, by;
    245  uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
    246  uint16_t *dst_buff = &dst16[row * dstride + col];
    247  int src_stride, width, height, width_log2, height_log2;
    248  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
    249                  bsize);
    250  for (bi = 0; bi < cdef_count; bi++) {
    251    by = dlist[bi].by;
    252    bx = dlist[bi].bx;
    253    sum += aom_mse_wxh_16bit_highbd(
    254        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
    255        &src[bi << (height_log2 + width_log2)], src_stride, width, height);
    256  }
    257  return sum >> 2 * coeff_shift;
    258 }
    259 #endif
    260 
    261 // Checks dual and quad block processing is applicable for block widths 8 and 4
    262 // respectively.
    263 static inline int is_dual_or_quad_applicable(cdef_list *dlist, int width,
    264                                             int cdef_count, int bi, int iter) {
    265  assert(width == 8 || width == 4);
    266  const int blk_offset = (width == 8) ? 1 : 3;
    267  if ((iter + blk_offset) >= cdef_count) return 0;
    268 
    269  if (dlist[bi].by == dlist[bi + blk_offset].by &&
    270      dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
    271    return 1;
    272 
    273  return 0;
    274 }
    275 
    276 static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
    277                                  cdef_list *dlist, int cdef_count,
    278                                  BLOCK_SIZE bsize, int coeff_shift, int row,
    279                                  int col) {
    280  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
    281         bsize == BLOCK_8X8);
    282  uint64_t sum = 0;
    283  int bi, bx, by;
    284  int iter = 0;
    285  int inc = 1;
    286  uint8_t *dst8 = (uint8_t *)dst;
    287  uint8_t *dst_buff = &dst8[row * dstride + col];
    288  int src_stride, width, height, width_log2, height_log2;
    289  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
    290                  bsize);
    291 
    292  const int num_blks = 16 / width;
    293  for (bi = 0; bi < cdef_count; bi += inc) {
    294    by = dlist[bi].by;
    295    bx = dlist[bi].bx;
    296    uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)];
    297    uint8_t *dst_tmp =
    298        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)];
    299 
    300    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
    301      sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height);
    302      iter += num_blks;
    303      inc = num_blks;
    304    } else {
    305      sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width,
    306                               height);
    307      iter += 1;
    308      inc = 1;
    309    }
    310  }
    311 
    312  return sum >> 2 * coeff_shift;
    313 }
    314 
    315 // Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the
    316 // region is outside frame boundary
    317 static inline void fill_borders_for_fbs_on_frame_boundary(
    318    uint16_t *inbuf, int hfilt_size, int vfilt_size,
    319    bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
    320    bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
    321  if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
    322      !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
    323    return;
    324  if (is_fb_on_frm_bottom_boundary) {
    325    // Fill bottom region of the block
    326    const int buf_offset =
    327        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
    328    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
    329              CDEF_VERY_LARGE);
    330  }
    331  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
    332    const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
    333    // Fill bottom-left region of the block
    334    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
    335              CDEF_VERY_LARGE);
    336  }
    337  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
    338    const int buf_offset =
    339        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
    340    // Fill bottom-right region of the block
    341    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
    342              CDEF_VERY_LARGE);
    343  }
    344  if (is_fb_on_frm_top_boundary) {
    345    // Fill top region of the block
    346    fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
    347              CDEF_VERY_LARGE);
    348  }
    349  if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
    350    // Fill top-left region of the block
    351    fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
    352  }
    353  if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
    354    const int buf_offset = hfilt_size + CDEF_HBORDER;
    355    // Fill top-right region of the block
    356    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
    357              CDEF_VERY_LARGE);
    358  }
    359  if (is_fb_on_frm_left_boundary) {
    360    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    361    // Fill left region of the block
    362    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
    363              CDEF_VERY_LARGE);
    364  }
    365  if (is_fb_on_frm_right_boundary) {
    366    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    367    // Fill right region of the block
    368    fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
    369              vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
    370  }
    371 }
    372 
    373 // Calculate the number of 8x8/4x4 filter units for which SSE can be calculated
    374 // after CDEF filtering in single function call
    375 static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
    376    cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
    377    int subsampling_y) {
    378  // TODO(Ranjit): Extend the optimization for 422
    379  if (subsampling_x != subsampling_y) return 1;
    380 
    381  // Combining more blocks seems to increase encode time due to increase in
    382  // control code
    383  if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
    384      dlist[bi].bx + 3 == dlist[bi + 3].bx) {
    385    /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
    386     * logic if y co-ordinates match and x co-ordinates are
    387     * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
    388    return 4;
    389  }
    390  if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
    391      dlist[bi].bx + 1 == dlist[bi + 1].bx) {
    392    /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
    393     * logic if their y co-ordinates match and x co-ordinates are
    394     * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
    395    return 2;
    396  }
    397  return 1;
    398 }
    399 
    400 // Returns the block error after CDEF filtering for a given strength
    401 static inline uint64_t get_filt_error(
    402    const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
    403    cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
    404    int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
    405    int ref_stride, int row, int col, int pri_strength, int sec_strength,
    406    int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
    407  uint64_t curr_sse = 0;
    408  const BLOCK_SIZE plane_bsize =
    409      get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
    410  const int bw_log2 = 3 - pd->subsampling_x;
    411  const int bh_log2 = 3 - pd->subsampling_y;
    412 
    413  // TODO(Ranjit): Extend this optimization for HBD
    414  if (!cdef_search_ctx->use_highbitdepth) {
    415    // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the
    416    // error at CDEF block level
    417    const int tot_blk_count =
    418        (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
    419        (bw_log2 + bh_log2);
    420    if (cdef_count == tot_blk_count) {
    421      // Calculate the offset in the buffer based on block position
    422      const FULLPEL_MV this_mv = { row, col };
    423      const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
    424      if (pri_strength == 0 && sec_strength == 0) {
    425        // When CDEF strength is zero, filtering is not applied. Hence
    426        // error is calculated between source and unfiltered pixels
    427        curr_sse =
    428            aom_sse(&ref_buffer[buf_offset], ref_stride,
    429                    get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
    430                    block_size_wide[plane_bsize], block_size_high[plane_bsize]);
    431      } else {
    432        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
    433 
    434        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
    435                           cdef_search_ctx->xdec[pli],
    436                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
    437                           dlist, cdef_count, pri_strength,
    438                           sec_strength + (sec_strength == 3),
    439                           cdef_search_ctx->damping, coeff_shift);
    440        curr_sse =
    441            aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
    442                    (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
    443                    block_size_high[plane_bsize]);
    444      }
    445    } else {
    446      // If few 8x8/4x4 blocks in CDEF block need to be filtered, filtering
    447      // functions produce 8-bit output and the error is calculated in 8-bit
    448      // domain
    449      if (pri_strength == 0 && sec_strength == 0) {
    450        int num_error_calc_filt_units = 1;
    451        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
    452          const uint8_t by = dlist[bi].by;
    453          const uint8_t bx = dlist[bi].bx;
    454          const int16_t by_pos = (by << bh_log2);
    455          const int16_t bx_pos = (bx << bw_log2);
    456          // Calculate the offset in the buffer based on block position
    457          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
    458          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
    459          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
    460              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
    461          curr_sse += aom_sse(
    462              &ref_buffer[buf_offset], ref_stride,
    463              get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
    464              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
    465        }
    466      } else {
    467        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
    468        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
    469                           cdef_search_ctx->xdec[pli],
    470                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
    471                           dlist, cdef_count, pri_strength,
    472                           sec_strength + (sec_strength == 3),
    473                           cdef_search_ctx->damping, coeff_shift);
    474        int num_error_calc_filt_units = 1;
    475        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
    476          const uint8_t by = dlist[bi].by;
    477          const uint8_t bx = dlist[bi].bx;
    478          const int16_t by_pos = (by << bh_log2);
    479          const int16_t bx_pos = (bx << bw_log2);
    480          // Calculate the offset in the buffer based on block position
    481          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
    482          const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
    483          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
    484          const int tmp_buf_offset =
    485              get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
    486          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
    487              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
    488          curr_sse += aom_sse(
    489              &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
    490              (1 << MAX_SB_SIZE_LOG2),
    491              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
    492        }
    493      }
    494    }
    495  } else {
    496    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
    497 
    498    av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
    499                       cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
    500                       dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
    501                       sec_strength + (sec_strength == 3),
    502                       cdef_search_ctx->damping, coeff_shift);
    503    curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
    504        ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
    505        cdef_search_ctx->bsize[pli], coeff_shift, row, col);
    506  }
    507  return curr_sse;
    508 }
    509 
    510 // Calculates MSE at block level.
    511 // Inputs:
    512 //   cdef_search_ctx: Pointer to the structure containing parameters related to
    513 //   CDEF search context.
    514 //   fbr: Row index in units of 64x64 block
    515 //   fbc: Column index in units of 64x64 block
    516 // Returns:
    517 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
    518 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
    519                             struct aom_internal_error_info *error_info,
    520                             int fbr, int fbc, int sb_count) {
    521  // TODO(aomedia:3276): Pass error_info to the low-level functions as required
    522  // in future to handle error propagation.
    523  (void)error_info;
    524  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
    525  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
    526  const int coeff_shift = cdef_search_ctx->coeff_shift;
    527  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
    528  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
    529 
    530  // Declare and initialize the temporary buffers.
    531  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
    532  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
    533  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
    534  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
    535  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
    536  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
    537  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
    538  int hb_step = 1, vb_step = 1;
    539  BLOCK_SIZE bs;
    540 
    541  const MB_MODE_INFO *const mbmi =
    542      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
    543                              MI_SIZE_64X64 * fbc];
    544 
    545  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
    546                                        ref->v_buffer };
    547  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
    548                                   ref->uv_stride };
    549 
    550  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
    551      mbmi->bsize == BLOCK_64X128) {
    552    bs = mbmi->bsize;
    553    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
    554      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
    555      hb_step = 2;
    556    }
    557    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
    558      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
    559      vb_step = 2;
    560    }
    561  } else {
    562    bs = BLOCK_64X64;
    563  }
    564  // Get number of 8x8 blocks which are not skip. Cdef processing happens for
    565  // 8x8 blocks which are not skip.
    566  const int cdef_count = av1_cdef_compute_sb_list(
    567      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
    568  const bool is_fb_on_frm_left_boundary = (fbc == 0);
    569  const bool is_fb_on_frm_right_boundary =
    570      (fbc + hb_step == cdef_search_ctx->nhfb);
    571  const bool is_fb_on_frm_top_boundary = (fbr == 0);
    572  const bool is_fb_on_frm_bottom_boundary =
    573      (fbr + vb_step == cdef_search_ctx->nvfb);
    574  const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
    575  const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
    576  int dirinit = 0;
    577  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
    578    /* We avoid filtering the pixels for which some of the pixels to
    579    average are outside the frame. We could change the filter instead,
    580    but it would add special cases for any future vectorization. */
    581    const int hfilt_size = (nhb << mi_wide_l2[pli]);
    582    const int vfilt_size = (nvb << mi_high_l2[pli]);
    583    const int ysize =
    584        vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
    585    const int xsize =
    586        hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
    587    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
    588    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
    589    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
    590    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
    591                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
    592                             ysize, xsize);
    593    fill_borders_for_fbs_on_frame_boundary(
    594        inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
    595        is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
    596        is_fb_on_frm_bottom_boundary);
    597    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
    598      int pri_strength, sec_strength;
    599      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
    600                                &sec_strength, gi);
    601      const uint64_t curr_mse = get_filt_error(
    602          cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
    603          ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
    604          pli, coeff_shift, bs);
    605      if (pli < 2)
    606        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
    607      else
    608        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
    609    }
    610  }
    611  cdef_search_ctx->sb_index[sb_count] =
    612      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
    613 }
    614 
    615 // MSE calculation at frame level.
    616 // Inputs:
    617 //   cdef_search_ctx: Pointer to the structure containing parameters related to
    618 //   CDEF search context.
    619 // Returns:
    620 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
    621 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
    622                                struct aom_internal_error_info *error_info) {
    623  // Loop over each sb.
    624  for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
    625    for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
    626      // Checks if cdef processing can be skipped for particular sb.
    627      if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
    628      // Calculate mse for each sb and store the relevant sb index.
    629      av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
    630                              cdef_search_ctx->sb_count);
    631      cdef_search_ctx->sb_count++;
    632    }
    633  }
    634 }
    635 
    636 // Allocates memory for members of CdefSearchCtx.
    637 // Inputs:
    638 //   cdef_search_ctx: Pointer to the structure containing parameters
    639 //   related to CDEF search context.
    640 // Returns:
    641 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
    642 static void cdef_alloc_data(AV1_COMMON *cm, CdefSearchCtx *cdef_search_ctx) {
    643  const int nvfb = cdef_search_ctx->nvfb;
    644  const int nhfb = cdef_search_ctx->nhfb;
    645  CHECK_MEM_ERROR(
    646      cm, cdef_search_ctx->sb_index,
    647      aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index[0])));
    648  cdef_search_ctx->sb_count = 0;
    649  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[0],
    650                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
    651  CHECK_MEM_ERROR(cm, cdef_search_ctx->mse[1],
    652                  aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb));
    653 }
    654 
    655 // Deallocates the memory allocated for members of CdefSearchCtx.
    656 // Inputs:
    657 //   cdef_search_ctx: Pointer to the structure containing parameters
    658 //   related to CDEF search context.
    659 // Returns:
    660 //   Nothing will be returned.
    661 void av1_cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
    662  if (cdef_search_ctx) {
    663    aom_free(cdef_search_ctx->mse[0]);
    664    cdef_search_ctx->mse[0] = NULL;
    665    aom_free(cdef_search_ctx->mse[1]);
    666    cdef_search_ctx->mse[1] = NULL;
    667    aom_free(cdef_search_ctx->sb_index);
    668    cdef_search_ctx->sb_index = NULL;
    669  }
    670 }
    671 
    672 // Initialize the parameters related to CDEF search context.
    673 // Inputs:
    674 //   frame: Pointer to compressed frame buffer
    675 //   ref: Pointer to the frame buffer holding the source frame
    676 //   cm: Pointer to top level common structure
    677 //   xd: Pointer to common current coding block structure
    678 //   cdef_search_ctx: Pointer to the structure containing parameters related to
    679 //   CDEF search context.
    680 //   pick_method: Search method used to select CDEF parameters
    681 // Returns:
    682 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
    683 static inline void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
    684                                    const YV12_BUFFER_CONFIG *ref,
    685                                    AV1_COMMON *cm, MACROBLOCKD *xd,
    686                                    CdefSearchCtx *cdef_search_ctx,
    687                                    CDEF_PICK_METHOD pick_method) {
    688  const CommonModeInfoParams *const mi_params = &cm->mi_params;
    689  const int num_planes = av1_num_planes(cm);
    690  cdef_search_ctx->mi_params = &cm->mi_params;
    691  cdef_search_ctx->ref = ref;
    692  cdef_search_ctx->nvfb =
    693      (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    694  cdef_search_ctx->nhfb =
    695      (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    696  cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
    697  cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
    698  cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
    699  cdef_search_ctx->num_planes = num_planes;
    700  cdef_search_ctx->pick_method = pick_method;
    701  cdef_search_ctx->sb_count = 0;
    702  cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
    703  av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
    704                       num_planes);
    705  // Initialize plane wise information.
    706  for (int pli = 0; pli < num_planes; pli++) {
    707    cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
    708    cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
    709    cdef_search_ctx->bsize[pli] =
    710        cdef_search_ctx->ydec[pli]
    711            ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
    712            : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
    713    cdef_search_ctx->mi_wide_l2[pli] =
    714        MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
    715    cdef_search_ctx->mi_high_l2[pli] =
    716        MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
    717    cdef_search_ctx->plane[pli] = xd->plane[pli];
    718  }
    719  // Function pointer initialization.
    720 #if CONFIG_AV1_HIGHBITDEPTH
    721  if (cm->seq_params->use_highbitdepth) {
    722    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
    723    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
    724  } else {
    725    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
    726    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
    727  }
    728 #else
    729  cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
    730  cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
    731 #endif
    732 }
    733 
    734 void av1_pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
    735                           int is_screen_content) {
    736  const int bd = cm->seq_params->bit_depth;
    737  const int q =
    738      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
    739  CdefInfo *const cdef_info = &cm->cdef_info;
    740  // Check the speed feature to avoid extra signaling.
    741  if (skip_cdef) {
    742    cdef_info->cdef_bits = 1;
    743    cdef_info->nb_cdef_strengths = 2;
    744  } else {
    745    cdef_info->cdef_bits = 0;
    746    cdef_info->nb_cdef_strengths = 1;
    747  }
    748  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);
    749 
    750  int predicted_y_f1 = 0;
    751  int predicted_y_f2 = 0;
    752  int predicted_uv_f1 = 0;
    753  int predicted_uv_f2 = 0;
    754  if (is_screen_content) {
    755    predicted_y_f1 =
    756        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
    757    predicted_y_f2 =
    758        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
    759    predicted_uv_f1 =
    760        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
    761    predicted_uv_f2 =
    762        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0);
    763    predicted_y_f1 = clamp(predicted_y_f1, 0, 15);
    764    predicted_y_f2 = clamp(predicted_y_f2, 0, 3);
    765    predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15);
    766    predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3);
    767  } else {
    768    if (!frame_is_intra_only(cm)) {
    769      predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
    770                                         q * 0.0068615186f + 0.02709886f),
    771                             0, 15);
    772      predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
    773                                         q * 0.0013993345f + 0.03831067f),
    774                             0, 3);
    775      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
    776                                          q * 0.0034628846f + 0.00887099f),
    777                              0, 15);
    778      predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
    779                                          q * 0.00028223585f + 0.05576307f),
    780                              0, 3);
    781    } else {
    782      predicted_y_f1 = clamp(
    783          (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
    784          0, 15);
    785      predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f +
    786                                         q * 0.0027798624f + 0.0079405f),
    787                             0, 3);
    788      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f +
    789                                          q * 0.012892405f - 0.00748388f),
    790                              0, 15);
    791      predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
    792                                          q * 0.00035520183f + 0.00228092f),
    793                              0, 3);
    794    }
    795  }
    796  cdef_info->cdef_strengths[0] =
    797      predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
    798  cdef_info->cdef_uv_strengths[0] =
    799      predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;
    800 
    801  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
    802  // set it again here.
    803  if (skip_cdef) {
    804    cdef_info->cdef_strengths[1] = 0;
    805    cdef_info->cdef_uv_strengths[1] = 0;
    806    return;
    807  }
    808 
    809  const CommonModeInfoParams *const mi_params = &cm->mi_params;
    810  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    811  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    812  MB_MODE_INFO **mbmi = mi_params->mi_grid_base;
    813  // mbmi is NULL when real-time rate control library is used.
    814  if (!mbmi) return;
    815  for (int r = 0; r < nvfb; ++r) {
    816    for (int c = 0; c < nhfb; ++c) {
    817      MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c];
    818      current_mbmi->cdef_strength = 0;
    819    }
    820    mbmi += MI_SIZE_64X64 * mi_params->mi_stride;
    821  }
    822 }
    823 
    824 void av1_cdef_search(AV1_COMP *cpi) {
    825  AV1_COMMON *cm = &cpi->common;
    826  CDEF_CONTROL cdef_control = cpi->oxcf.tool_cfg.cdef_control;
    827 
    828  assert(cdef_control != CDEF_NONE);
    829  // For CDEF_ADAPTIVE, turning off CDEF around qindex 32 was best for still
    830  // pictures
    831  if ((cdef_control == CDEF_REFERENCE &&
    832       cpi->ppi->rtc_ref.non_reference_frame) ||
    833      (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
    834       (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) &&
    835       cpi->oxcf.rc_cfg.cq_level <= 32)) {
    836    CdefInfo *const cdef_info = &cm->cdef_info;
    837    cdef_info->nb_cdef_strengths = 1;
    838    cdef_info->cdef_bits = 0;
    839    cdef_info->cdef_strengths[0] = 0;
    840    cdef_info->cdef_uv_strengths[0] = 0;
    841    return;
    842  }
    843 
    844  // Indicate if external RC is used for testing
    845  const int rtc_ext_rc = cpi->rc.rtc_external_ratectrl;
    846  if (rtc_ext_rc) {
    847    av1_pick_cdef_from_qp(cm, 0, 0);
    848    return;
    849  }
    850  CDEF_PICK_METHOD pick_method = cpi->sf.lpf_sf.cdef_pick_method;
    851  if (pick_method == CDEF_PICK_FROM_Q) {
    852    const int use_screen_content_model =
    853        cm->quant_params.base_qindex >
    854            AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
    855                   cpi->rc.best_quality + 5) &&
    856        cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
    857    av1_pick_cdef_from_qp(cm, cpi->sf.rt_sf.skip_cdef_sb,
    858                          use_screen_content_model);
    859    return;
    860  }
    861  const CommonModeInfoParams *const mi_params = &cm->mi_params;
    862  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
    863  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
    864                    pick_method <= CDEF_FAST_SEARCH_LVL5);
    865  const int num_planes = av1_num_planes(cm);
    866  MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
    867 
    868  if (!cpi->cdef_search_ctx)
    869    CHECK_MEM_ERROR(cm, cpi->cdef_search_ctx,
    870                    aom_malloc(sizeof(*cpi->cdef_search_ctx)));
    871  CdefSearchCtx *cdef_search_ctx = cpi->cdef_search_ctx;
    872 
    873  // Initialize parameters related to CDEF search context.
    874  cdef_params_init(&cm->cur_frame->buf, cpi->source, cm, xd, cdef_search_ctx,
    875                   pick_method);
    876  // Allocate CDEF search context buffers.
    877  cdef_alloc_data(cm, cdef_search_ctx);
    878  // Frame level mse calculation.
    879  if (cpi->mt_info.num_workers > 1) {
    880    av1_cdef_mse_calc_frame_mt(cpi);
    881  } else {
    882    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
    883  }
    884 
    885  /* Search for different number of signaling bits. */
    886  int nb_strength_bits = 0;
    887  uint64_t best_rd = UINT64_MAX;
    888  CdefInfo *const cdef_info = &cm->cdef_info;
    889  int sb_count = cdef_search_ctx->sb_count;
    890  uint64_t(*mse[2])[TOTAL_STRENGTHS];
    891  mse[0] = cdef_search_ctx->mse[0];
    892  mse[1] = cdef_search_ctx->mse[1];
    893  /* Calculate the maximum number of bits required to signal CDEF strengths at
    894   * block level */
    895  const int total_strengths = nb_cdef_strengths[pick_method];
    896  const int joint_strengths =
    897      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
    898  const int max_signaling_bits =
    899      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
    900  int rdmult = cpi->td.mb.rdmult;
    901  for (int i = 0; i <= 3; i++) {
    902    if (i > max_signaling_bits) break;
    903    int best_lev0[CDEF_MAX_STRENGTHS] = { 0 };
    904    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    905    const int nb_strengths = 1 << i;
    906    uint64_t tot_mse;
    907    if (num_planes > 1) {
    908      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
    909                                           mse, sb_count, pick_method);
    910    } else {
    911      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
    912                                      pick_method);
    913    }
    914 
    915    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
    916                                              (num_planes > 1 ? 2 : 1);
    917    const int rate_cost = av1_cost_literal(total_bits);
    918    const uint64_t dist = tot_mse * 16;
    919    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    920    if (rd < best_rd) {
    921      best_rd = rd;
    922      nb_strength_bits = i;
    923      memcpy(cdef_info->cdef_strengths, best_lev0,
    924             nb_strengths * sizeof(best_lev0[0]));
    925      if (num_planes > 1) {
    926        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
    927               nb_strengths * sizeof(best_lev1[0]));
    928      }
    929    }
    930  }
    931 
    932  cdef_info->cdef_bits = nb_strength_bits;
    933  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
    934  for (int i = 0; i < sb_count; i++) {
    935    uint64_t best_mse = UINT64_MAX;
    936    int best_gi = 0;
    937    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
    938      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
    939      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
    940      if (curr < best_mse) {
    941        best_gi = gi;
    942        best_mse = curr;
    943      }
    944    }
    945    mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
    946        best_gi;
    947  }
    948  if (fast) {
    949    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
    950      const int luma_strength = cdef_info->cdef_strengths[j];
    951      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
    952      int pri_strength, sec_strength;
    953 
    954      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
    955                                 luma_strength);
    956      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
    957                                 chroma_strength);
    958    }
    959  }
    960 
    961  // For CDEF_ADAPTIVE, set primary and secondary CDEF at reduced strength for
    962  // qindexes 33 through 220.
    963  // Note 1: for odd strengths, the 0.5 discarded by ">> 1" is a significant
    964  // part of the strength when the strength is small, and because there are
    965  // few strength levels, odd strengths are reduced significantly more than a
    966  // half. This is intended behavior for reduced strength.
    967  // For example: a pri strength of 3 becomes 1, and a sec strength of 1
    968  // becomes 0.
    969  // Note 2: a (signaled) sec strength value of 3 is special as it results in an
    970  // actual sec strength of 4. We tried adding +1 to the sec strength 3 so it
    971  // maps to a reduced sec strength of 2. However, on Daala's subset1, the
    972  // resulting SSIMULACRA 2 scores were either exactly the same (at cpu-used 6),
    973  // or within noise level (at cpu-used 3). Given that there were no discernible
    974  // improvements, this special mapping was left out for reduced strength.
    975  if (cdef_control == CDEF_ADAPTIVE && cpi->oxcf.mode == ALLINTRA &&
    976      (cpi->oxcf.rc_cfg.mode == AOM_Q || cpi->oxcf.rc_cfg.mode == AOM_CQ) &&
    977      cpi->oxcf.rc_cfg.cq_level <= 220) {
    978    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
    979      const int luma_strength = cdef_info->cdef_strengths[j];
    980      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
    981 
    982      const int new_pri_luma_strength =
    983          (luma_strength / CDEF_SEC_STRENGTHS) >> 1;
    984      const int new_sec_luma_strength =
    985          (luma_strength % CDEF_SEC_STRENGTHS) >> 1;
    986      const int new_pri_chroma_strength =
    987          (chroma_strength / CDEF_SEC_STRENGTHS) >> 1;
    988      const int new_sec_chroma_strength =
    989          (chroma_strength % CDEF_SEC_STRENGTHS) >> 1;
    990 
    991      cdef_info->cdef_strengths[j] =
    992          new_pri_luma_strength * CDEF_SEC_STRENGTHS + new_sec_luma_strength;
    993      cdef_info->cdef_uv_strengths[j] =
    994          new_pri_chroma_strength * CDEF_SEC_STRENGTHS +
    995          new_sec_chroma_strength;
    996    }
    997  }
    998 
    999  cdef_info->cdef_damping = damping;
   1000  // Deallocate CDEF search context buffers.
   1001  av1_cdef_dealloc_data(cdef_search_ctx);
   1002 }