intra_mode_search.c (74833B)
1 /* 2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <stdbool.h> 13 14 #include "av1/common/av1_common_int.h" 15 #include "av1/common/cfl.h" 16 #include "av1/common/reconintra.h" 17 18 #include "av1/encoder/intra_mode_search.h" 19 #include "av1/encoder/intra_mode_search_utils.h" 20 #include "av1/encoder/palette.h" 21 #include "av1/encoder/speed_features.h" 22 #include "av1/encoder/tx_search.h" 23 24 // Even though there are 7 delta angles, this macro is set to 9 to facilitate 25 // the rd threshold check to prune -3 and 3 delta angles. 26 #define SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY (2 * MAX_ANGLE_DELTA + 3) 27 28 // The order for evaluating delta angles while processing the luma directional 29 // intra modes. Currently, this order of evaluation is applicable only when 30 // speed feature prune_luma_odd_delta_angles_in_intra is enabled. In this case, 31 // even angles are evaluated first in order to facilitate the pruning of odd 32 // delta angles based on the rd costs of the neighboring delta angles. 
33 static const int8_t luma_delta_angles_order[2 * MAX_ANGLE_DELTA] = { 34 -2, 2, -3, -1, 1, 3, 35 }; 36 37 /*!\cond */ 38 static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = { 39 DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, PAETH_PRED, 40 SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED, D157_PRED, 41 D67_PRED, D113_PRED, D45_PRED, 42 }; 43 44 static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = { 45 UV_DC_PRED, UV_CFL_PRED, UV_H_PRED, UV_V_PRED, 46 UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED, 47 UV_D135_PRED, UV_D203_PRED, UV_D157_PRED, UV_D67_PRED, 48 UV_D113_PRED, UV_D45_PRED, 49 }; 50 51 // The bitmask corresponds to the filter intra modes as defined in enums.h 52 // FILTER_INTRA_MODE enumeration type. Setting a bit to 0 in the mask means to 53 // disable the evaluation of corresponding filter intra mode. The table 54 // av1_derived_filter_intra_mode_used_flag is used when speed feature 55 // prune_filter_intra_level is 1. The evaluated filter intra modes are union 56 // of the following: 57 // 1) FILTER_DC_PRED 58 // 2) mode that corresponds to best mode so far of DC_PRED, V_PRED, H_PRED, 59 // D157_PRED and PAETH_PRED. (Eg: FILTER_V_PRED if best mode so far is V_PRED). 60 static const uint8_t av1_derived_filter_intra_mode_used_flag[INTRA_MODES] = { 61 0x01, // DC_PRED: 0000 0001 62 0x03, // V_PRED: 0000 0011 63 0x05, // H_PRED: 0000 0101 64 0x01, // D45_PRED: 0000 0001 65 0x01, // D135_PRED: 0000 0001 66 0x01, // D113_PRED: 0000 0001 67 0x09, // D157_PRED: 0000 1001 68 0x01, // D203_PRED: 0000 0001 69 0x01, // D67_PRED: 0000 0001 70 0x01, // SMOOTH_PRED: 0000 0001 71 0x01, // SMOOTH_V_PRED: 0000 0001 72 0x01, // SMOOTH_H_PRED: 0000 0001 73 0x11 // PAETH_PRED: 0001 0001 74 }; 75 76 // The bitmask corresponds to the chroma intra modes as defined in enums.h 77 // UV_PREDICTION_MODE enumeration type. Setting a bit to 0 in the mask means to 78 // disable the evaluation of corresponding chroma intra mode. 
The table 79 // av1_derived_chroma_intra_mode_used_flag is used when speed feature 80 // prune_chroma_modes_using_luma_winner is enabled. The evaluated chroma 81 // intra modes are union of the following: 82 // 1) UV_DC_PRED 83 // 2) UV_SMOOTH_PRED 84 // 3) UV_CFL_PRED 85 // 4) mode that corresponds to luma intra mode winner (Eg : UV_V_PRED if luma 86 // intra mode winner is V_PRED). 87 static const uint16_t av1_derived_chroma_intra_mode_used_flag[INTRA_MODES] = { 88 0x2201, // DC_PRED: 0010 0010 0000 0001 89 0x2203, // V_PRED: 0010 0010 0000 0011 90 0x2205, // H_PRED: 0010 0010 0000 0101 91 0x2209, // D45_PRED: 0010 0010 0000 1001 92 0x2211, // D135_PRED: 0010 0010 0001 0001 93 0x2221, // D113_PRED: 0010 0010 0010 0001 94 0x2241, // D157_PRED: 0010 0010 0100 0001 95 0x2281, // D203_PRED: 0010 0010 1000 0001 96 0x2301, // D67_PRED: 0010 0011 0000 0001 97 0x2201, // SMOOTH_PRED: 0010 0010 0000 0001 98 0x2601, // SMOOTH_V_PRED: 0010 0110 0000 0001 99 0x2a01, // SMOOTH_H_PRED: 0010 1010 0000 0001 100 0x3201 // PAETH_PRED: 0011 0010 0000 0001 101 }; 102 103 DECLARE_ALIGNED(16, static const uint8_t, all_zeros[MAX_SB_SIZE]) = { 0 }; 104 DECLARE_ALIGNED(16, static const uint16_t, 105 highbd_all_zeros[MAX_SB_SIZE]) = { 0 }; 106 107 int av1_calc_normalized_variance(aom_variance_fn_t vf, const uint8_t *const buf, 108 const int stride, const int is_hbd) { 109 unsigned int sse; 110 111 if (is_hbd) 112 return vf(buf, stride, CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse); 113 else 114 return vf(buf, stride, all_zeros, 0, &sse); 115 } 116 117 // Computes average of log(1 + variance) across 4x4 sub-blocks for source and 118 // reconstructed blocks. 
119 static void compute_avg_log_variance(const AV1_COMP *const cpi, MACROBLOCK *x, 120 const BLOCK_SIZE bs, 121 double *avg_log_src_variance, 122 double *avg_log_recon_variance) { 123 const MACROBLOCKD *const xd = &x->e_mbd; 124 const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size; 125 const int mi_row_in_sb = x->e_mbd.mi_row & (mi_size_high[sb_size] - 1); 126 const int mi_col_in_sb = x->e_mbd.mi_col & (mi_size_wide[sb_size] - 1); 127 const int right_overflow = 128 (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0; 129 const int bottom_overflow = 130 (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0; 131 const int bw = (MI_SIZE * mi_size_wide[bs] - right_overflow); 132 const int bh = (MI_SIZE * mi_size_high[bs] - bottom_overflow); 133 const int is_hbd = is_cur_buf_hbd(xd); 134 135 aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf; 136 for (int i = 0; i < bh; i += MI_SIZE) { 137 const int r = mi_row_in_sb + (i >> MI_SIZE_LOG2); 138 for (int j = 0; j < bw; j += MI_SIZE) { 139 const int c = mi_col_in_sb + (j >> MI_SIZE_LOG2); 140 const int mi_offset = r * mi_size_wide[sb_size] + c; 141 Block4x4VarInfo *block_4x4_var_info = 142 &x->src_var_info_of_4x4_sub_blocks[mi_offset]; 143 int src_var = block_4x4_var_info->var; 144 double log_src_var = block_4x4_var_info->log_var; 145 // Compute average of log(1 + variance) for the source block from 4x4 146 // sub-block variance values. Calculate and store 4x4 sub-block variance 147 // and log(1 + variance), if the values present in 148 // src_var_of_4x4_sub_blocks are invalid. Reuse the same if it is readily 149 // available with valid values. 
150 if (src_var < 0) { 151 src_var = av1_calc_normalized_variance( 152 vf, x->plane[0].src.buf + i * x->plane[0].src.stride + j, 153 x->plane[0].src.stride, is_hbd); 154 block_4x4_var_info->var = src_var; 155 log_src_var = log1p(src_var / 16.0); 156 block_4x4_var_info->log_var = log_src_var; 157 } else { 158 // When source variance is already calculated and available for 159 // retrieval, check if log(1 + variance) is also available. If it is 160 // available, then retrieve from buffer. Else, calculate the same and 161 // store to the buffer. 162 if (log_src_var < 0) { 163 log_src_var = log1p(src_var / 16.0); 164 block_4x4_var_info->log_var = log_src_var; 165 } 166 } 167 *avg_log_src_variance += log_src_var; 168 169 const int recon_var = av1_calc_normalized_variance( 170 vf, xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j, 171 xd->plane[0].dst.stride, is_hbd); 172 *avg_log_recon_variance += log1p(recon_var / 16.0); 173 } 174 } 175 176 const int blocks = (bw * bh) / 16; 177 *avg_log_src_variance /= (double)blocks; 178 *avg_log_recon_variance /= (double)blocks; 179 } 180 181 // Returns a factor to be applied to the RD value based on how well the 182 // reconstructed block variance matches the source variance. 183 static double intra_rd_variance_factor(const AV1_COMP *cpi, MACROBLOCK *x, 184 BLOCK_SIZE bs) { 185 double threshold = INTRA_RD_VAR_THRESH(cpi->oxcf.speed); 186 // For non-positive threshold values, the comparison of source and 187 // reconstructed variances with threshold evaluates to false 188 // (src_var < threshold/rec_var < threshold) as these metrics are greater than 189 // than 0. Hence further calculations are skipped. 190 if (threshold <= 0) return 1.0; 191 192 double variance_rd_factor = 1.0; 193 double avg_log_src_variance = 0.0; 194 double avg_log_recon_variance = 0.0; 195 double var_diff = 0.0; 196 197 compute_avg_log_variance(cpi, x, bs, &avg_log_src_variance, 198 &avg_log_recon_variance); 199 200 // Dont allow 0 to prevent / 0 below. 
201 avg_log_src_variance += 0.000001; 202 avg_log_recon_variance += 0.000001; 203 204 if (avg_log_src_variance >= avg_log_recon_variance) { 205 var_diff = (avg_log_src_variance - avg_log_recon_variance); 206 if ((var_diff > 0.5) && (avg_log_recon_variance < threshold)) { 207 variance_rd_factor = 1.0 + ((var_diff * 2) / avg_log_src_variance); 208 } 209 } else { 210 var_diff = (avg_log_recon_variance - avg_log_src_variance); 211 if ((var_diff > 0.5) && (avg_log_src_variance < threshold)) { 212 variance_rd_factor = 1.0 + (var_diff / (2 * avg_log_src_variance)); 213 } 214 } 215 216 // Limit adjustment; 217 variance_rd_factor = AOMMIN(3.0, variance_rd_factor); 218 219 return variance_rd_factor; 220 } 221 /*!\endcond */ 222 223 /*!\brief Search for the best filter_intra mode when coding intra frame. 224 * 225 * \ingroup intra_mode_search 226 * \callergraph 227 * This function loops through all filter_intra modes to find the best one. 228 * 229 * \return Returns 1 if a new filter_intra mode is selected; 0 otherwise. 230 */ 231 static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, 232 int *rate, int *rate_tokenonly, 233 int64_t *distortion, uint8_t *skippable, 234 BLOCK_SIZE bsize, int mode_cost, 235 PREDICTION_MODE best_mode_so_far, 236 int64_t *best_rd, int64_t *best_model_rd, 237 PICK_MODE_CONTEXT *ctx) { 238 // Skip the evaluation of filter intra modes. 
239 if (cpi->sf.intra_sf.prune_filter_intra_level == 2) return 0; 240 241 MACROBLOCKD *const xd = &x->e_mbd; 242 MB_MODE_INFO *mbmi = xd->mi[0]; 243 int filter_intra_selected_flag = 0; 244 FILTER_INTRA_MODE mode; 245 TX_SIZE best_tx_size = TX_8X8; 246 FILTER_INTRA_MODE_INFO filter_intra_mode_info; 247 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; 248 av1_zero(filter_intra_mode_info); 249 mbmi->filter_intra_mode_info.use_filter_intra = 1; 250 mbmi->mode = DC_PRED; 251 mbmi->palette_mode_info.palette_size[0] = 0; 252 253 // Skip the evaluation of filter-intra if cached MB_MODE_INFO does not have 254 // filter-intra as winner. 255 if (x->use_mb_mode_cache && 256 !x->mb_mode_cache->filter_intra_mode_info.use_filter_intra) 257 return 0; 258 259 for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) { 260 int64_t this_rd; 261 RD_STATS tokenonly_rd_stats; 262 mbmi->filter_intra_mode_info.filter_intra_mode = mode; 263 264 if ((cpi->sf.intra_sf.prune_filter_intra_level == 1) && 265 !(av1_derived_filter_intra_mode_used_flag[best_mode_so_far] & 266 (1 << mode))) 267 continue; 268 269 // Skip the evaluation of modes that do not match with the winner mode in 270 // x->mb_mode_cache. 271 if (x->use_mb_mode_cache && 272 mode != x->mb_mode_cache->filter_intra_mode_info.filter_intra_mode) 273 continue; 274 275 if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) { 276 continue; 277 } 278 av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize, 279 *best_rd); 280 if (tokenonly_rd_stats.rate == INT_MAX) continue; 281 const int this_rate = 282 tokenonly_rd_stats.rate + 283 intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0); 284 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); 285 286 // Visual quality adjustment based on recon vs source variance. 
287 if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) { 288 this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize)); 289 } 290 291 // Collect mode stats for multiwinner mode processing 292 const int txfm_search_done = 1; 293 store_winner_mode_stats( 294 &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd, 295 cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done); 296 if (this_rd < *best_rd) { 297 *best_rd = this_rd; 298 best_tx_size = mbmi->tx_size; 299 filter_intra_mode_info = mbmi->filter_intra_mode_info; 300 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); 301 memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip, 302 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk); 303 *rate = this_rate; 304 *rate_tokenonly = tokenonly_rd_stats.rate; 305 *distortion = tokenonly_rd_stats.dist; 306 *skippable = tokenonly_rd_stats.skip_txfm; 307 filter_intra_selected_flag = 1; 308 } 309 } 310 311 if (filter_intra_selected_flag) { 312 mbmi->mode = DC_PRED; 313 mbmi->tx_size = best_tx_size; 314 mbmi->filter_intra_mode_info = filter_intra_mode_info; 315 av1_copy_array(ctx->tx_type_map, best_tx_type_map, ctx->num_4x4_blk); 316 return 1; 317 } else { 318 return 0; 319 } 320 } 321 322 void av1_count_colors(const uint8_t *src, int stride, int rows, int cols, 323 int *val_count, int *num_colors) { 324 const int max_pix_val = 1 << 8; 325 memset(val_count, 0, max_pix_val * sizeof(val_count[0])); 326 for (int r = 0; r < rows; ++r) { 327 for (int c = 0; c < cols; ++c) { 328 const int this_val = src[r * stride + c]; 329 assert(this_val < max_pix_val); 330 ++val_count[this_val]; 331 } 332 } 333 int n = 0; 334 for (int i = 0; i < max_pix_val; ++i) { 335 if (val_count[i]) ++n; 336 } 337 *num_colors = n; 338 } 339 340 void av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, 341 int cols, int bit_depth, int *val_count, 342 int *bin_val_count, int *num_color_bins, 343 int *num_colors) { 344 assert(bit_depth 
<= 12); 345 const int max_bin_val = 1 << 8; 346 const int max_pix_val = 1 << bit_depth; 347 const uint16_t *src = CONVERT_TO_SHORTPTR(src8); 348 memset(bin_val_count, 0, max_bin_val * sizeof(val_count[0])); 349 if (val_count != NULL) 350 memset(val_count, 0, max_pix_val * sizeof(val_count[0])); 351 for (int r = 0; r < rows; ++r) { 352 for (int c = 0; c < cols; ++c) { 353 /* 354 * Down-convert the pixels to 8-bit domain before counting. 355 * This provides consistency of behavior for palette search 356 * between lbd and hbd encodes. This down-converted pixels 357 * are only used for calculating the threshold (n). 358 */ 359 const int this_val = ((src[r * stride + c]) >> (bit_depth - 8)); 360 assert(this_val < max_bin_val); 361 if (this_val >= max_bin_val) continue; 362 ++bin_val_count[this_val]; 363 if (val_count != NULL) ++val_count[(src[r * stride + c])]; 364 } 365 } 366 int n = 0; 367 // Count the colors based on 8-bit domain used to gate the palette path 368 for (int i = 0; i < max_bin_val; ++i) { 369 if (bin_val_count[i]) ++n; 370 } 371 *num_color_bins = n; 372 373 // Count the actual hbd colors used to create top_colors 374 n = 0; 375 if (val_count != NULL) { 376 for (int i = 0; i < max_pix_val; ++i) { 377 if (val_count[i]) ++n; 378 } 379 *num_colors = n; 380 } 381 } 382 383 bool av1_count_colors_with_threshold(const uint8_t *src, int stride, int rows, 384 int cols, int num_colors_threshold, 385 int *num_colors) { 386 bool has_color[1 << 8] = { false }; 387 *num_colors = 0; 388 389 for (int r = 0; r < rows; ++r) { 390 for (int c = 0; c < cols; ++c) { 391 const int this_val = src[r * stride + c]; 392 if (!has_color[this_val]) { 393 has_color[this_val] = true; 394 (*num_colors)++; 395 if (*num_colors > num_colors_threshold) { 396 // We're over the threshold, so we can exit early 397 return false; 398 } 399 } 400 } 401 } 402 return true; 403 } 404 405 void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi, 406 int reorder_delta_angle_eval) { 
407 if (mode_idx < INTRA_MODE_END) { 408 mbmi->mode = intra_rd_search_mode_order[mode_idx]; 409 mbmi->angle_delta[PLANE_TYPE_Y] = 0; 410 } else { 411 mbmi->mode = (mode_idx - INTRA_MODE_END) / (MAX_ANGLE_DELTA * 2) + V_PRED; 412 int delta_angle_eval_idx = 413 (mode_idx - INTRA_MODE_END) % (MAX_ANGLE_DELTA * 2); 414 if (reorder_delta_angle_eval) { 415 mbmi->angle_delta[PLANE_TYPE_Y] = 416 luma_delta_angles_order[delta_angle_eval_idx]; 417 } else { 418 mbmi->angle_delta[PLANE_TYPE_Y] = 419 (delta_angle_eval_idx < 3 ? (delta_angle_eval_idx - 3) 420 : (delta_angle_eval_idx - 2)); 421 } 422 } 423 } 424 425 static inline int get_model_rd_index_for_pruning( 426 const MACROBLOCK *const x, 427 const INTRA_MODE_SPEED_FEATURES *const intra_sf) { 428 const int top_intra_model_count_allowed = 429 intra_sf->top_intra_model_count_allowed; 430 if (!intra_sf->adapt_top_model_rd_count_using_neighbors) 431 return top_intra_model_count_allowed - 1; 432 433 const MACROBLOCKD *const xd = &x->e_mbd; 434 const PREDICTION_MODE mode = xd->mi[0]->mode; 435 int model_rd_index_for_pruning = top_intra_model_count_allowed - 1; 436 int is_left_mode_neq_cur_mode = 0, is_above_mode_neq_cur_mode = 0; 437 if (xd->left_available) 438 is_left_mode_neq_cur_mode = xd->left_mbmi->mode != mode; 439 if (xd->up_available) 440 is_above_mode_neq_cur_mode = xd->above_mbmi->mode != mode; 441 // The pruning of luma intra modes is made more aggressive at lower quantizers 442 // and vice versa. The value for model_rd_index_for_pruning is derived as 443 // follows. 444 // qidx 0 to 127: Reduce the index of a candidate used for comparison only if 445 // the current mode does not match either of the available neighboring modes. 446 // qidx 128 to 255: Reduce the index of a candidate used for comparison only 447 // if the current mode does not match both the available neighboring modes. 
448 if (x->qindex <= 127) { 449 if (is_left_mode_neq_cur_mode || is_above_mode_neq_cur_mode) 450 model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0); 451 } else { 452 if (is_left_mode_neq_cur_mode && is_above_mode_neq_cur_mode) 453 model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0); 454 } 455 return model_rd_index_for_pruning; 456 } 457 458 /*! \brief prune luma intra mode based on the model rd. 459 * \param[in] this_model_rd model rd for current mode. 460 * \param[in] best_model_rd Best model RD seen for this block so 461 * far. 462 * \param[in] top_intra_model_rd Top intra model RD seen for this 463 * block so far. 464 * \param[in] max_model_cnt_allowed The maximum number of top intra 465 * model RD allowed. 466 * \param[in] model_rd_index_for_pruning Index of the candidate used for 467 * pruning based on model rd. 468 */ 469 static int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd, 470 int64_t top_intra_model_rd[], 471 int max_model_cnt_allowed, 472 int model_rd_index_for_pruning) { 473 const double thresh_best = 1.50; 474 const double thresh_top = 1.00; 475 for (int i = 0; i < max_model_cnt_allowed; i++) { 476 if (this_model_rd < top_intra_model_rd[i]) { 477 for (int j = max_model_cnt_allowed - 1; j > i; j--) { 478 top_intra_model_rd[j] = top_intra_model_rd[j - 1]; 479 } 480 top_intra_model_rd[i] = this_model_rd; 481 break; 482 } 483 } 484 if (top_intra_model_rd[model_rd_index_for_pruning] != INT64_MAX && 485 this_model_rd > 486 thresh_top * top_intra_model_rd[model_rd_index_for_pruning]) 487 return 1; 488 489 if (this_model_rd != INT64_MAX && 490 this_model_rd > thresh_best * (*best_model_rd)) 491 return 1; 492 if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd; 493 return 0; 494 } 495 496 // Run RD calculation with given chroma intra prediction angle., and return 497 // the RD cost. Update the best mode info. if the RD cost is the best so far. 
498 static int64_t pick_intra_angle_routine_sbuv( 499 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, 500 int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats, 501 int *best_angle_delta, int64_t *best_rd) { 502 MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; 503 assert(!is_inter_block(mbmi)); 504 int this_rate; 505 int64_t this_rd; 506 RD_STATS tokenonly_rd_stats; 507 508 if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in)) 509 return INT64_MAX; 510 this_rate = tokenonly_rd_stats.rate + 511 intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead); 512 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); 513 if (this_rd < *best_rd) { 514 *best_rd = this_rd; 515 *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV]; 516 *rate = this_rate; 517 rd_stats->rate = tokenonly_rd_stats.rate; 518 rd_stats->dist = tokenonly_rd_stats.dist; 519 rd_stats->skip_txfm = tokenonly_rd_stats.skip_txfm; 520 } 521 return this_rd; 522 } 523 524 /*!\brief Search for the best angle delta for chroma prediction 525 * 526 * \ingroup intra_mode_search 527 * \callergraph 528 * Given a chroma directional intra prediction mode, this function will try to 529 * estimate the best delta_angle. 530 * 531 * \returns Return if there is a new mode with smaller rdcost than best_rd. 
532 */ 533 static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, 534 BLOCK_SIZE bsize, int rate_overhead, 535 int64_t best_rd, int *rate, 536 RD_STATS *rd_stats) { 537 MACROBLOCKD *const xd = &x->e_mbd; 538 MB_MODE_INFO *mbmi = xd->mi[0]; 539 assert(!is_inter_block(mbmi)); 540 int i, angle_delta, best_angle_delta = 0; 541 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)]; 542 543 rd_stats->rate = INT_MAX; 544 rd_stats->skip_txfm = 0; 545 rd_stats->dist = INT64_MAX; 546 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX; 547 548 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { 549 for (i = 0; i < 2; ++i) { 550 best_rd_in = (best_rd == INT64_MAX) 551 ? INT64_MAX 552 : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5))); 553 mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; 554 this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, 555 best_rd_in, rate, rd_stats, 556 &best_angle_delta, &best_rd); 557 rd_cost[2 * angle_delta + i] = this_rd; 558 if (angle_delta == 0) { 559 if (this_rd == INT64_MAX) return 0; 560 rd_cost[1] = this_rd; 561 break; 562 } 563 } 564 } 565 566 assert(best_rd != INT64_MAX); 567 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { 568 int64_t rd_thresh; 569 for (i = 0; i < 2; ++i) { 570 int skip_search = 0; 571 rd_thresh = best_rd + (best_rd >> 5); 572 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh && 573 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh) 574 skip_search = 1; 575 if (!skip_search) { 576 mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; 577 pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd, 578 rate, rd_stats, &best_angle_delta, 579 &best_rd); 580 } 581 } 582 } 583 584 mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta; 585 return rd_stats->rate != INT_MAX; 586 } 587 588 #define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \ 589 (plane == CFL_PRED_U ? 
a * CFL_SIGNS + b - 1 : b * CFL_SIGNS + a - 1) 590 591 static void cfl_idx_to_sign_and_alpha(int cfl_idx, CFL_SIGN_TYPE *cfl_sign, 592 int *cfl_alpha) { 593 int cfl_linear_idx = cfl_idx - CFL_INDEX_ZERO; 594 if (cfl_linear_idx == 0) { 595 *cfl_sign = CFL_SIGN_ZERO; 596 *cfl_alpha = 0; 597 } else { 598 *cfl_sign = cfl_linear_idx > 0 ? CFL_SIGN_POS : CFL_SIGN_NEG; 599 *cfl_alpha = abs(cfl_linear_idx) - 1; 600 } 601 } 602 603 static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x, 604 int plane, TX_SIZE tx_size, 605 BLOCK_SIZE plane_bsize, int cfl_idx, 606 int fast_mode, RD_STATS *rd_stats) { 607 assert(IMPLIES(fast_mode, rd_stats == NULL)); 608 const AV1_COMMON *const cm = &cpi->common; 609 MACROBLOCKD *const xd = &x->e_mbd; 610 MB_MODE_INFO *const mbmi = xd->mi[0]; 611 int cfl_plane = get_cfl_pred_type(plane); 612 CFL_SIGN_TYPE cfl_sign; 613 int cfl_alpha; 614 cfl_idx_to_sign_and_alpha(cfl_idx, &cfl_sign, &cfl_alpha); 615 // We conly build CFL for a given plane, the other plane's sign is dummy 616 int dummy_sign = CFL_SIGN_NEG; 617 const int8_t orig_cfl_alpha_signs = mbmi->cfl_alpha_signs; 618 const uint8_t orig_cfl_alpha_idx = mbmi->cfl_alpha_idx; 619 mbmi->cfl_alpha_signs = 620 PLANE_SIGN_TO_JOINT_SIGN(cfl_plane, cfl_sign, dummy_sign); 621 mbmi->cfl_alpha_idx = (cfl_alpha << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha; 622 int64_t cfl_cost; 623 if (fast_mode) { 624 cfl_cost = 625 intra_model_rd(cm, x, plane, plane_bsize, tx_size, /*use_hadamard=*/0); 626 } else { 627 av1_init_rd_stats(rd_stats); 628 av1_txfm_rd_in_plane(x, cpi, rd_stats, INT64_MAX, 0, plane, plane_bsize, 629 tx_size, FTXS_NONE, 0); 630 av1_rd_cost_update(x->rdmult, rd_stats); 631 cfl_cost = rd_stats->rdcost; 632 } 633 mbmi->cfl_alpha_signs = orig_cfl_alpha_signs; 634 mbmi->cfl_alpha_idx = orig_cfl_alpha_idx; 635 return cfl_cost; 636 } 637 638 static const int cfl_dir_ls[2] = { 1, -1 }; 639 640 // If cfl_search_range is CFL_MAGS_SIZE, return zero. 
Otherwise return the index 641 // of the best alpha found using intra_model_rd(). 642 static int cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x, 643 int plane, TX_SIZE tx_size, 644 int cfl_search_range) { 645 assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE); 646 647 if (cfl_search_range == CFL_MAGS_SIZE) return CFL_INDEX_ZERO; 648 649 const MACROBLOCKD *const xd = &x->e_mbd; 650 const MB_MODE_INFO *const mbmi = xd->mi[0]; 651 assert(mbmi->uv_mode == UV_CFL_PRED); 652 const MACROBLOCKD_PLANE *pd = &xd->plane[plane]; 653 const BLOCK_SIZE plane_bsize = 654 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y); 655 656 int est_best_cfl_idx = CFL_INDEX_ZERO; 657 int fast_mode = 1; 658 int start_cfl_idx = CFL_INDEX_ZERO; 659 int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, 660 start_cfl_idx, fast_mode, NULL); 661 for (int si = 0; si < 2; ++si) { 662 const int dir = cfl_dir_ls[si]; 663 for (int i = 1; i < CFL_MAGS_SIZE; ++i) { 664 int cfl_idx = start_cfl_idx + dir * i; 665 if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break; 666 int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, 667 cfl_idx, fast_mode, NULL); 668 if (cfl_cost < best_cfl_cost) { 669 best_cfl_cost = cfl_cost; 670 est_best_cfl_idx = cfl_idx; 671 } else { 672 break; 673 } 674 } 675 } 676 return est_best_cfl_idx; 677 } 678 679 static inline void set_invalid_cfl_parameters(uint8_t *best_cfl_alpha_idx, 680 int8_t *best_cfl_alpha_signs) { 681 *best_cfl_alpha_idx = 0; 682 *best_cfl_alpha_signs = 0; 683 } 684 685 static void cfl_pick_plane_rd(const AV1_COMP *const cpi, MACROBLOCK *x, 686 int plane, TX_SIZE tx_size, int cfl_search_range, 687 RD_STATS cfl_rd_arr[CFL_MAGS_SIZE], 688 int est_best_cfl_idx) { 689 assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE); 690 const MACROBLOCKD *const xd = &x->e_mbd; 691 const MB_MODE_INFO *const mbmi = xd->mi[0]; 692 assert(mbmi->uv_mode == UV_CFL_PRED); 693 const 
MACROBLOCKD_PLANE *pd = &xd->plane[plane]; 694 const BLOCK_SIZE plane_bsize = 695 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y); 696 697 for (int cfl_idx = 0; cfl_idx < CFL_MAGS_SIZE; ++cfl_idx) { 698 av1_invalid_rd_stats(&cfl_rd_arr[cfl_idx]); 699 } 700 701 int fast_mode = 0; 702 int start_cfl_idx = est_best_cfl_idx; 703 cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, start_cfl_idx, fast_mode, 704 &cfl_rd_arr[start_cfl_idx]); 705 706 if (cfl_search_range == 1) return; 707 708 for (int si = 0; si < 2; ++si) { 709 const int dir = cfl_dir_ls[si]; 710 for (int i = 1; i < cfl_search_range; ++i) { 711 int cfl_idx = start_cfl_idx + dir * i; 712 if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break; 713 cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, cfl_idx, fast_mode, 714 &cfl_rd_arr[cfl_idx]); 715 } 716 } 717 } 718 719 /*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component 720 * 721 * \ingroup intra_mode_search 722 * \callergraph 723 * 724 * This function will use DCT_DCT followed by computing SATD (sum of absolute 725 * transformed differences) to estimate the RD score and find the best possible 726 * CFL parameter. 727 * 728 * Then the function will apply a full RD search near the best possible CFL 729 * parameter to find the best actual CFL parameter. 730 * 731 * Side effect: 732 * We use ths buffers in x->plane[] and xd->plane[] as throw-away buffers for RD 733 * search. 734 * 735 * \param[in] x Encoder prediction block structure. 736 * \param[in] cpi Top-level encoder instance structure. 737 * \param[in] tx_size Transform size. 738 * \param[in] ref_best_rd Reference best RD. 739 * \param[in] cfl_search_range The search range of full RD search near the 740 * estimated best CFL parameter. 
741 * 742 * \param[out] best_rd_stats RD stats of the best CFL parameter 743 * \param[out] best_cfl_alpha_idx Best CFL alpha index 744 * \param[out] best_cfl_alpha_signs Best CFL joint signs 745 * 746 */ 747 static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi, 748 TX_SIZE tx_size, int64_t ref_best_rd, 749 int cfl_search_range, RD_STATS *best_rd_stats, 750 uint8_t *best_cfl_alpha_idx, 751 int8_t *best_cfl_alpha_signs) { 752 assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE); 753 const ModeCosts *mode_costs = &x->mode_costs; 754 RD_STATS cfl_rd_arr_u[CFL_MAGS_SIZE]; 755 RD_STATS cfl_rd_arr_v[CFL_MAGS_SIZE]; 756 MACROBLOCKD *const xd = &x->e_mbd; 757 int est_best_cfl_idx_u, est_best_cfl_idx_v; 758 759 av1_invalid_rd_stats(best_rd_stats); 760 761 // As the dc pred data is same for different values of alpha, enable the 762 // caching of dc pred data. Call clear_cfl_dc_pred_cache_flags() before 763 // returning to avoid the unintentional usage of cached dc pred data. 764 xd->cfl.use_dc_pred_cache = true; 765 // Evaluate alpha parameter of each chroma plane. 766 est_best_cfl_idx_u = 767 cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range); 768 est_best_cfl_idx_v = 769 cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range); 770 771 if (cfl_search_range == 1) { 772 // For cfl_search_range=1, further refinement of alpha is not enabled. Hence 773 // CfL index=0 for both the chroma planes implies invalid CfL mode. 
774 if (est_best_cfl_idx_u == CFL_INDEX_ZERO && 775 est_best_cfl_idx_v == CFL_INDEX_ZERO) { 776 set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs); 777 clear_cfl_dc_pred_cache_flags(&xd->cfl); 778 return 0; 779 } 780 781 int cfl_alpha_u, cfl_alpha_v; 782 CFL_SIGN_TYPE cfl_sign_u, cfl_sign_v; 783 const MB_MODE_INFO *mbmi = xd->mi[0]; 784 cfl_idx_to_sign_and_alpha(est_best_cfl_idx_u, &cfl_sign_u, &cfl_alpha_u); 785 cfl_idx_to_sign_and_alpha(est_best_cfl_idx_v, &cfl_sign_v, &cfl_alpha_v); 786 const int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1; 787 // Compute alpha and mode signaling rate. 788 const int rate_overhead = 789 mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u] + 790 mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v] + 791 mode_costs 792 ->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_CFL_PRED]; 793 // Skip the CfL mode evaluation if the RD cost derived using the rate needed 794 // to signal the CfL mode and alpha parameter exceeds the ref_best_rd. 795 if (RDCOST(x->rdmult, rate_overhead, 0) > ref_best_rd) { 796 set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs); 797 clear_cfl_dc_pred_cache_flags(&xd->cfl); 798 return 0; 799 } 800 } 801 802 // Compute the rd cost of each chroma plane using the alpha parameters which 803 // were already evaluated. 
804 cfl_pick_plane_rd(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u, 805 est_best_cfl_idx_u); 806 cfl_pick_plane_rd(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v, 807 est_best_cfl_idx_v); 808 809 clear_cfl_dc_pred_cache_flags(&xd->cfl); 810 811 for (int ui = 0; ui < CFL_MAGS_SIZE; ++ui) { 812 if (cfl_rd_arr_u[ui].rate == INT_MAX) continue; 813 int cfl_alpha_u; 814 CFL_SIGN_TYPE cfl_sign_u; 815 cfl_idx_to_sign_and_alpha(ui, &cfl_sign_u, &cfl_alpha_u); 816 for (int vi = 0; vi < CFL_MAGS_SIZE; ++vi) { 817 if (cfl_rd_arr_v[vi].rate == INT_MAX) continue; 818 int cfl_alpha_v; 819 CFL_SIGN_TYPE cfl_sign_v; 820 cfl_idx_to_sign_and_alpha(vi, &cfl_sign_v, &cfl_alpha_v); 821 // cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO is not a 822 // valid parameter for CFL 823 if (cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO) continue; 824 int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1; 825 RD_STATS rd_stats = cfl_rd_arr_u[ui]; 826 av1_merge_rd_stats(&rd_stats, &cfl_rd_arr_v[vi]); 827 if (rd_stats.rate != INT_MAX) { 828 rd_stats.rate += 829 mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u]; 830 rd_stats.rate += 831 mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v]; 832 } 833 av1_rd_cost_update(x->rdmult, &rd_stats); 834 if (rd_stats.rdcost < best_rd_stats->rdcost) { 835 *best_rd_stats = rd_stats; 836 *best_cfl_alpha_idx = 837 (cfl_alpha_u << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha_v; 838 *best_cfl_alpha_signs = joint_sign; 839 } 840 } 841 } 842 if (best_rd_stats->rdcost >= ref_best_rd) { 843 av1_invalid_rd_stats(best_rd_stats); 844 // Set invalid CFL parameters here since the rdcost is not better than 845 // ref_best_rd. 
set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs);
    return 0;
  }
  return 1;
}

// Decides whether UV_SMOOTH_PRED can be skipped for this block.
// Returns false immediately when the speed feature
// prune_smooth_intra_mode_for_chroma is off. Otherwise returns true only if
// the per-pixel source variance of every chroma plane is below the threshold.
static bool should_prune_chroma_smooth_pred_based_on_source_variance(
    const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bsize) {
  if (!cpi->sf.intra_sf.prune_smooth_intra_mode_for_chroma) return false;

  // If the source variance of both chroma planes is less than 20 (empirically
  // derived), prune UV_SMOOTH_PRED.
  for (int i = AOM_PLANE_U; i < av1_num_planes(&cpi->common); i++) {
    const unsigned int variance = av1_get_perpixel_variance_facade(
        cpi, &x->e_mbd, &x->plane[i].src, bsize, i);
    // A single plane at or above the threshold is enough to keep the mode.
    if (variance >= 20) return false;
  }
  return true;
}

// Searches the chroma intra prediction modes for the current block.
//
// The non-palette modes are visited in the order given by
// uv_rd_search_mode_order; the palette search runs afterwards when it is
// enabled and allowed for the block size. The winning mode info is written
// back to xd->mi[0], and *rate, *rate_tokenonly, *distortion and *skippable
// receive the statistics of the winner.
//
// Returns the RD cost of the best mode. If the block is not a chroma
// reference, the outputs are set to zero rate/distortion with *skippable = 1
// and INT64_MAX is returned.
int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, uint8_t *skippable,
                                    BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  assert(!is_inter_block(mbmi));
  // Snapshot of the mode info; replaced whenever a better mode is found and
  // copied back into *mbmi at the end.
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_rd = INT64_MAX, this_rd;
  const ModeCosts *mode_costs = &x->mode_costs;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;

  init_sbuv_mode(mbmi);

  // Return if the current block does not correspond to a chroma block.
  if (!xd->is_chroma_ref) {
    *rate = 0;
    *rate_tokenonly = 0;
    *distortion = 0;
    *skippable = 1;
    return INT64_MAX;
  }

  // Only store reconstructed luma when there's chroma RDO. When there's no
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
  xd->cfl.store_y = store_cfl_required_rdo(cm, x);
  if (xd->cfl.store_y) {
    // Restore reconstructed luma values.
    // TODO(chiyotsai@google.com): right now we are re-computing the txfm in
    // this function every time we search through uv modes. There is some
    // potential speed up here if we cache the result to avoid redundant
    // computation.
    av1_encode_intra_block_plane(cpi, x, mbmi->bsize, AOM_PLANE_Y,
                                 DRY_RUN_NORMAL,
                                 cpi->optimize_seg_arr[mbmi->segment_id]);
    xd->cfl.store_y = 0;
  }
  IntraModeSearchState intra_search_state;
  init_intra_mode_search_state(&intra_search_state);
  const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);

  // Search through all non-palette modes.
  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
    int this_rate;
    RD_STATS tokenonly_rd_stats;
    UV_PREDICTION_MODE uv_mode = uv_rd_search_mode_order[mode_idx];

    // Skip the current mode evaluation if the RD cost derived using the mode
    // signaling rate exceeds the best_rd so far.
    const int mode_rate =
        mode_costs->intra_uv_mode_cost[cfl_allowed][mbmi->mode][uv_mode];
    if (RDCOST(x->rdmult, mode_rate, 0) > best_rd) continue;

    PREDICTION_MODE intra_mode = get_uv_mode(uv_mode);
    const int is_diagonal_mode = av1_is_diagonal_mode(intra_mode);
    const int is_directional_mode = av1_is_directional_mode(intra_mode);

    // Encoder-configuration gates on diagonal and directional modes.
    if (is_diagonal_mode && !cpi->oxcf.intra_mode_cfg.enable_diagonal_intra)
      continue;
    if (is_directional_mode &&
        !cpi->oxcf.intra_mode_cfg.enable_directional_intra)
      continue;

    // Speed-feature bitmask of chroma modes allowed for this transform size.
    if (!(cpi->sf.intra_sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
          (1 << uv_mode)))
      continue;
    // Covers the range UV_SMOOTH_PRED..UV_SMOOTH_H_PRED.
    if (!intra_mode_cfg->enable_smooth_intra && uv_mode >= UV_SMOOTH_PRED &&
        uv_mode <= UV_SMOOTH_H_PRED)
      continue;

    if (!intra_mode_cfg->enable_paeth_intra && uv_mode == UV_PAETH_PRED)
      continue;

    // Optionally restrict the chroma candidates to those associated with the
    // luma mode in av1_derived_chroma_intra_mode_used_flag.
    assert(mbmi->mode < INTRA_MODES);
    if (cpi->sf.intra_sf.prune_chroma_modes_using_luma_winner &&
        !(av1_derived_chroma_intra_mode_used_flag[mbmi->mode] & (1 << uv_mode)))
      continue;

    mbmi->uv_mode = uv_mode;

    // Init variables for cfl and angle delta
    const SPEED_FEATURES *sf = &cpi->sf;
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
    if (uv_mode == UV_CFL_PRED) {
      // CfL: pick the alpha parameters; a zero return means the mode is
      // skipped.
      if (!cfl_allowed || !intra_mode_cfg->enable_cfl_intra) continue;
      assert(!is_directional_mode);
      const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      if (!cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd,
                             sf->intra_sf.cfl_search_range, &tokenonly_rd_stats,
                             &mbmi->cfl_alpha_idx, &mbmi->cfl_alpha_signs)) {
        continue;
      }
    } else if (is_directional_mode && av1_use_angle_delta(mbmi->bsize) &&
               intra_mode_cfg->enable_angle_delta) {
      // Directional mode with angle-delta search. The HOG-based skip mask is
      // computed at most once per call (guarded by dir_mode_skip_mask_ready).
      if (sf->intra_sf.chroma_intra_pruning_with_hog &&
          !intra_search_state.dir_mode_skip_mask_ready) {
        // Indexed by [is_intra_frame][chroma_intra_pruning_with_hog - 1].
        static const float thresh[2][4] = {
          { -1.2f, 0.0f, 0.0f, 1.2f },    // Interframe
          { -1.2f, -1.2f, -0.6f, 0.4f },  // Intraframe
        };
        const int is_chroma = 1;
        const int is_intra_frame = frame_is_intra_only(cm);
        prune_intra_mode_with_hog(
            x, bsize, cm->seq_params->sb_size,
            thresh[is_intra_frame]
                  [sf->intra_sf.chroma_intra_pruning_with_hog - 1],
            intra_search_state.directional_mode_skip_mask, is_chroma);
        intra_search_state.dir_mode_skip_mask_ready = 1;
      }
      if (intra_search_state.directional_mode_skip_mask[uv_mode]) {
        continue;
      }

      // Search through angle delta
      const int rate_overhead =
          mode_costs->intra_uv_mode_cost[cfl_allowed][mbmi->mode][uv_mode];
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
      if (uv_mode == UV_SMOOTH_PRED &&
          should_prune_chroma_smooth_pred_based_on_source_variance(cpi, x,
                                                                   bsize))
        continue;

      // Predict directly if we don't need to search for angle delta.
      if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
        continue;
      }
    }
    // this_rate is recomputed here for every branch, including the
    // angle-delta branch that already wrote to it.
    const int mode_cost =
        mode_costs->intra_uv_mode_cost[cfl_allowed][mbmi->mode][uv_mode];
    this_rate = tokenonly_rd_stats.rate +
                intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost);
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip_txfm;
    }
  }

  // Search palette mode
  const int try_palette =
      cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
                        mbmi->bsize);
  if (try_palette) {
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
    // The UV_DC_PRED signaling cost is passed as the mode cost for palette.
    av1_rd_pick_palette_intra_sbuv(
        cpi, x,
        mode_costs->intra_uv_mode_cost[cfl_allowed][mbmi->mode][UV_DC_PRED],
        best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly,
        distortion, skippable);
  }

  // Commit the winning mode info.
  *mbmi = best_mbmi;
  // Make sure we actually chose a mode
  assert(best_rd < INT64_MAX);
  return best_rd;
}

// Searches palette mode for luma channel in inter frame.
1034 int av1_search_palette_mode(IntraModeSearchState *intra_search_state, 1035 const AV1_COMP *cpi, MACROBLOCK *x, 1036 BLOCK_SIZE bsize, unsigned int ref_frame_cost, 1037 PICK_MODE_CONTEXT *ctx, RD_STATS *this_rd_cost, 1038 int64_t best_rd) { 1039 const AV1_COMMON *const cm = &cpi->common; 1040 MB_MODE_INFO *const mbmi = x->e_mbd.mi[0]; 1041 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 1042 const int num_planes = av1_num_planes(cm); 1043 MACROBLOCKD *const xd = &x->e_mbd; 1044 int rate2 = 0; 1045 int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd; 1046 int skippable = 0; 1047 uint8_t *const best_palette_color_map = 1048 x->palette_buffer->best_palette_color_map; 1049 uint8_t *const color_map = xd->plane[0].color_index_map; 1050 MB_MODE_INFO best_mbmi_palette = *mbmi; 1051 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1052 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1053 const ModeCosts *mode_costs = &x->mode_costs; 1054 const int *const intra_mode_cost = 1055 mode_costs->mbmode_cost[size_group_lookup[bsize]]; 1056 const int rows = block_size_high[bsize]; 1057 const int cols = block_size_wide[bsize]; 1058 1059 mbmi->mode = DC_PRED; 1060 mbmi->uv_mode = UV_DC_PRED; 1061 mbmi->ref_frame[0] = INTRA_FRAME; 1062 mbmi->ref_frame[1] = NONE_FRAME; 1063 av1_zero(pmi->palette_size); 1064 1065 RD_STATS rd_stats_y; 1066 av1_invalid_rd_stats(&rd_stats_y); 1067 av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED], 1068 &best_mbmi_palette, best_palette_color_map, 1069 &best_rd_palette, &rd_stats_y.rate, NULL, 1070 &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL, 1071 ctx, best_blk_skip, best_tx_type_map); 1072 if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) { 1073 this_rd_cost->rdcost = INT64_MAX; 1074 return skippable; 1075 } 1076 1077 memcpy(x->txfm_search_info.blk_skip, best_blk_skip, 1078 sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize)); 1079 av1_copy_array(xd->tx_type_map, best_tx_type_map, 
ctx->num_4x4_blk); 1080 memcpy(color_map, best_palette_color_map, 1081 rows * cols * sizeof(best_palette_color_map[0])); 1082 1083 skippable = rd_stats_y.skip_txfm; 1084 distortion2 = rd_stats_y.dist; 1085 rate2 = rd_stats_y.rate + ref_frame_cost; 1086 if (num_planes > 1) { 1087 if (intra_search_state->rate_uv_intra == INT_MAX) { 1088 // We have not found any good uv mode yet, so we need to search for it. 1089 TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd); 1090 av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra, 1091 &intra_search_state->rate_uv_tokenonly, 1092 &intra_search_state->dist_uvs, 1093 &intra_search_state->skip_uvs, bsize, uv_tx); 1094 intra_search_state->mode_uv = mbmi->uv_mode; 1095 intra_search_state->pmi_uv = *pmi; 1096 intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV]; 1097 } 1098 1099 // We have found at least one good uv mode before, so copy and paste it 1100 // over. 1101 mbmi->uv_mode = intra_search_state->mode_uv; 1102 pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1]; 1103 if (pmi->palette_size[1] > 0) { 1104 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, 1105 intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE, 1106 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0])); 1107 } 1108 mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta; 1109 skippable = skippable && intra_search_state->skip_uvs; 1110 distortion2 += intra_search_state->dist_uvs; 1111 rate2 += intra_search_state->rate_uv_intra; 1112 } 1113 1114 if (skippable) { 1115 rate2 -= rd_stats_y.rate; 1116 if (num_planes > 1) rate2 -= intra_search_state->rate_uv_tokenonly; 1117 rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1]; 1118 } else { 1119 rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0]; 1120 } 1121 this_rd = RDCOST(x->rdmult, rate2, distortion2); 1122 this_rd_cost->rate = rate2; 1123 this_rd_cost->dist = distortion2; 1124 this_rd_cost->rdcost = this_rd; 
1125 return skippable; 1126 } 1127 1128 void av1_search_palette_mode_luma(const AV1_COMP *cpi, MACROBLOCK *x, 1129 BLOCK_SIZE bsize, unsigned int ref_frame_cost, 1130 PICK_MODE_CONTEXT *ctx, 1131 RD_STATS *this_rd_cost, int64_t best_rd) { 1132 MB_MODE_INFO *const mbmi = x->e_mbd.mi[0]; 1133 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 1134 MACROBLOCKD *const xd = &x->e_mbd; 1135 int64_t best_rd_palette = best_rd, this_rd; 1136 uint8_t *const best_palette_color_map = 1137 x->palette_buffer->best_palette_color_map; 1138 uint8_t *const color_map = xd->plane[0].color_index_map; 1139 MB_MODE_INFO best_mbmi_palette = *mbmi; 1140 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1141 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1142 const ModeCosts *mode_costs = &x->mode_costs; 1143 const int *const intra_mode_cost = 1144 mode_costs->mbmode_cost[size_group_lookup[bsize]]; 1145 const int rows = block_size_high[bsize]; 1146 const int cols = block_size_wide[bsize]; 1147 1148 mbmi->mode = DC_PRED; 1149 mbmi->uv_mode = UV_DC_PRED; 1150 mbmi->ref_frame[0] = INTRA_FRAME; 1151 mbmi->ref_frame[1] = NONE_FRAME; 1152 av1_zero(pmi->palette_size); 1153 1154 RD_STATS rd_stats_y; 1155 av1_invalid_rd_stats(&rd_stats_y); 1156 av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED], 1157 &best_mbmi_palette, best_palette_color_map, 1158 &best_rd_palette, &rd_stats_y.rate, NULL, 1159 &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL, 1160 ctx, best_blk_skip, best_tx_type_map); 1161 if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) { 1162 this_rd_cost->rdcost = INT64_MAX; 1163 return; 1164 } 1165 1166 memcpy(x->txfm_search_info.blk_skip, best_blk_skip, 1167 sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize)); 1168 av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk); 1169 memcpy(color_map, best_palette_color_map, 1170 rows * cols * sizeof(best_palette_color_map[0])); 1171 1172 rd_stats_y.rate += ref_frame_cost; 1173 1174 if 
(rd_stats_y.skip_txfm) { 1175 rd_stats_y.rate = 1176 ref_frame_cost + 1177 mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1]; 1178 } else { 1179 rd_stats_y.rate += 1180 mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0]; 1181 } 1182 this_rd = RDCOST(x->rdmult, rd_stats_y.rate, rd_stats_y.dist); 1183 this_rd_cost->rate = rd_stats_y.rate; 1184 this_rd_cost->dist = rd_stats_y.dist; 1185 this_rd_cost->rdcost = this_rd; 1186 this_rd_cost->skip_txfm = rd_stats_y.skip_txfm; 1187 } 1188 1189 /*!\brief Get the intra prediction by searching through tx_type and tx_size. 1190 * 1191 * \ingroup intra_mode_search 1192 * \callergraph 1193 * Currently this function is only used in the intra frame code path for 1194 * winner-mode processing. 1195 * 1196 * \return Returns whether the current mode is an improvement over best_rd. 1197 */ 1198 static inline int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, 1199 BLOCK_SIZE bsize, const int *bmode_costs, 1200 int64_t *best_rd, int *rate, 1201 int *rate_tokenonly, int64_t *distortion, 1202 uint8_t *skippable, MB_MODE_INFO *best_mbmi, 1203 PICK_MODE_CONTEXT *ctx) { 1204 MACROBLOCKD *const xd = &x->e_mbd; 1205 MB_MODE_INFO *const mbmi = xd->mi[0]; 1206 RD_STATS rd_stats; 1207 // In order to improve txfm search, avoid rd based breakouts during winner 1208 // mode evaluation. Hence passing ref_best_rd as INT64_MAX by default when the 1209 // speed feature use_rd_based_breakout_for_intra_tx_search is disabled. 1210 int64_t ref_best_rd = cpi->sf.tx_sf.use_rd_based_breakout_for_intra_tx_search 1211 ? 
*best_rd 1212 : INT64_MAX; 1213 av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats, bsize, ref_best_rd); 1214 if (rd_stats.rate == INT_MAX) return 0; 1215 int this_rate_tokenonly = rd_stats.rate; 1216 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) { 1217 // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size 1218 // in the tokenonly rate, but for intra blocks, tx_size is always coded 1219 // (prediction granularity), so we account for it in the full rate, 1220 // not the tokenonly rate. 1221 this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size); 1222 } 1223 const int this_rate = 1224 rd_stats.rate + 1225 intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode], 0); 1226 const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist); 1227 if (this_rd < *best_rd) { 1228 *best_mbmi = *mbmi; 1229 *best_rd = this_rd; 1230 *rate = this_rate; 1231 *rate_tokenonly = this_rate_tokenonly; 1232 *distortion = rd_stats.dist; 1233 *skippable = rd_stats.skip_txfm; 1234 av1_copy_array(ctx->blk_skip, x->txfm_search_info.blk_skip, 1235 ctx->num_4x4_blk); 1236 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); 1237 return 1; 1238 } 1239 return 0; 1240 } 1241 1242 /*!\brief Search for the best filter_intra mode when coding inter frame. 1243 * 1244 * \ingroup intra_mode_search 1245 * \callergraph 1246 * This function loops through all filter_intra modes to find the best one. 1247 * 1248 * \remark Returns nothing, but updates the mbmi and rd_stats. 
1249 */ 1250 static inline void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x, 1251 BLOCK_SIZE bsize, 1252 const PICK_MODE_CONTEXT *ctx, 1253 RD_STATS *rd_stats_y, int mode_cost, 1254 int64_t best_rd, 1255 int64_t best_rd_so_far) { 1256 MACROBLOCKD *const xd = &x->e_mbd; 1257 MB_MODE_INFO *const mbmi = xd->mi[0]; 1258 assert(mbmi->mode == DC_PRED && 1259 av1_filter_intra_allowed_bsize(&cpi->common, bsize)); 1260 1261 RD_STATS rd_stats_y_fi; 1262 int filter_intra_selected_flag = 0; 1263 TX_SIZE best_tx_size = mbmi->tx_size; 1264 FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED; 1265 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1266 memcpy(best_blk_skip, x->txfm_search_info.blk_skip, 1267 sizeof(best_blk_skip[0]) * ctx->num_4x4_blk); 1268 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; 1269 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); 1270 mbmi->filter_intra_mode_info.use_filter_intra = 1; 1271 for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; fi_mode < FILTER_INTRA_MODES; 1272 ++fi_mode) { 1273 mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode; 1274 av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd); 1275 if (rd_stats_y_fi.rate == INT_MAX) continue; 1276 const int this_rate_tmp = 1277 rd_stats_y_fi.rate + 1278 intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0); 1279 const int64_t this_rd_tmp = 1280 RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist); 1281 1282 if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > best_rd) { 1283 break; 1284 } 1285 if (this_rd_tmp < best_rd_so_far) { 1286 best_tx_size = mbmi->tx_size; 1287 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); 1288 memcpy(best_blk_skip, x->txfm_search_info.blk_skip, 1289 sizeof(best_blk_skip[0]) * ctx->num_4x4_blk); 1290 best_fi_mode = fi_mode; 1291 *rd_stats_y = rd_stats_y_fi; 1292 filter_intra_selected_flag = 1; 1293 best_rd_so_far = this_rd_tmp; 1294 } 1295 } 1296 1297 mbmi->tx_size = best_tx_size; 
1298 av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk); 1299 memcpy(x->txfm_search_info.blk_skip, best_blk_skip, 1300 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk); 1301 1302 if (filter_intra_selected_flag) { 1303 mbmi->filter_intra_mode_info.use_filter_intra = 1; 1304 mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode; 1305 } else { 1306 mbmi->filter_intra_mode_info.use_filter_intra = 0; 1307 } 1308 } 1309 1310 // Evaluate a given luma intra-mode in inter frames. 1311 int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state, 1312 const AV1_COMP *cpi, MACROBLOCK *x, 1313 BLOCK_SIZE bsize, unsigned int ref_frame_cost, 1314 const PICK_MODE_CONTEXT *ctx, RD_STATS *rd_stats_y, 1315 int64_t best_rd, int *mode_cost_y, int64_t *rd_y, 1316 int64_t *best_model_rd, 1317 int64_t top_intra_model_rd[]) { 1318 const AV1_COMMON *cm = &cpi->common; 1319 const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf; 1320 MACROBLOCKD *const xd = &x->e_mbd; 1321 MB_MODE_INFO *const mbmi = xd->mi[0]; 1322 assert(mbmi->ref_frame[0] == INTRA_FRAME); 1323 const PREDICTION_MODE mode = mbmi->mode; 1324 const ModeCosts *mode_costs = &x->mode_costs; 1325 const int mode_cost = 1326 mode_costs->mbmode_cost[size_group_lookup[bsize]][mode] + ref_frame_cost; 1327 const int skip_ctx = av1_get_skip_txfm_context(xd); 1328 1329 int known_rate = mode_cost; 1330 const int intra_cost_penalty = av1_get_intra_cost_penalty( 1331 cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q, 1332 cm->seq_params->bit_depth); 1333 1334 if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty; 1335 known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0], 1336 mode_costs->skip_txfm_cost[skip_ctx][1]); 1337 const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0); 1338 if (known_rd > best_rd) { 1339 intra_search_state->skip_intra_modes = 1; 1340 return 0; 1341 } 1342 1343 const int is_directional_mode = 
av1_is_directional_mode(mode); 1344 if (is_directional_mode && av1_use_angle_delta(bsize) && 1345 cpi->oxcf.intra_mode_cfg.enable_angle_delta) { 1346 if (intra_sf->intra_pruning_with_hog && 1347 !intra_search_state->dir_mode_skip_mask_ready) { 1348 const float thresh[4] = { -1.2f, 0.0f, 0.0f, 1.2f }; 1349 const int is_chroma = 0; 1350 prune_intra_mode_with_hog(x, bsize, cm->seq_params->sb_size, 1351 thresh[intra_sf->intra_pruning_with_hog - 1], 1352 intra_search_state->directional_mode_skip_mask, 1353 is_chroma); 1354 intra_search_state->dir_mode_skip_mask_ready = 1; 1355 } 1356 if (intra_search_state->directional_mode_skip_mask[mode]) return 0; 1357 } 1358 const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]); 1359 const int64_t this_model_rd = 1360 intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1); 1361 1362 const int model_rd_index_for_pruning = 1363 get_model_rd_index_for_pruning(x, intra_sf); 1364 1365 if (prune_intra_y_mode(this_model_rd, best_model_rd, top_intra_model_rd, 1366 intra_sf->top_intra_model_count_allowed, 1367 model_rd_index_for_pruning)) 1368 return 0; 1369 av1_init_rd_stats(rd_stats_y); 1370 av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd); 1371 1372 // Pick filter intra modes. 1373 if (mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) { 1374 int try_filter_intra = 1; 1375 int64_t best_rd_so_far = INT64_MAX; 1376 if (rd_stats_y->rate != INT_MAX) { 1377 // best_rd_so_far is the rdcost of DC_PRED without using filter_intra. 1378 // Later, in filter intra search, best_rd_so_far is used for comparison. 
1379 mbmi->filter_intra_mode_info.use_filter_intra = 0; 1380 const int tmp_rate = 1381 rd_stats_y->rate + 1382 intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0); 1383 best_rd_so_far = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist); 1384 try_filter_intra = (best_rd_so_far / 2) <= best_rd; 1385 } else if (intra_sf->skip_filter_intra_in_inter_frames >= 1) { 1386 // As rd cost of luma intra dc mode is more than best_rd (i.e., 1387 // rd_stats_y->rate = INT_MAX), skip the evaluation of filter intra modes. 1388 try_filter_intra = 0; 1389 } 1390 1391 if (try_filter_intra) { 1392 handle_filter_intra_mode(cpi, x, bsize, ctx, rd_stats_y, mode_cost, 1393 best_rd, best_rd_so_far); 1394 } 1395 } 1396 1397 if (rd_stats_y->rate == INT_MAX) return 0; 1398 1399 *mode_cost_y = intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0); 1400 const int rate_y = rd_stats_y->skip_txfm 1401 ? mode_costs->skip_txfm_cost[skip_ctx][1] 1402 : rd_stats_y->rate; 1403 *rd_y = RDCOST(x->rdmult, rate_y + *mode_cost_y, rd_stats_y->dist); 1404 if (best_rd < (INT64_MAX / 2) && *rd_y > (best_rd + (best_rd >> 2))) { 1405 intra_search_state->skip_intra_modes = 1; 1406 return 0; 1407 } 1408 1409 return 1; 1410 } 1411 1412 int av1_search_intra_uv_modes_in_interframe( 1413 IntraModeSearchState *intra_search_state, const AV1_COMP *cpi, 1414 MACROBLOCK *x, BLOCK_SIZE bsize, RD_STATS *rd_stats, 1415 const RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, int64_t best_rd) { 1416 const AV1_COMMON *cm = &cpi->common; 1417 MACROBLOCKD *const xd = &x->e_mbd; 1418 MB_MODE_INFO *const mbmi = xd->mi[0]; 1419 assert(mbmi->ref_frame[0] == INTRA_FRAME); 1420 1421 // TODO(chiyotsai@google.com): Consolidate the chroma search code here with 1422 // the one in av1_search_palette_mode. 
1423 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 1424 const int try_palette = 1425 cpi->oxcf.tool_cfg.enable_palette && 1426 av1_allow_palette(cm->features.allow_screen_content_tools, mbmi->bsize); 1427 1428 assert(intra_search_state->rate_uv_intra == INT_MAX); 1429 if (intra_search_state->rate_uv_intra == INT_MAX) { 1430 // If no good uv-predictor had been found, search for it. 1431 const TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd); 1432 av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra, 1433 &intra_search_state->rate_uv_tokenonly, 1434 &intra_search_state->dist_uvs, 1435 &intra_search_state->skip_uvs, bsize, uv_tx); 1436 intra_search_state->mode_uv = mbmi->uv_mode; 1437 if (try_palette) intra_search_state->pmi_uv = *pmi; 1438 intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV]; 1439 1440 const int uv_rate = intra_search_state->rate_uv_tokenonly; 1441 const int64_t uv_dist = intra_search_state->dist_uvs; 1442 const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist); 1443 if (uv_rd > best_rd) { 1444 // If there is no good intra uv-mode available, we can skip all intra 1445 // modes. 1446 intra_search_state->skip_intra_modes = 1; 1447 return 0; 1448 } 1449 } 1450 1451 // If we are here, then the encoder has found at least one good intra uv 1452 // predictor, so we can directly copy its statistics over. 1453 // TODO(any): the stats here is not right if the best uv mode is CFL but the 1454 // best y mode is palette. 
1455 rd_stats_uv->rate = intra_search_state->rate_uv_tokenonly; 1456 rd_stats_uv->dist = intra_search_state->dist_uvs; 1457 rd_stats_uv->skip_txfm = intra_search_state->skip_uvs; 1458 rd_stats->skip_txfm = rd_stats_y->skip_txfm && rd_stats_uv->skip_txfm; 1459 mbmi->uv_mode = intra_search_state->mode_uv; 1460 if (try_palette) { 1461 pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1]; 1462 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, 1463 intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE, 1464 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0])); 1465 } 1466 mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta; 1467 1468 return 1; 1469 } 1470 1471 // Checks if odd delta angles can be pruned based on rdcosts of even delta 1472 // angles of the corresponding directional mode. 1473 static inline int prune_luma_odd_delta_angles_using_rd_cost( 1474 const MB_MODE_INFO *const mbmi, const int64_t *const intra_modes_rd_cost, 1475 int64_t best_rd, int prune_luma_odd_delta_angles_in_intra) { 1476 const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y]; 1477 if (!prune_luma_odd_delta_angles_in_intra || 1478 !av1_is_directional_mode(mbmi->mode) || !(abs(luma_delta_angle) & 1) || 1479 best_rd == INT64_MAX) 1480 return 0; 1481 1482 const int64_t rd_thresh = best_rd + (best_rd >> 3); 1483 1484 // Neighbour rdcosts are considered for pruning of odd delta angles as 1485 // mentioned below: 1486 // Delta angle Delta angle rdcost 1487 // to be pruned to be considered 1488 // -3 -2 1489 // -1 -2, 0 1490 // 1 0, 2 1491 // 3 2 1492 return intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA] > rd_thresh && 1493 intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA + 2] > 1494 rd_thresh; 1495 } 1496 1497 // Finds the best non-intrabc mode on an intra frame. 
1498 int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, 1499 int *rate, int *rate_tokenonly, 1500 int64_t *distortion, uint8_t *skippable, 1501 BLOCK_SIZE bsize, int64_t best_rd, 1502 PICK_MODE_CONTEXT *ctx) { 1503 MACROBLOCKD *const xd = &x->e_mbd; 1504 MB_MODE_INFO *const mbmi = xd->mi[0]; 1505 assert(!is_inter_block(mbmi)); 1506 int64_t best_model_rd = INT64_MAX; 1507 int is_directional_mode; 1508 uint8_t directional_mode_skip_mask[INTRA_MODES] = { 0 }; 1509 // Flag to check rd of any intra mode is better than best_rd passed to this 1510 // function 1511 int beat_best_rd = 0; 1512 const int *bmode_costs; 1513 const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg; 1514 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; 1515 const int try_palette = 1516 cpi->oxcf.tool_cfg.enable_palette && 1517 av1_allow_palette(cpi->common.features.allow_screen_content_tools, 1518 mbmi->bsize); 1519 uint8_t *best_palette_color_map = 1520 try_palette ? x->palette_buffer->best_palette_color_map : NULL; 1521 const MB_MODE_INFO *above_mi = xd->above_mbmi; 1522 const MB_MODE_INFO *left_mi = xd->left_mbmi; 1523 const PREDICTION_MODE A = av1_above_block_mode(above_mi); 1524 const PREDICTION_MODE L = av1_left_block_mode(left_mi); 1525 const int above_ctx = intra_mode_context[A]; 1526 const int left_ctx = intra_mode_context[L]; 1527 bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx]; 1528 1529 mbmi->angle_delta[PLANE_TYPE_Y] = 0; 1530 const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf; 1531 if (intra_sf->intra_pruning_with_hog) { 1532 // Less aggressive thresholds are used here than those used in inter frame 1533 // encoding in av1_handle_intra_y_mode() because we want key frames/intra 1534 // frames to have higher quality. 
1535 const float thresh[4] = { -1.2f, -1.2f, -0.6f, 0.4f }; 1536 const int is_chroma = 0; 1537 prune_intra_mode_with_hog(x, bsize, cpi->common.seq_params->sb_size, 1538 thresh[intra_sf->intra_pruning_with_hog - 1], 1539 directional_mode_skip_mask, is_chroma); 1540 } 1541 mbmi->filter_intra_mode_info.use_filter_intra = 0; 1542 pmi->palette_size[0] = 0; 1543 1544 // Set params for mode evaluation 1545 set_mode_eval_params(cpi, x, MODE_EVAL); 1546 1547 MB_MODE_INFO best_mbmi = *mbmi; 1548 const int max_winner_mode_count = 1549 winner_mode_count_allowed[cpi->sf.winner_mode_sf.multi_winner_mode_type]; 1550 zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats); 1551 x->winner_mode_count = 0; 1552 1553 // Searches the intra-modes except for intrabc, palette, and filter_intra. 1554 int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT]; 1555 for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) { 1556 top_intra_model_rd[i] = INT64_MAX; 1557 } 1558 1559 // Initialize the rdcost corresponding to all the directional and 1560 // non-directional intra modes. 1561 // 1. For directional modes, it stores the rdcost values for delta angles -4, 1562 // -3, ..., 3, 4. 1563 // 2. The rdcost value for luma_delta_angle is stored at index 1564 // luma_delta_angle + MAX_ANGLE_DELTA + 1. 1565 // 3. The rdcost values for fictitious/nonexistent luma_delta_angle -4 and 4 1566 // (array indices 0 and 8) are always set to INT64_MAX (the initial value). 
1567 int64_t intra_modes_rd_cost[INTRA_MODE_END] 1568 [SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY]; 1569 for (int i = 0; i < INTRA_MODE_END; i++) { 1570 for (int j = 0; j < SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY; j++) { 1571 intra_modes_rd_cost[i][j] = INT64_MAX; 1572 } 1573 } 1574 1575 for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT; 1576 ++mode_idx) { 1577 set_y_mode_and_delta_angle(mode_idx, mbmi, 1578 intra_sf->prune_luma_odd_delta_angles_in_intra); 1579 RD_STATS this_rd_stats; 1580 int this_rate, this_rate_tokenonly, s; 1581 int is_diagonal_mode; 1582 int64_t this_distortion, this_rd; 1583 const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y]; 1584 1585 is_diagonal_mode = av1_is_diagonal_mode(mbmi->mode); 1586 if (is_diagonal_mode && !intra_mode_cfg->enable_diagonal_intra) continue; 1587 if (av1_is_directional_mode(mbmi->mode) && 1588 !intra_mode_cfg->enable_directional_intra) 1589 continue; 1590 1591 // The smooth prediction mode appears to be more frequently picked 1592 // than horizontal / vertical smooth prediction modes. Hence treat 1593 // them differently in speed features. 1594 if ((!intra_mode_cfg->enable_smooth_intra || 1595 intra_sf->disable_smooth_intra) && 1596 (mbmi->mode == SMOOTH_H_PRED || mbmi->mode == SMOOTH_V_PRED)) 1597 continue; 1598 if (!intra_mode_cfg->enable_smooth_intra && mbmi->mode == SMOOTH_PRED) 1599 continue; 1600 1601 // The functionality of filter intra modes and smooth prediction 1602 // overlap. Hence smooth prediction is pruned only if all the 1603 // filter intra modes are enabled. 1604 if (intra_sf->disable_smooth_intra && 1605 intra_sf->prune_filter_intra_level == 0 && mbmi->mode == SMOOTH_PRED) 1606 continue; 1607 if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED) 1608 continue; 1609 1610 // Skip the evaluation of modes that do not match with the winner mode in 1611 // x->mb_mode_cache. 
1612 if (x->use_mb_mode_cache && mbmi->mode != x->mb_mode_cache->mode) continue; 1613 1614 is_directional_mode = av1_is_directional_mode(mbmi->mode); 1615 if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue; 1616 if (is_directional_mode && 1617 !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) && 1618 luma_delta_angle != 0) 1619 continue; 1620 1621 // Use intra_y_mode_mask speed feature to skip intra mode evaluation. 1622 if (!(intra_sf->intra_y_mode_mask[max_txsize_lookup[bsize]] & 1623 (1 << mbmi->mode))) 1624 continue; 1625 1626 if (prune_luma_odd_delta_angles_using_rd_cost( 1627 mbmi, intra_modes_rd_cost[mbmi->mode], best_rd, 1628 intra_sf->prune_luma_odd_delta_angles_in_intra)) 1629 continue; 1630 1631 const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]); 1632 const int64_t this_model_rd = 1633 intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1); 1634 1635 const int model_rd_index_for_pruning = 1636 get_model_rd_index_for_pruning(x, intra_sf); 1637 1638 if (prune_intra_y_mode(this_model_rd, &best_model_rd, top_intra_model_rd, 1639 intra_sf->top_intra_model_count_allowed, 1640 model_rd_index_for_pruning)) 1641 continue; 1642 1643 // Builds the actual prediction. The prediction from 1644 // model_intra_yrd_and_prune was just an estimation that did not take into 1645 // account the effect of txfm pipeline, so we need to redo it for real 1646 // here. 
1647 av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd); 1648 this_rate_tokenonly = this_rd_stats.rate; 1649 this_distortion = this_rd_stats.dist; 1650 s = this_rd_stats.skip_txfm; 1651 1652 if (this_rate_tokenonly == INT_MAX) continue; 1653 1654 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) { 1655 // av1_pick_uniform_tx_size_type_yrd above includes the cost of the 1656 // tx_size in the tokenonly rate, but for intra blocks, tx_size is always 1657 // coded (prediction granularity), so we account for it in the full rate, 1658 // not the tokenonly rate. 1659 this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size); 1660 } 1661 this_rate = 1662 this_rd_stats.rate + 1663 intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode], 0); 1664 this_rd = RDCOST(x->rdmult, this_rate, this_distortion); 1665 1666 // Visual quality adjustment based on recon vs source variance. 1667 if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) { 1668 this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize)); 1669 } 1670 1671 intra_modes_rd_cost[mbmi->mode][luma_delta_angle + MAX_ANGLE_DELTA + 1] = 1672 this_rd; 1673 1674 // Collect mode stats for multiwinner mode processing 1675 const int txfm_search_done = 1; 1676 store_winner_mode_stats( 1677 &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd, 1678 cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done); 1679 if (this_rd < best_rd) { 1680 best_mbmi = *mbmi; 1681 best_rd = this_rd; 1682 // Setting beat_best_rd flag because current mode rd is better than 1683 // best_rd passed to this function 1684 beat_best_rd = 1; 1685 *rate = this_rate; 1686 *rate_tokenonly = this_rate_tokenonly; 1687 *distortion = this_distortion; 1688 *skippable = s; 1689 memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip, 1690 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk); 1691 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); 
1692 } 1693 } 1694 1695 // Searches palette 1696 if (try_palette) { 1697 av1_rd_pick_palette_intra_sby( 1698 cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map, 1699 &best_rd, rate, rate_tokenonly, distortion, skippable, &beat_best_rd, 1700 ctx, ctx->blk_skip, ctx->tx_type_map); 1701 } 1702 1703 // Searches filter_intra 1704 if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) { 1705 if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion, 1706 skippable, bsize, bmode_costs[DC_PRED], 1707 best_mbmi.mode, &best_rd, &best_model_rd, 1708 ctx)) { 1709 best_mbmi = *mbmi; 1710 } 1711 } 1712 1713 // No mode is identified with less rd value than best_rd passed to this 1714 // function. In such cases winner mode processing is not necessary and return 1715 // best_rd as INT64_MAX to indicate best mode is not identified 1716 if (!beat_best_rd) return INT64_MAX; 1717 1718 // In multi-winner mode processing, perform tx search for few best modes 1719 // identified during mode evaluation. Winner mode processing uses best tx 1720 // configuration for tx search. 
1721 if (cpi->sf.winner_mode_sf.multi_winner_mode_type) { 1722 int best_mode_idx = 0; 1723 int block_width, block_height; 1724 uint8_t *color_map_dst = xd->plane[PLANE_TYPE_Y].color_index_map; 1725 av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width, 1726 &block_height, NULL, NULL); 1727 1728 for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) { 1729 *mbmi = x->winner_mode_stats[mode_idx].mbmi; 1730 if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) { 1731 // Restore color_map of palette mode before winner mode processing 1732 if (mbmi->palette_mode_info.palette_size[0] > 0) { 1733 uint8_t *color_map_src = 1734 x->winner_mode_stats[mode_idx].color_index_map; 1735 memcpy(color_map_dst, color_map_src, 1736 block_width * block_height * sizeof(*color_map_src)); 1737 } 1738 // Set params for winner mode evaluation 1739 set_mode_eval_params(cpi, x, WINNER_MODE_EVAL); 1740 1741 // Winner mode processing 1742 // If previous searches use only the default tx type/no R-D optimization 1743 // of quantized coeffs, do an extra search for the best tx type/better 1744 // R-D optimization of quantized coeffs 1745 if (intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, 1746 rate_tokenonly, distortion, skippable, &best_mbmi, 1747 ctx)) 1748 best_mode_idx = mode_idx; 1749 } 1750 } 1751 // Copy color_map of palette mode for final winner mode 1752 if (best_mbmi.palette_mode_info.palette_size[0] > 0) { 1753 uint8_t *color_map_src = 1754 x->winner_mode_stats[best_mode_idx].color_index_map; 1755 memcpy(color_map_dst, color_map_src, 1756 block_width * block_height * sizeof(*color_map_src)); 1757 } 1758 } else { 1759 // If previous searches use only the default tx type/no R-D optimization of 1760 // quantized coeffs, do an extra search for the best tx type/better R-D 1761 // optimization of quantized coeffs 1762 if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) { 1763 // Set params for winner mode evaluation 1764 set_mode_eval_params(cpi, x, 
WINNER_MODE_EVAL); 1765 *mbmi = best_mbmi; 1766 intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, 1767 rate_tokenonly, distortion, skippable, &best_mbmi, ctx); 1768 } 1769 } 1770 *mbmi = best_mbmi; 1771 av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk); 1772 return best_rd; 1773 }