intra_mode_search_utils.h (28672B)
1 /* 2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 /*!\file 13 * \brief Defines utility functions used in intra mode search. 14 * 15 * This includes rdcost estimations, histogram based pruning, etc. 16 */ 17 #ifndef AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_ 18 #define AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_ 19 20 #include "av1/common/enums.h" 21 #include "av1/common/pred_common.h" 22 #include "av1/common/reconintra.h" 23 24 #include "av1/encoder/encoder.h" 25 #include "av1/encoder/encodeframe.h" 26 #include "av1/encoder/model_rd.h" 27 #include "av1/encoder/palette.h" 28 #include "av1/encoder/hybrid_fwd_txfm.h" 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 /*!\cond */ 35 // Macro for computing the speed-preset dependent threshold which is used for 36 // deciding whether to enable/disable variance calculations in 37 // intra_rd_variance_factor(). 38 #define INTRA_RD_VAR_THRESH(X) (1.0 - (0.25 * (X))) 39 40 #define BINS 32 41 static const float av1_intra_hog_model_bias[DIRECTIONAL_MODES] = { 42 0.450578f, 0.695518f, -0.717944f, -0.639894f, 43 -0.602019f, -0.453454f, 0.055857f, -0.465480f, 44 }; 45 46 static const float av1_intra_hog_model_weights[BINS * DIRECTIONAL_MODES] = { 47 -3.076402f, -3.757063f, -3.275266f, -3.180665f, -3.452105f, -3.216593f, 48 -2.871212f, -3.134296f, -1.822324f, -2.401411f, -1.541016f, -1.195322f, 49 -0.434156f, 0.322868f, 2.260546f, 3.368715f, 3.989290f, 3.308487f, 50 2.277893f, 0.923793f, 0.026412f, -0.385174f, -0.718622f, -1.408867f, 51 -1.050558f, -2.323941f, -2.225827f, -2.585453f, -3.054283f, -2.875087f, 52 -2.985709f, -3.447155f, 3.758139f, 3.204353f, 2.170998f, 0.826587f, 53 -0.269665f, -0.702068f, -1.085776f, -2.175249f, -1.623180f, -2.975142f, 54 -2.779629f, -3.190799f, -3.521900f, -3.375480f, -3.319355f, -3.897389f, 55 -3.172334f, -3.594528f, -2.879132f, -2.547777f, -2.921023f, -2.281844f, 56 -1.818988f, -2.041771f, -0.618268f, -1.396458f, -0.567153f, -0.285868f, 57 -0.088058f, 0.753494f, 2.092413f, 3.215266f, -3.300277f, -2.748658f, 58 -2.315784f, -2.423671f, -2.257283f, -2.269583f, -2.196660f, -2.301076f, 59 -2.646516f, -2.271319f, -2.254366f, -2.300102f, -2.217960f, -2.473300f, 60 -2.116866f, -2.528246f, -3.314712f, -1.701010f, -0.589040f, -0.088077f, 61 0.813112f, 1.702213f, 2.653045f, 3.351749f, 3.243554f, 3.199409f, 62 2.437856f, 1.468854f, 0.533039f, -0.099065f, -0.622643f, -2.200732f, 63 -4.228861f, -2.875263f, -1.273956f, -0.433280f, 0.803771f, 1.975043f, 64 3.179528f, 3.939064f, 3.454379f, 3.689386f, 3.116411f, 1.970991f, 65 0.798406f, -0.628514f, -1.252546f, -2.825176f, -4.090178f, -3.777448f, 66 -3.227314f, -3.479403f, -3.320569f, -3.159372f, -2.729202f, -2.722341f, 67 -3.054913f, -2.742923f, -2.612703f, -2.662632f, -2.907314f, -3.117794f, 68 -3.102660f, -3.970972f, -4.891357f, -3.935582f, -3.347758f, -2.721924f, 69 -2.219011f, -1.702391f, -0.866529f, -0.153743f, 0.107733f, 1.416882f, 70 2.572884f, 3.607755f, 3.974820f, 3.997783f, 2.970459f, 0.791687f, 71 -1.478921f, -1.228154f, -1.216955f, -1.765932f, -1.951003f, -1.985301f, 72 -1.975881f, -1.985593f, -2.422371f, -2.419978f, -2.531288f, -2.951853f, 73 -3.071380f, -3.277027f, -3.373539f, -4.462010f, -0.967888f, 0.805524f, 74 2.794130f, 3.685984f, 3.745195f, 3.252444f, 2.316108f, 1.399146f, 75 -0.136519f, -0.162811f, -1.004357f, -1.667911f, -1.964662f, -2.937579f, 76 -3.019533f, -3.942766f, -5.102767f, -3.882073f, -3.532027f, -3.451956f, 77 -2.944015f, -2.643064f, -2.529872f, -2.077290f, -2.809965f, -1.803734f, 78 -1.783593f, -1.662585f, -1.415484f, -1.392673f, -0.788794f, -1.204819f, 79 -1.998864f, -1.182102f, -0.892110f, -1.317415f, -1.359112f, -1.522867f, 80 -1.468552f, -1.779072f, -2.332959f, -2.160346f, -2.329387f, -2.631259f, 81 -2.744936f, -3.052494f, -2.787363f, -3.442548f, -4.245075f, -3.032172f, 82 -2.061609f, -1.768116f, -1.286072f, -0.706587f, -0.192413f, 0.386938f, 83 0.716997f, 1.481393f, 2.216702f, 2.737986f, 3.109809f, 3.226084f, 84 2.490098f, -0.095827f, -3.864816f, -3.507248f, -3.128925f, -2.908251f, 85 -2.883836f, -2.881411f, -2.524377f, -2.624478f, -2.399573f, -2.367718f, 86 -1.918255f, -1.926277f, -1.694584f, -1.723790f, -0.966491f, -1.183115f, 87 -1.430687f, 0.872896f, 2.766550f, 3.610080f, 3.578041f, 3.334928f, 88 2.586680f, 1.895721f, 1.122195f, 0.488519f, -0.140689f, -0.799076f, 89 -1.222860f, -1.502437f, -1.900969f, -3.206816f, 90 }; 91 92 static const NN_CONFIG av1_intra_hog_model_nnconfig = { 93 BINS, // num_inputs 94 DIRECTIONAL_MODES, // num_outputs 95 0, // num_hidden_layers 96 { 0 }, 97 { 98 av1_intra_hog_model_weights, 99 }, 100 { 101 av1_intra_hog_model_bias, 102 }, 103 }; 104 105 #define FIX_PREC_BITS (16) 106 static inline int get_hist_bin_idx(int dx, int dy) { 107 const int32_t ratio = (dy * (1 << FIX_PREC_BITS)) / dx; 108 109 // Find index by bisection 110 static const int thresholds[BINS] = { 111 -1334015, -441798, -261605, -183158, -138560, -109331, -88359, -72303, 112 -59392, -48579, -39272, -30982, -23445, -16400, -9715, -3194, 113 3227, 9748, 16433, 23478, 31015, 39305, 48611, 59425, 114 72336, 88392, 109364, 138593, 183191, 261638, 441831, INT32_MAX 115 }; 116 117 int lo_idx = 0, hi_idx = BINS - 1; 118 // Divide into segments of size 8 gives better performance than binary search 119 // here. 120 if (ratio <= thresholds[7]) { 121 lo_idx = 0; 122 hi_idx = 7; 123 } else if (ratio <= thresholds[15]) { 124 lo_idx = 8; 125 hi_idx = 15; 126 } else if (ratio <= thresholds[23]) { 127 lo_idx = 16; 128 hi_idx = 23; 129 } else { 130 lo_idx = 24; 131 hi_idx = 31; 132 } 133 134 for (int idx = lo_idx; idx <= hi_idx; idx++) { 135 if (ratio <= thresholds[idx]) { 136 return idx; 137 } 138 } 139 assert(0 && "No valid histogram bin found!"); 140 return BINS - 1; 141 } 142 #undef FIX_PREC_BITS 143 144 // Normalizes the hog data. 145 static inline void normalize_hog(float total, float *hist) { 146 for (int i = 0; i < BINS; ++i) hist[i] /= total; 147 } 148 149 static inline void lowbd_generate_hog(const uint8_t *src, int stride, int rows, 150 int cols, float *hist) { 151 float total = 0.1f; 152 src += stride; 153 for (int r = 1; r < rows - 1; ++r) { 154 for (int c = 1; c < cols - 1; ++c) { 155 const uint8_t *above = &src[c - stride]; 156 const uint8_t *below = &src[c + stride]; 157 const uint8_t *left = &src[c - 1]; 158 const uint8_t *right = &src[c + 1]; 159 // Calculate gradient using Sobel filters. 160 const int dx = (right[-stride] + 2 * right[0] + right[stride]) - 161 (left[-stride] + 2 * left[0] + left[stride]); 162 const int dy = (below[-1] + 2 * below[0] + below[1]) - 163 (above[-1] + 2 * above[0] + above[1]); 164 if (dx == 0 && dy == 0) continue; 165 const int temp = abs(dx) + abs(dy); 166 if (!temp) continue; 167 total += temp; 168 if (dx == 0) { 169 hist[0] += temp / 2; 170 hist[BINS - 1] += temp / 2; 171 } else { 172 const int idx = get_hist_bin_idx(dx, dy); 173 assert(idx >= 0 && idx < BINS); 174 hist[idx] += temp; 175 } 176 } 177 src += stride; 178 } 179 180 normalize_hog(total, hist); 181 } 182 183 // Computes and stores pixel level gradient information of a given superblock 184 // for LBD encode. 185 static inline void lowbd_compute_gradient_info_sb(MACROBLOCK *const x, 186 BLOCK_SIZE sb_size, 187 PLANE_TYPE plane) { 188 PixelLevelGradientInfo *const grad_info_sb = 189 x->pixel_gradient_info + plane * MAX_SB_SQUARE; 190 const uint8_t *src = x->plane[plane].src.buf; 191 const int stride = x->plane[plane].src.stride; 192 const int ss_x = x->e_mbd.plane[plane].subsampling_x; 193 const int ss_y = x->e_mbd.plane[plane].subsampling_y; 194 const int sb_height = block_size_high[sb_size] >> ss_y; 195 const int sb_width = block_size_wide[sb_size] >> ss_x; 196 src += stride; 197 for (int r = 1; r < sb_height - 1; ++r) { 198 for (int c = 1; c < sb_width - 1; ++c) { 199 const uint8_t *above = &src[c - stride]; 200 const uint8_t *below = &src[c + stride]; 201 const uint8_t *left = &src[c - 1]; 202 const uint8_t *right = &src[c + 1]; 203 // Calculate gradient using Sobel filters. 204 const int dx = (right[-stride] + 2 * right[0] + right[stride]) - 205 (left[-stride] + 2 * left[0] + left[stride]); 206 const int dy = (below[-1] + 2 * below[0] + below[1]) - 207 (above[-1] + 2 * above[0] + above[1]); 208 grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0); 209 grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum = 210 (uint16_t)(abs(dx) + abs(dy)); 211 grad_info_sb[r * sb_width + c].hist_bin_idx = 212 (dx != 0) ? get_hist_bin_idx(dx, dy) : -1; 213 } 214 src += stride; 215 } 216 } 217 218 #if CONFIG_AV1_HIGHBITDEPTH 219 static inline void highbd_generate_hog(const uint8_t *src8, int stride, 220 int rows, int cols, float *hist) { 221 float total = 0.1f; 222 const uint16_t *src = CONVERT_TO_SHORTPTR(src8); 223 src += stride; 224 for (int r = 1; r < rows - 1; ++r) { 225 for (int c = 1; c < cols - 1; ++c) { 226 const uint16_t *above = &src[c - stride]; 227 const uint16_t *below = &src[c + stride]; 228 const uint16_t *left = &src[c - 1]; 229 const uint16_t *right = &src[c + 1]; 230 // Calculate gradient using Sobel filters. 231 const int dx = (right[-stride] + 2 * right[0] + right[stride]) - 232 (left[-stride] + 2 * left[0] + left[stride]); 233 const int dy = (below[-1] + 2 * below[0] + below[1]) - 234 (above[-1] + 2 * above[0] + above[1]); 235 if (dx == 0 && dy == 0) continue; 236 const int temp = abs(dx) + abs(dy); 237 if (!temp) continue; 238 total += temp; 239 if (dx == 0) { 240 hist[0] += temp / 2; 241 hist[BINS - 1] += temp / 2; 242 } else { 243 const int idx = get_hist_bin_idx(dx, dy); 244 assert(idx >= 0 && idx < BINS); 245 hist[idx] += temp; 246 } 247 } 248 src += stride; 249 } 250 251 normalize_hog(total, hist); 252 } 253 254 // Computes and stores pixel level gradient information of a given superblock 255 // for HBD encode. 256 static inline void highbd_compute_gradient_info_sb(MACROBLOCK *const x, 257 BLOCK_SIZE sb_size, 258 PLANE_TYPE plane) { 259 PixelLevelGradientInfo *const grad_info_sb = 260 x->pixel_gradient_info + plane * MAX_SB_SQUARE; 261 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[plane].src.buf); 262 const int stride = x->plane[plane].src.stride; 263 const int ss_x = x->e_mbd.plane[plane].subsampling_x; 264 const int ss_y = x->e_mbd.plane[plane].subsampling_y; 265 const int sb_height = block_size_high[sb_size] >> ss_y; 266 const int sb_width = block_size_wide[sb_size] >> ss_x; 267 src += stride; 268 for (int r = 1; r < sb_height - 1; ++r) { 269 for (int c = 1; c < sb_width - 1; ++c) { 270 const uint16_t *above = &src[c - stride]; 271 const uint16_t *below = &src[c + stride]; 272 const uint16_t *left = &src[c - 1]; 273 const uint16_t *right = &src[c + 1]; 274 // Calculate gradient using Sobel filters. 275 const int dx = (right[-stride] + 2 * right[0] + right[stride]) - 276 (left[-stride] + 2 * left[0] + left[stride]); 277 const int dy = (below[-1] + 2 * below[0] + below[1]) - 278 (above[-1] + 2 * above[0] + above[1]); 279 grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0); 280 grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum = 281 (uint16_t)(abs(dx) + abs(dy)); 282 grad_info_sb[r * sb_width + c].hist_bin_idx = 283 (dx != 0) ? get_hist_bin_idx(dx, dy) : -1; 284 } 285 src += stride; 286 } 287 } 288 #endif // CONFIG_AV1_HIGHBITDEPTH 289 290 static inline void generate_hog(const uint8_t *src8, int stride, int rows, 291 int cols, float *hist, int highbd) { 292 #if CONFIG_AV1_HIGHBITDEPTH 293 if (highbd) { 294 highbd_generate_hog(src8, stride, rows, cols, hist); 295 return; 296 } 297 #else 298 (void)highbd; 299 #endif // CONFIG_AV1_HIGHBITDEPTH 300 lowbd_generate_hog(src8, stride, rows, cols, hist); 301 } 302 303 static inline void compute_gradient_info_sb(MACROBLOCK *const x, 304 BLOCK_SIZE sb_size, 305 PLANE_TYPE plane) { 306 #if CONFIG_AV1_HIGHBITDEPTH 307 if (is_cur_buf_hbd(&x->e_mbd)) { 308 highbd_compute_gradient_info_sb(x, sb_size, plane); 309 return; 310 } 311 #endif // CONFIG_AV1_HIGHBITDEPTH 312 lowbd_compute_gradient_info_sb(x, sb_size, plane); 313 } 314 315 // Gradient caching at superblock level is allowed only if all of the following 316 // conditions are satisfied: 317 // (1) The current frame is an intra only frame 318 // (2) Non-RD mode decisions are not enabled 319 // (3) The sf partition_search_type is set to SEARCH_PARTITION 320 // (4) Either intra_pruning_with_hog or chroma_intra_pruning_with_hog is enabled 321 // 322 // SB level caching of gradient data may not help in speedup for the following 323 // cases: 324 // (1) Inter frames (due to early intra gating) 325 // (2) When partition_search_type is not SEARCH_PARTITION 326 // Hence, gradient data is computed at block level in such cases. 327 static inline bool is_gradient_caching_for_hog_enabled( 328 const AV1_COMP *const cpi) { 329 const SPEED_FEATURES *const sf = &cpi->sf; 330 return frame_is_intra_only(&cpi->common) && !sf->rt_sf.use_nonrd_pick_mode && 331 (sf->part_sf.partition_search_type == SEARCH_PARTITION) && 332 (sf->intra_sf.intra_pruning_with_hog || 333 sf->intra_sf.chroma_intra_pruning_with_hog); 334 } 335 336 // Function to generate pixel level gradient information for a given superblock. 337 // Sets the flags 'is_sb_gradient_cached' for the specific plane-type if 338 // gradient info is generated for the same. 339 static inline void produce_gradients_for_sb(AV1_COMP *cpi, MACROBLOCK *x, 340 BLOCK_SIZE sb_size, int mi_row, 341 int mi_col) { 342 // Initialise flags related to hog data caching. 343 x->is_sb_gradient_cached[PLANE_TYPE_Y] = false; 344 x->is_sb_gradient_cached[PLANE_TYPE_UV] = false; 345 if (!is_gradient_caching_for_hog_enabled(cpi)) return; 346 347 const SPEED_FEATURES *sf = &cpi->sf; 348 const int num_planes = av1_num_planes(&cpi->common); 349 350 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size); 351 352 if (sf->intra_sf.intra_pruning_with_hog) { 353 compute_gradient_info_sb(x, sb_size, PLANE_TYPE_Y); 354 x->is_sb_gradient_cached[PLANE_TYPE_Y] = true; 355 } 356 if (sf->intra_sf.chroma_intra_pruning_with_hog && num_planes > 1) { 357 compute_gradient_info_sb(x, sb_size, PLANE_TYPE_UV); 358 x->is_sb_gradient_cached[PLANE_TYPE_UV] = true; 359 } 360 } 361 362 // Reuses the pixel level gradient data generated at superblock level for block 363 // level histogram computation. 364 static inline void generate_hog_using_gradient_cache(const MACROBLOCK *x, 365 int rows, int cols, 366 BLOCK_SIZE sb_size, 367 PLANE_TYPE plane, 368 float *hist) { 369 float total = 0.1f; 370 const int ss_x = x->e_mbd.plane[plane].subsampling_x; 371 const int ss_y = x->e_mbd.plane[plane].subsampling_y; 372 const int sb_width = block_size_wide[sb_size] >> ss_x; 373 374 // Derive the offset from the starting of the superblock in order to locate 375 // the block level gradient data in the cache. 376 const int mi_row_in_sb = x->e_mbd.mi_row & (mi_size_high[sb_size] - 1); 377 const int mi_col_in_sb = x->e_mbd.mi_col & (mi_size_wide[sb_size] - 1); 378 const int block_offset_in_grad_cache = 379 sb_width * (mi_row_in_sb << (MI_SIZE_LOG2 - ss_y)) + 380 (mi_col_in_sb << (MI_SIZE_LOG2 - ss_x)); 381 const PixelLevelGradientInfo *grad_info_blk = x->pixel_gradient_info + 382 plane * MAX_SB_SQUARE + 383 block_offset_in_grad_cache; 384 385 // Retrieve the cached gradient information and generate the histogram. 386 for (int r = 1; r < rows - 1; ++r) { 387 for (int c = 1; c < cols - 1; ++c) { 388 const uint16_t abs_dx_abs_dy_sum = 389 grad_info_blk[r * sb_width + c].abs_dx_abs_dy_sum; 390 if (!abs_dx_abs_dy_sum) continue; 391 total += abs_dx_abs_dy_sum; 392 const bool is_dx_zero = grad_info_blk[r * sb_width + c].is_dx_zero; 393 if (is_dx_zero) { 394 hist[0] += abs_dx_abs_dy_sum >> 1; 395 hist[BINS - 1] += abs_dx_abs_dy_sum >> 1; 396 } else { 397 const int8_t idx = grad_info_blk[r * sb_width + c].hist_bin_idx; 398 assert(idx >= 0 && idx < BINS); 399 hist[idx] += abs_dx_abs_dy_sum; 400 } 401 } 402 } 403 normalize_hog(total, hist); 404 } 405 406 static inline void collect_hog_data(const MACROBLOCK *x, BLOCK_SIZE bsize, 407 BLOCK_SIZE sb_size, int plane, float *hog) { 408 const MACROBLOCKD *xd = &x->e_mbd; 409 const struct macroblockd_plane *const pd = &xd->plane[plane]; 410 const int ss_x = pd->subsampling_x; 411 const int ss_y = pd->subsampling_y; 412 const int bh = block_size_high[bsize]; 413 const int bw = block_size_wide[bsize]; 414 const int rows = 415 ((xd->mb_to_bottom_edge >= 0) ? bh : (xd->mb_to_bottom_edge >> 3) + bh) >> 416 ss_y; 417 const int cols = 418 ((xd->mb_to_right_edge >= 0) ? bw : (xd->mb_to_right_edge >> 3) + bw) >> 419 ss_x; 420 421 // If gradient data is already generated at SB level, reuse the cached data. 422 // Otherwise, compute the data. 423 if (x->is_sb_gradient_cached[plane]) { 424 generate_hog_using_gradient_cache(x, rows, cols, sb_size, plane, hog); 425 } else { 426 const uint8_t *src = x->plane[plane].src.buf; 427 const int src_stride = x->plane[plane].src.stride; 428 generate_hog(src, src_stride, rows, cols, hog, is_cur_buf_hbd(xd)); 429 } 430 431 // Scale the hog so the luma and chroma are on the same scale 432 for (int b = 0; b < BINS; ++b) { 433 hog[b] *= (1 + ss_x) * (1 + ss_y); 434 } 435 } 436 437 static inline void prune_intra_mode_with_hog( 438 const MACROBLOCK *x, BLOCK_SIZE bsize, BLOCK_SIZE sb_size, float th, 439 uint8_t *directional_mode_skip_mask, int is_chroma) { 440 const int plane = is_chroma ? AOM_PLANE_U : AOM_PLANE_Y; 441 float hist[BINS] = { 0.0f }; 442 collect_hog_data(x, bsize, sb_size, plane, hist); 443 444 // Make prediction for each of the mode 445 float scores[DIRECTIONAL_MODES] = { 0.0f }; 446 av1_nn_predict(hist, &av1_intra_hog_model_nnconfig, 1, scores); 447 for (UV_PREDICTION_MODE uv_mode = UV_V_PRED; uv_mode <= UV_D67_PRED; 448 uv_mode++) { 449 if (scores[uv_mode - UV_V_PRED] <= th) { 450 directional_mode_skip_mask[uv_mode] = 1; 451 } 452 } 453 } 454 #undef BINS 455 456 int av1_calc_normalized_variance(aom_variance_fn_t vf, const uint8_t *const buf, 457 const int stride, const int is_hbd); 458 459 // Returns whether caching of source variance for 4x4 sub-blocks is allowed. 460 static inline bool is_src_var_for_4x4_sub_blocks_caching_enabled( 461 const AV1_COMP *const cpi) { 462 const SPEED_FEATURES *const sf = &cpi->sf; 463 if (cpi->oxcf.mode != ALLINTRA) return false; 464 465 if (sf->part_sf.partition_search_type == SEARCH_PARTITION) return true; 466 467 if (INTRA_RD_VAR_THRESH(cpi->oxcf.speed) <= 0 || 468 (sf->rt_sf.use_nonrd_pick_mode && !sf->rt_sf.hybrid_intra_pickmode)) 469 return false; 470 471 return true; 472 } 473 474 // Initialize the members of Block4x4VarInfo structure to -1 at the start 475 // of every superblock. 476 static inline void init_src_var_info_of_4x4_sub_blocks( 477 const AV1_COMP *const cpi, Block4x4VarInfo *src_var_info_of_4x4_sub_blocks, 478 const BLOCK_SIZE sb_size) { 479 if (!is_src_var_for_4x4_sub_blocks_caching_enabled(cpi)) return; 480 481 const int mi_count_in_sb = mi_size_wide[sb_size] * mi_size_high[sb_size]; 482 for (int i = 0; i < mi_count_in_sb; i++) { 483 src_var_info_of_4x4_sub_blocks[i].var = -1; 484 src_var_info_of_4x4_sub_blocks[i].log_var = -1.0; 485 } 486 } 487 488 // Returns the cost needed to send a uniformly distributed r.v. 489 static inline int write_uniform_cost(int n, int v) { 490 const int l = get_unsigned_bits(n); 491 const int m = (1 << l) - n; 492 if (l == 0) return 0; 493 if (v < m) 494 return av1_cost_literal(l - 1); 495 else 496 return av1_cost_literal(l); 497 } 498 /*!\endcond */ 499 500 /*!\brief Returns the rate cost for luma prediction mode info of intra blocks. 501 * 502 * \callergraph 503 */ 504 static inline int intra_mode_info_cost_y(const AV1_COMP *cpi, 505 const MACROBLOCK *x, 506 const MB_MODE_INFO *mbmi, 507 BLOCK_SIZE bsize, int mode_cost, 508 int discount_color_cost) { 509 int total_rate = mode_cost; 510 const ModeCosts *mode_costs = &x->mode_costs; 511 const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0; 512 const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra; 513 const int use_intrabc = mbmi->use_intrabc; 514 // Can only activate one mode. 515 assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc + 516 use_filter_intra) <= 1); 517 const int try_palette = av1_allow_palette( 518 cpi->common.features.allow_screen_content_tools, mbmi->bsize); 519 if (try_palette && mbmi->mode == DC_PRED) { 520 const MACROBLOCKD *xd = &x->e_mbd; 521 const int bsize_ctx = av1_get_palette_bsize_ctx(bsize); 522 const int mode_ctx = av1_get_palette_mode_ctx(xd); 523 total_rate += 524 mode_costs->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette]; 525 if (use_palette) { 526 const uint8_t *const color_map = xd->plane[0].color_index_map; 527 int block_width, block_height, rows, cols; 528 av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, 529 &cols); 530 const int plt_size = mbmi->palette_mode_info.palette_size[0]; 531 int palette_mode_cost = 532 mode_costs 533 ->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] + 534 write_uniform_cost(plt_size, color_map[0]); 535 uint16_t color_cache[2 * PALETTE_MAX_SIZE]; 536 const int n_cache = av1_get_palette_cache(xd, 0, color_cache); 537 palette_mode_cost += 538 av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache, 539 n_cache, cpi->common.seq_params->bit_depth); 540 if (!discount_color_cost) 541 palette_mode_cost += 542 av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP); 543 544 total_rate += palette_mode_cost; 545 } 546 } 547 if (av1_filter_intra_allowed(&cpi->common, mbmi)) { 548 total_rate += mode_costs->filter_intra_cost[mbmi->bsize][use_filter_intra]; 549 if (use_filter_intra) { 550 total_rate += 551 mode_costs->filter_intra_mode_cost[mbmi->filter_intra_mode_info 552 .filter_intra_mode]; 553 } 554 } 555 if (av1_is_directional_mode(mbmi->mode)) { 556 if (av1_use_angle_delta(bsize)) { 557 total_rate += 558 mode_costs->angle_delta_cost[mbmi->mode - V_PRED] 559 [MAX_ANGLE_DELTA + 560 mbmi->angle_delta[PLANE_TYPE_Y]]; 561 } 562 } 563 if (av1_allow_intrabc(&cpi->common)) 564 total_rate += mode_costs->intrabc_cost[use_intrabc]; 565 return total_rate; 566 } 567 568 /*!\brief Return the rate cost for chroma prediction mode info of intra blocks. 569 * 570 * \callergraph 571 */ 572 static inline int intra_mode_info_cost_uv(const AV1_COMP *cpi, 573 const MACROBLOCK *x, 574 const MB_MODE_INFO *mbmi, 575 BLOCK_SIZE bsize, int mode_cost) { 576 int total_rate = mode_cost; 577 const ModeCosts *mode_costs = &x->mode_costs; 578 const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0; 579 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; 580 // Can only activate one mode. 581 assert(((uv_mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1); 582 583 const int try_palette = av1_allow_palette( 584 cpi->common.features.allow_screen_content_tools, mbmi->bsize); 585 if (try_palette && uv_mode == UV_DC_PRED) { 586 const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info; 587 total_rate += 588 mode_costs->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette]; 589 if (use_palette) { 590 const int bsize_ctx = av1_get_palette_bsize_ctx(bsize); 591 const int plt_size = pmi->palette_size[1]; 592 const MACROBLOCKD *xd = &x->e_mbd; 593 const uint8_t *const color_map = xd->plane[1].color_index_map; 594 int palette_mode_cost = 595 mode_costs 596 ->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] + 597 write_uniform_cost(plt_size, color_map[0]); 598 uint16_t color_cache[2 * PALETTE_MAX_SIZE]; 599 const int n_cache = av1_get_palette_cache(xd, 1, color_cache); 600 palette_mode_cost += av1_palette_color_cost_uv( 601 pmi, color_cache, n_cache, cpi->common.seq_params->bit_depth); 602 palette_mode_cost += 603 av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP); 604 total_rate += palette_mode_cost; 605 } 606 } 607 const PREDICTION_MODE intra_mode = get_uv_mode(uv_mode); 608 if (av1_is_directional_mode(intra_mode)) { 609 if (av1_use_angle_delta(bsize)) { 610 total_rate += 611 mode_costs->angle_delta_cost[intra_mode - V_PRED] 612 [mbmi->angle_delta[PLANE_TYPE_UV] + 613 MAX_ANGLE_DELTA]; 614 } 615 } 616 return total_rate; 617 } 618 619 /*!\cond */ 620 // Makes a quick intra prediction and estimate the rdcost with a model without 621 // going through the whole txfm/quantize/itxfm process. 622 static int64_t intra_model_rd(const AV1_COMMON *cm, MACROBLOCK *const x, 623 int plane, BLOCK_SIZE plane_bsize, 624 TX_SIZE tx_size, int use_hadamard) { 625 MACROBLOCKD *const xd = &x->e_mbd; 626 const BitDepthInfo bd_info = get_bit_depth_info(xd); 627 int row, col; 628 assert(!is_inter_block(xd->mi[0])); 629 const int stepr = tx_size_high_unit[tx_size]; 630 const int stepc = tx_size_wide_unit[tx_size]; 631 const int txbw = tx_size_wide[tx_size]; 632 const int txbh = tx_size_high[tx_size]; 633 const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); 634 const int max_blocks_high = max_block_high(xd, plane_bsize, plane); 635 int64_t satd_cost = 0; 636 struct macroblock_plane *p = &x->plane[plane]; 637 struct macroblockd_plane *pd = &xd->plane[plane]; 638 // Prediction. 639 for (row = 0; row < max_blocks_high; row += stepr) { 640 for (col = 0; col < max_blocks_wide; col += stepc) { 641 av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size); 642 // Here we use p->src_diff and p->coeff as temporary buffers for 643 // prediction residue and transform coefficients. The buffers are only 644 // used in this for loop, therefore we don't need to properly add offset 645 // to the buffers. 646 av1_subtract_block( 647 bd_info, txbh, txbw, p->src_diff, block_size_wide[plane_bsize], 648 p->src.buf + (((row * p->src.stride) + col) << 2), p->src.stride, 649 pd->dst.buf + (((row * pd->dst.stride) + col) << 2), pd->dst.stride); 650 av1_quick_txfm(use_hadamard, tx_size, bd_info, p->src_diff, 651 block_size_wide[plane_bsize], p->coeff); 652 satd_cost += aom_satd(p->coeff, tx_size_2d[tx_size]); 653 } 654 } 655 return satd_cost; 656 } 657 /*!\endcond */ 658 659 /*!\brief Estimate the luma rdcost of a given intra mode and try to prune it. 660 * 661 * \ingroup intra_mode_search 662 * \callergraph 663 * This function first makes a quick luma prediction and estimates the rdcost 664 * with a model without going through the txfm, then try to prune the current 665 * mode if the new estimate y_rd > 1.25 * best_model_rd. 666 * 667 * \return Returns 1 if the given mode is prune; 0 otherwise. 668 */ 669 static inline int model_intra_yrd_and_prune(const AV1_COMP *const cpi, 670 MACROBLOCK *x, BLOCK_SIZE bsize, 671 int64_t *best_model_rd) { 672 const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]); 673 const int plane = 0; 674 const AV1_COMMON *cm = &cpi->common; 675 const int64_t this_model_rd = 676 intra_model_rd(cm, x, plane, bsize, tx_size, /*use_hadamard=*/1); 677 if (*best_model_rd != INT64_MAX && 678 this_model_rd > *best_model_rd + (*best_model_rd >> 2)) { 679 return 1; 680 } else if (this_model_rd < *best_model_rd) { 681 *best_model_rd = this_model_rd; 682 } 683 return 0; 684 } 685 686 #ifdef __cplusplus 687 } // extern "C" 688 #endif 689 690 #endif // AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_