var_based_part.c (83658B)
1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <limits.h> 13 #include <math.h> 14 #include <stdbool.h> 15 #include <stdio.h> 16 17 #include "config/aom_config.h" 18 #include "config/aom_dsp_rtcd.h" 19 #include "config/av1_rtcd.h" 20 21 #include "aom_dsp/aom_dsp_common.h" 22 #include "aom_dsp/binary_codes_writer.h" 23 #include "aom_ports/mem.h" 24 #include "aom_ports/aom_timer.h" 25 26 #include "av1/common/reconinter.h" 27 #include "av1/common/blockd.h" 28 #include "av1/common/quant_common.h" 29 30 #include "av1/encoder/encodeframe.h" 31 #include "av1/encoder/encodeframe_utils.h" 32 #include "av1/encoder/var_based_part.h" 33 #include "av1/encoder/reconinter_enc.h" 34 #include "av1/encoder/rdopt_utils.h" 35 36 // Possible values for the force_split variable while evaluating variance based 37 // partitioning. 
38 enum { 39 // Evaluate all partition types 40 PART_EVAL_ALL = 0, 41 // Force PARTITION_SPLIT 42 PART_EVAL_ONLY_SPLIT = 1, 43 // Force PARTITION_NONE 44 PART_EVAL_ONLY_NONE = 2 45 } UENUM1BYTE(PART_EVAL_STATUS); 46 47 typedef struct { 48 VPVariance *part_variances; 49 VPartVar *split[4]; 50 } variance_node; 51 52 static inline void tree_to_node(void *data, BLOCK_SIZE bsize, 53 variance_node *node) { 54 node->part_variances = NULL; 55 switch (bsize) { 56 case BLOCK_128X128: { 57 VP128x128 *vt = (VP128x128 *)data; 58 node->part_variances = &vt->part_variances; 59 for (int split_idx = 0; split_idx < 4; split_idx++) 60 node->split[split_idx] = &vt->split[split_idx].part_variances.none; 61 break; 62 } 63 case BLOCK_64X64: { 64 VP64x64 *vt = (VP64x64 *)data; 65 node->part_variances = &vt->part_variances; 66 for (int split_idx = 0; split_idx < 4; split_idx++) 67 node->split[split_idx] = &vt->split[split_idx].part_variances.none; 68 break; 69 } 70 case BLOCK_32X32: { 71 VP32x32 *vt = (VP32x32 *)data; 72 node->part_variances = &vt->part_variances; 73 for (int split_idx = 0; split_idx < 4; split_idx++) 74 node->split[split_idx] = &vt->split[split_idx].part_variances.none; 75 break; 76 } 77 case BLOCK_16X16: { 78 VP16x16 *vt = (VP16x16 *)data; 79 node->part_variances = &vt->part_variances; 80 for (int split_idx = 0; split_idx < 4; split_idx++) 81 node->split[split_idx] = &vt->split[split_idx].part_variances.none; 82 break; 83 } 84 case BLOCK_8X8: { 85 VP8x8 *vt = (VP8x8 *)data; 86 node->part_variances = &vt->part_variances; 87 for (int split_idx = 0; split_idx < 4; split_idx++) 88 node->split[split_idx] = &vt->split[split_idx].part_variances.none; 89 break; 90 } 91 default: { 92 VP4x4 *vt = (VP4x4 *)data; 93 assert(bsize == BLOCK_4X4); 94 node->part_variances = &vt->part_variances; 95 for (int split_idx = 0; split_idx < 4; split_idx++) 96 node->split[split_idx] = &vt->split[split_idx]; 97 break; 98 } 99 } 100 } 101 102 // Set variance values given sum square error, sum 
error, count. 103 static inline void fill_variance(uint32_t s2, int32_t s, int c, VPartVar *v) { 104 v->sum_square_error = s2; 105 v->sum_error = s; 106 v->log2_count = c; 107 } 108 109 static inline void get_variance(VPartVar *v) { 110 v->variance = 111 (int)(256 * (v->sum_square_error - 112 (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> 113 v->log2_count)) >> 114 v->log2_count); 115 } 116 117 static inline void sum_2_variances(const VPartVar *a, const VPartVar *b, 118 VPartVar *r) { 119 assert(a->log2_count == b->log2_count); 120 fill_variance(a->sum_square_error + b->sum_square_error, 121 a->sum_error + b->sum_error, a->log2_count + 1, r); 122 } 123 124 static inline void fill_variance_tree(void *data, BLOCK_SIZE bsize) { 125 variance_node node; 126 memset(&node, 0, sizeof(node)); 127 tree_to_node(data, bsize, &node); 128 sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); 129 sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); 130 sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); 131 sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); 132 sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], 133 &node.part_variances->none); 134 } 135 136 static inline void set_block_size(AV1_COMP *const cpi, int mi_row, int mi_col, 137 BLOCK_SIZE bsize) { 138 if (cpi->common.mi_params.mi_cols > mi_col && 139 cpi->common.mi_params.mi_rows > mi_row) { 140 CommonModeInfoParams *mi_params = &cpi->common.mi_params; 141 const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col); 142 const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col); 143 MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] = 144 &mi_params->mi_alloc[mi_alloc_idx]; 145 mi->bsize = bsize; 146 } 147 } 148 149 static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd, 150 const TileInfo *const tile, void *data, 151 BLOCK_SIZE bsize, int mi_row, int 
mi_col, 152 int64_t threshold, BLOCK_SIZE bsize_min, 153 PART_EVAL_STATUS force_split) { 154 AV1_COMMON *const cm = &cpi->common; 155 variance_node vt; 156 const int block_width = mi_size_wide[bsize]; 157 const int block_height = mi_size_high[bsize]; 158 int bs_width_check = block_width; 159 int bs_height_check = block_height; 160 int bs_width_vert_check = block_width >> 1; 161 int bs_height_horiz_check = block_height >> 1; 162 // On the right and bottom boundary we only need to check 163 // if half the bsize fits, because boundary is extended 164 // up to 64. So do this check only for sb_size = 64X64. 165 if (cm->seq_params->sb_size == BLOCK_64X64) { 166 if (tile->mi_col_end == cm->mi_params.mi_cols) { 167 bs_width_check = (block_width >> 1) + 1; 168 bs_width_vert_check = (block_width >> 2) + 1; 169 } 170 if (tile->mi_row_end == cm->mi_params.mi_rows) { 171 bs_height_check = (block_height >> 1) + 1; 172 bs_height_horiz_check = (block_height >> 2) + 1; 173 } 174 } 175 176 assert(block_height == block_width); 177 tree_to_node(data, bsize, &vt); 178 179 if (mi_col + bs_width_check <= tile->mi_col_end && 180 mi_row + bs_height_check <= tile->mi_row_end && 181 force_split == PART_EVAL_ONLY_NONE) { 182 set_block_size(cpi, mi_row, mi_col, bsize); 183 return 1; 184 } 185 if (force_split == PART_EVAL_ONLY_SPLIT) return 0; 186 187 // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if 188 // variance is below threshold, otherwise split will be selected. 189 // No check for vert/horiz split as too few samples for variance. 190 if (bsize == bsize_min) { 191 // Variance already computed to set the force_split. 
192 if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); 193 if (mi_col + bs_width_check <= tile->mi_col_end && 194 mi_row + bs_height_check <= tile->mi_row_end && 195 vt.part_variances->none.variance < threshold) { 196 set_block_size(cpi, mi_row, mi_col, bsize); 197 return 1; 198 } 199 return 0; 200 } else if (bsize > bsize_min) { 201 // Variance already computed to set the force_split. 202 if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); 203 // For key frame: take split for bsize above 32X32 or very high variance. 204 if (frame_is_intra_only(cm) && 205 (bsize > BLOCK_32X32 || 206 vt.part_variances->none.variance > (threshold << 4))) { 207 return 0; 208 } 209 // If variance is low, take the bsize (no split). 210 if (mi_col + bs_width_check <= tile->mi_col_end && 211 mi_row + bs_height_check <= tile->mi_row_end && 212 vt.part_variances->none.variance < threshold) { 213 set_block_size(cpi, mi_row, mi_col, bsize); 214 return 1; 215 } 216 // Check vertical split. 217 if (mi_row + bs_height_check <= tile->mi_row_end && 218 mi_col + bs_width_vert_check <= tile->mi_col_end) { 219 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); 220 BLOCK_SIZE plane_bsize = 221 get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x, 222 xd->plane[AOM_PLANE_U].subsampling_y); 223 get_variance(&vt.part_variances->vert[0]); 224 get_variance(&vt.part_variances->vert[1]); 225 if (vt.part_variances->vert[0].variance < threshold && 226 vt.part_variances->vert[1].variance < threshold && 227 plane_bsize < BLOCK_INVALID) { 228 set_block_size(cpi, mi_row, mi_col, subsize); 229 set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize); 230 return 1; 231 } 232 } 233 // Check horizontal split. 
234 if (mi_col + bs_width_check <= tile->mi_col_end && 235 mi_row + bs_height_horiz_check <= tile->mi_row_end) { 236 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); 237 BLOCK_SIZE plane_bsize = 238 get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x, 239 xd->plane[AOM_PLANE_U].subsampling_y); 240 get_variance(&vt.part_variances->horz[0]); 241 get_variance(&vt.part_variances->horz[1]); 242 if (vt.part_variances->horz[0].variance < threshold && 243 vt.part_variances->horz[1].variance < threshold && 244 plane_bsize < BLOCK_INVALID) { 245 set_block_size(cpi, mi_row, mi_col, subsize); 246 set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize); 247 return 1; 248 } 249 } 250 return 0; 251 } 252 return 0; 253 } 254 255 static inline int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide, 256 int pixels_high) { 257 int all_inside = 1; 258 for (int idx = 0; idx < 4; idx++) { 259 all_inside &= ((x16_idx + GET_BLK_IDX_X(idx, 3)) < pixels_wide); 260 all_inside &= ((y16_idx + GET_BLK_IDX_Y(idx, 3)) < pixels_high); 261 } 262 return all_inside; 263 } 264 265 #if CONFIG_AV1_HIGHBITDEPTH 266 // TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd 267 static inline void fill_variance_8x8avg_highbd( 268 const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf, 269 int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide, 270 int pixels_high) { 271 for (int idx = 0; idx < 4; idx++) { 272 const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); 273 const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); 274 unsigned int sse = 0; 275 int sum = 0; 276 if (x8_idx < pixels_wide && y8_idx < pixels_high) { 277 int src_avg = aom_highbd_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, 278 src_stride); 279 int dst_avg = aom_highbd_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, 280 dst_stride); 281 282 sum = src_avg - dst_avg; 283 sse = sum * sum; 284 } 285 fill_variance(sse, sum, 0, 
&vst->split[idx].part_variances.none); 286 } 287 } 288 #endif 289 290 static inline void fill_variance_8x8avg_lowbd( 291 const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf, 292 int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide, 293 int pixels_high) { 294 unsigned int sse[4] = { 0 }; 295 int sum[4] = { 0 }; 296 297 if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) { 298 int src_avg[4]; 299 int dst_avg[4]; 300 aom_avg_8x8_quad(src_buf, src_stride, x16_idx, y16_idx, src_avg); 301 aom_avg_8x8_quad(dst_buf, dst_stride, x16_idx, y16_idx, dst_avg); 302 for (int idx = 0; idx < 4; idx++) { 303 sum[idx] = src_avg[idx] - dst_avg[idx]; 304 sse[idx] = sum[idx] * sum[idx]; 305 } 306 } else { 307 for (int idx = 0; idx < 4; idx++) { 308 const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); 309 const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); 310 if (x8_idx < pixels_wide && y8_idx < pixels_high) { 311 int src_avg = 312 aom_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride); 313 int dst_avg = 314 aom_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, dst_stride); 315 sum[idx] = src_avg - dst_avg; 316 sse[idx] = sum[idx] * sum[idx]; 317 } 318 } 319 } 320 321 for (int idx = 0; idx < 4; idx++) { 322 fill_variance(sse[idx], sum[idx], 0, &vst->split[idx].part_variances.none); 323 } 324 } 325 326 // Obtain parameters required to calculate variance (such as sum, sse, etc,.) 327 // at 8x8 sub-block level for a given 16x16 block. 328 // The function can be called only when is_key_frame is false since sum is 329 // computed between source and reference frames. 
330 static inline void fill_variance_8x8avg(const uint8_t *src_buf, int src_stride, 331 const uint8_t *dst_buf, int dst_stride, 332 int x16_idx, int y16_idx, VP16x16 *vst, 333 int highbd_flag, int pixels_wide, 334 int pixels_high) { 335 #if CONFIG_AV1_HIGHBITDEPTH 336 if (highbd_flag) { 337 fill_variance_8x8avg_highbd(src_buf, src_stride, dst_buf, dst_stride, 338 x16_idx, y16_idx, vst, pixels_wide, 339 pixels_high); 340 return; 341 } 342 #else 343 (void)highbd_flag; 344 #endif // CONFIG_AV1_HIGHBITDEPTH 345 fill_variance_8x8avg_lowbd(src_buf, src_stride, dst_buf, dst_stride, x16_idx, 346 y16_idx, vst, pixels_wide, pixels_high); 347 } 348 349 static int compute_minmax_8x8(const uint8_t *src_buf, int src_stride, 350 const uint8_t *dst_buf, int dst_stride, 351 int x16_idx, int y16_idx, 352 #if CONFIG_AV1_HIGHBITDEPTH 353 int highbd_flag, 354 #endif 355 int pixels_wide, int pixels_high) { 356 int minmax_max = 0; 357 int minmax_min = 255; 358 // Loop over the 4 8x8 subblocks. 359 for (int idx = 0; idx < 4; idx++) { 360 const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3); 361 const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3); 362 int min = 0; 363 int max = 0; 364 if (x8_idx < pixels_wide && y8_idx < pixels_high) { 365 #if CONFIG_AV1_HIGHBITDEPTH 366 if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { 367 aom_highbd_minmax_8x8( 368 src_buf + y8_idx * src_stride + x8_idx, src_stride, 369 dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, &max); 370 } else { 371 aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride, 372 dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, 373 &max); 374 } 375 #else 376 aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride, 377 dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, 378 &max); 379 #endif 380 if ((max - min) > minmax_max) minmax_max = (max - min); 381 if ((max - min) < minmax_min) minmax_min = (max - min); 382 } 383 } 384 return (minmax_max - minmax_min); 385 } 386 387 // Function to compute 
average and variance of 4x4 sub-block. 388 // The function can be called only when is_key_frame is true since sum is 389 // computed using source frame only. 390 static inline void fill_variance_4x4avg(const uint8_t *src_buf, int src_stride, 391 int x8_idx, int y8_idx, VP8x8 *vst, 392 #if CONFIG_AV1_HIGHBITDEPTH 393 int highbd_flag, 394 #endif 395 int pixels_wide, int pixels_high, 396 int border_offset_4x4) { 397 for (int idx = 0; idx < 4; idx++) { 398 const int x4_idx = x8_idx + GET_BLK_IDX_X(idx, 2); 399 const int y4_idx = y8_idx + GET_BLK_IDX_Y(idx, 2); 400 unsigned int sse = 0; 401 int sum = 0; 402 if (x4_idx < pixels_wide - border_offset_4x4 && 403 y4_idx < pixels_high - border_offset_4x4) { 404 int src_avg; 405 int dst_avg = 128; 406 #if CONFIG_AV1_HIGHBITDEPTH 407 if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { 408 src_avg = aom_highbd_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, 409 src_stride); 410 } else { 411 src_avg = 412 aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride); 413 } 414 #else 415 src_avg = aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride); 416 #endif 417 418 sum = src_avg - dst_avg; 419 sse = sum * sum; 420 } 421 fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none); 422 } 423 } 424 425 static int64_t scale_part_thresh_content(int64_t threshold_base, int speed, 426 int non_reference_frame, 427 int is_static) { 428 int64_t threshold = threshold_base; 429 if (non_reference_frame && !is_static) threshold = (3 * threshold) >> 1; 430 if (speed >= 8) { 431 return (5 * threshold) >> 2; 432 } 433 return threshold; 434 } 435 436 // Tune thresholds less or more aggressively to prefer larger partitions 437 static inline void tune_thresh_based_on_qindex( 438 AV1_COMP *cpi, int64_t thresholds[], uint64_t block_sad, int current_qindex, 439 int num_pixels, bool is_segment_id_boosted, int source_sad_nonrd, 440 int lighting_change) { 441 double weight; 442 if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) { 443 const 
int win = 20; 444 if (current_qindex < QINDEX_LARGE_BLOCK_THR - win) 445 weight = 1.0; 446 else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win) 447 weight = 0.0; 448 else 449 weight = 450 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win); 451 if (num_pixels > RESOLUTION_480P) { 452 for (int i = 0; i < 4; i++) { 453 thresholds[i] <<= 1; 454 } 455 } 456 if (num_pixels <= RESOLUTION_288P) { 457 thresholds[3] = INT64_MAX; 458 if (is_segment_id_boosted == false) { 459 thresholds[1] <<= 2; 460 thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4; 461 } else { 462 thresholds[1] <<= 1; 463 thresholds[2] <<= 3; 464 } 465 // Allow for split to 8x8 for superblocks where part of it has 466 // moving boundary. So allow for sb with source_sad above threshold, 467 // and avoid very large source_sad or high source content, to avoid 468 // too many 8x8 within superblock. 469 uint64_t avg_source_sad_thresh = 25000; 470 uint64_t block_sad_low = 25000; 471 uint64_t block_sad_high = 50000; 472 if (cpi->svc.temporal_layer_id == 0 && 473 cpi->svc.number_temporal_layers > 1) { 474 // Increase the sad thresholds for base TL0, as reference/LAST is 475 // 2/4 frames behind (for 2/3 #TL). 476 avg_source_sad_thresh = 40000; 477 block_sad_high = 70000; 478 } 479 if (is_segment_id_boosted == false && 480 cpi->rc.avg_source_sad < avg_source_sad_thresh && 481 block_sad > block_sad_low && block_sad < block_sad_high && 482 !lighting_change) { 483 thresholds[2] = (3 * thresholds[2]) >> 2; 484 thresholds[3] = thresholds[2] << 3; 485 } 486 // Condition the increase of partition thresholds on the segment 487 // and the content. Avoid the increase for superblocks which have 488 // high source sad, unless the whole frame has very high motion 489 // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks 490 // have high source sad). 
491 } else if (num_pixels > RESOLUTION_480P && is_segment_id_boosted == false && 492 (source_sad_nonrd != kHighSad || 493 cpi->rc.avg_source_sad > 50000)) { 494 thresholds[0] = (3 * thresholds[0]) >> 1; 495 thresholds[3] = INT64_MAX; 496 if (current_qindex > QINDEX_LARGE_BLOCK_THR) { 497 thresholds[1] = 498 (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]); 499 thresholds[2] = 500 (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]); 501 } 502 } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && 503 is_segment_id_boosted == false && 504 (source_sad_nonrd != kHighSad || 505 cpi->rc.avg_source_sad > 50000)) { 506 thresholds[1] = 507 (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]); 508 thresholds[2] = 509 (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]); 510 thresholds[3] = INT64_MAX; 511 } 512 } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) { 513 thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0; 514 thresholds[2] = 515 (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2]; 516 } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) { 517 const int fac = (source_sad_nonrd <= kLowSad) ? 
2 : 1; 518 if (current_qindex < QINDEX_LARGE_BLOCK_THR - 45) 519 weight = 1.0; 520 else if (current_qindex > QINDEX_LARGE_BLOCK_THR + 45) 521 weight = 0.0; 522 else 523 weight = 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + 45) / (2 * 45); 524 thresholds[1] = 525 (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]); 526 thresholds[2] = 527 (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]); 528 thresholds[3] = 529 (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]); 530 } 531 if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128)) 532 thresholds[3] = INT64_MAX; 533 } 534 535 static void set_vbp_thresholds_key_frame(AV1_COMP *cpi, int64_t thresholds[], 536 int64_t threshold_base, 537 int threshold_left_shift, 538 int num_pixels) { 539 if (cpi->sf.rt_sf.force_large_partition_blocks_intra) { 540 const int shift_steps = 541 threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8); 542 assert(shift_steps >= 0); 543 threshold_base <<= shift_steps; 544 } 545 thresholds[0] = threshold_base; 546 thresholds[1] = threshold_base; 547 if (num_pixels < RESOLUTION_720P) { 548 thresholds[2] = threshold_base / 3; 549 thresholds[3] = threshold_base >> 1; 550 } else { 551 int shift_val = 2; 552 if (cpi->sf.rt_sf.force_large_partition_blocks_intra) { 553 shift_val = (cpi->oxcf.mode == ALLINTRA ? 
1 : 0); 554 } 555 556 thresholds[2] = threshold_base >> shift_val; 557 thresholds[3] = threshold_base >> shift_val; 558 } 559 thresholds[4] = threshold_base << 2; 560 } 561 562 static inline void tune_thresh_based_on_resolution( 563 AV1_COMP *cpi, int64_t thresholds[], int64_t threshold_base, 564 int current_qindex, int source_sad_rd, int num_pixels) { 565 if (num_pixels >= RESOLUTION_720P) thresholds[3] = thresholds[3] << 1; 566 if (num_pixels <= RESOLUTION_288P) { 567 const int qindex_thr[5][2] = { 568 { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 }, 569 }; 570 int th_idx = 0; 571 if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1) 572 th_idx = 573 (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0; 574 if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3) 575 th_idx = cpi->sf.rt_sf.var_part_based_on_qidx; 576 const int qindex_low_thr = qindex_thr[th_idx][0]; 577 const int qindex_high_thr = qindex_thr[th_idx][1]; 578 if (current_qindex >= qindex_high_thr) { 579 threshold_base = (5 * threshold_base) >> 1; 580 thresholds[1] = threshold_base >> 3; 581 thresholds[2] = threshold_base << 2; 582 thresholds[3] = threshold_base << 5; 583 } else if (current_qindex < qindex_low_thr) { 584 thresholds[1] = threshold_base >> 3; 585 thresholds[2] = threshold_base >> 1; 586 thresholds[3] = threshold_base << 3; 587 } else { 588 int64_t qi_diff_low = current_qindex - qindex_low_thr; 589 int64_t qi_diff_high = qindex_high_thr - current_qindex; 590 int64_t threshold_diff = qindex_high_thr - qindex_low_thr; 591 int64_t threshold_base_high = (5 * threshold_base) >> 1; 592 593 threshold_diff = threshold_diff > 0 ? 
threshold_diff : 1; 594 threshold_base = 595 (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) / 596 threshold_diff; 597 thresholds[1] = threshold_base >> 3; 598 thresholds[2] = ((qi_diff_low * threshold_base) + 599 qi_diff_high * (threshold_base >> 1)) / 600 threshold_diff; 601 thresholds[3] = ((qi_diff_low * (threshold_base << 5)) + 602 qi_diff_high * (threshold_base << 3)) / 603 threshold_diff; 604 } 605 } else if (num_pixels < RESOLUTION_720P) { 606 thresholds[2] = (5 * threshold_base) >> 2; 607 } else if (num_pixels < RESOLUTION_1080P) { 608 thresholds[2] = threshold_base << 1; 609 } else { 610 // num_pixels >= RESOLUTION_1080P 611 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) { 612 if (num_pixels < RESOLUTION_1440P) { 613 thresholds[2] = (5 * threshold_base) >> 1; 614 } else { 615 thresholds[2] = (7 * threshold_base) >> 1; 616 } 617 } else { 618 if (cpi->oxcf.speed > 7) { 619 thresholds[2] = 6 * threshold_base; 620 } else { 621 thresholds[2] = 3 * threshold_base; 622 } 623 } 624 } 625 } 626 627 // Increase the base partition threshold, based on content and noise level. 
628 static inline int64_t tune_base_thresh_content(AV1_COMP *cpi, 629 int64_t threshold_base, 630 int content_lowsumdiff, 631 int source_sad_nonrd, 632 int num_pixels) { 633 AV1_COMMON *const cm = &cpi->common; 634 int64_t updated_thresh_base = threshold_base; 635 if (cpi->noise_estimate.enabled && content_lowsumdiff && 636 num_pixels > RESOLUTION_480P && cm->current_frame.frame_number > 60) { 637 NOISE_LEVEL noise_level = 638 av1_noise_estimate_extract_level(&cpi->noise_estimate); 639 if (noise_level == kHigh) 640 updated_thresh_base = (5 * updated_thresh_base) >> 1; 641 else if (noise_level == kMedium && 642 !cpi->sf.rt_sf.prefer_large_partition_blocks) 643 updated_thresh_base = (5 * updated_thresh_base) >> 2; 644 } 645 updated_thresh_base = scale_part_thresh_content( 646 updated_thresh_base, cpi->oxcf.speed, 647 cpi->ppi->rtc_ref.non_reference_frame, cpi->rc.frame_source_sad == 0); 648 if (cpi->oxcf.speed >= 11 && source_sad_nonrd > kLowSad && 649 cpi->rc.high_motion_content_screen_rtc) 650 updated_thresh_base = updated_thresh_base << 4; 651 return updated_thresh_base; 652 } 653 654 static inline void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], 655 uint64_t blk_sad, int qindex, 656 int content_lowsumdiff, 657 int source_sad_nonrd, int source_sad_rd, 658 bool is_segment_id_boosted, 659 int lighting_change) { 660 AV1_COMMON *const cm = &cpi->common; 661 const int is_key_frame = frame_is_intra_only(cm); 662 const int threshold_multiplier = is_key_frame ? 
120 : 1; 663 const int ac_q = av1_ac_quant_QTX(qindex, 0, cm->seq_params->bit_depth); 664 int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q); 665 const int current_qindex = cm->quant_params.base_qindex; 666 const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift; 667 const int num_pixels = cm->width * cm->height; 668 669 if (is_key_frame) { 670 set_vbp_thresholds_key_frame(cpi, thresholds, threshold_base, 671 threshold_left_shift, num_pixels); 672 return; 673 } 674 675 threshold_base = tune_base_thresh_content( 676 cpi, threshold_base, content_lowsumdiff, source_sad_nonrd, num_pixels); 677 thresholds[0] = threshold_base >> 1; 678 thresholds[1] = threshold_base; 679 thresholds[3] = threshold_base << threshold_left_shift; 680 681 tune_thresh_based_on_resolution(cpi, thresholds, threshold_base, 682 current_qindex, source_sad_rd, num_pixels); 683 684 tune_thresh_based_on_qindex(cpi, thresholds, blk_sad, current_qindex, 685 num_pixels, is_segment_id_boosted, 686 source_sad_nonrd, lighting_change); 687 } 688 689 // Set temporal variance low flag for superblock 64x64. 690 // Only first 25 in the array are used in this case. 
691 static inline void set_low_temp_var_flag_64x64(CommonModeInfoParams *mi_params, 692 PartitionSearchInfo *part_info, 693 MACROBLOCKD *xd, VP64x64 *vt, 694 const int64_t thresholds[], 695 int mi_col, int mi_row) { 696 if (xd->mi[0]->bsize == BLOCK_64X64) { 697 if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) 698 part_info->variance_low[0] = 1; 699 } else if (xd->mi[0]->bsize == BLOCK_64X32) { 700 for (int part_idx = 0; part_idx < 2; part_idx++) { 701 if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2)) 702 part_info->variance_low[part_idx + 1] = 1; 703 } 704 } else if (xd->mi[0]->bsize == BLOCK_32X64) { 705 for (int part_idx = 0; part_idx < 2; part_idx++) { 706 if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2)) 707 part_info->variance_low[part_idx + 3] = 1; 708 } 709 } else { 710 static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } }; 711 for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) { 712 const int idx_str = mi_params->mi_stride * (mi_row + idx[lvl1_idx][0]) + 713 mi_col + idx[lvl1_idx][1]; 714 MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str; 715 716 if (mi_params->mi_cols <= mi_col + idx[lvl1_idx][1] || 717 mi_params->mi_rows <= mi_row + idx[lvl1_idx][0]) 718 continue; 719 720 if (*this_mi == NULL) continue; 721 722 if ((*this_mi)->bsize == BLOCK_32X32) { 723 int64_t threshold_32x32 = (5 * thresholds[1]) >> 3; 724 if (vt->split[lvl1_idx].part_variances.none.variance < threshold_32x32) 725 part_info->variance_low[lvl1_idx + 5] = 1; 726 } else { 727 // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block 728 // inside. 
729 if ((*this_mi)->bsize == BLOCK_16X16 || 730 (*this_mi)->bsize == BLOCK_32X16 || 731 (*this_mi)->bsize == BLOCK_16X32) { 732 for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) { 733 if (vt->split[lvl1_idx] 734 .split[lvl2_idx] 735 .part_variances.none.variance < (thresholds[2] >> 8)) 736 part_info->variance_low[(lvl1_idx << 2) + lvl2_idx + 9] = 1; 737 } 738 } 739 } 740 } 741 } 742 } 743 744 static inline void set_low_temp_var_flag_128x128( 745 CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info, 746 MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col, 747 int mi_row) { 748 if (xd->mi[0]->bsize == BLOCK_128X128) { 749 if (vt->part_variances.none.variance < (thresholds[0] >> 1)) 750 part_info->variance_low[0] = 1; 751 } else if (xd->mi[0]->bsize == BLOCK_128X64) { 752 for (int part_idx = 0; part_idx < 2; part_idx++) { 753 if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2)) 754 part_info->variance_low[part_idx + 1] = 1; 755 } 756 } else if (xd->mi[0]->bsize == BLOCK_64X128) { 757 for (int part_idx = 0; part_idx < 2; part_idx++) { 758 if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2)) 759 part_info->variance_low[part_idx + 3] = 1; 760 } 761 } else { 762 static const int idx64[4][2] = { 763 { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 } 764 }; 765 static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } }; 766 for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) { 767 const int idx_str = mi_params->mi_stride * (mi_row + idx64[lvl1_idx][0]) + 768 mi_col + idx64[lvl1_idx][1]; 769 MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str; 770 if (*mi_64 == NULL) continue; 771 if (mi_params->mi_cols <= mi_col + idx64[lvl1_idx][1] || 772 mi_params->mi_rows <= mi_row + idx64[lvl1_idx][0]) 773 continue; 774 const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3; 775 if ((*mi_64)->bsize == BLOCK_64X64) { 776 if (vt->split[lvl1_idx].part_variances.none.variance < threshold_64x64) 777 
part_info->variance_low[5 + lvl1_idx] = 1; 778 } else if ((*mi_64)->bsize == BLOCK_64X32) { 779 for (int part_idx = 0; part_idx < 2; part_idx++) 780 if (vt->split[lvl1_idx].part_variances.horz[part_idx].variance < 781 (threshold_64x64 >> 1)) 782 part_info->variance_low[9 + (lvl1_idx << 1) + part_idx] = 1; 783 } else if ((*mi_64)->bsize == BLOCK_32X64) { 784 for (int part_idx = 0; part_idx < 2; part_idx++) 785 if (vt->split[lvl1_idx].part_variances.vert[part_idx].variance < 786 (threshold_64x64 >> 1)) 787 part_info->variance_low[17 + (lvl1_idx << 1) + part_idx] = 1; 788 } else { 789 for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) { 790 const int idx_str1 = 791 mi_params->mi_stride * idx32[lvl2_idx][0] + idx32[lvl2_idx][1]; 792 MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1; 793 if (*mi_32 == NULL) continue; 794 795 if (mi_params->mi_cols <= 796 mi_col + idx64[lvl1_idx][1] + idx32[lvl2_idx][1] || 797 mi_params->mi_rows <= 798 mi_row + idx64[lvl1_idx][0] + idx32[lvl2_idx][0]) 799 continue; 800 const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3; 801 if ((*mi_32)->bsize == BLOCK_32X32) { 802 if (vt->split[lvl1_idx] 803 .split[lvl2_idx] 804 .part_variances.none.variance < threshold_32x32) 805 part_info->variance_low[25 + (lvl1_idx << 2) + lvl2_idx] = 1; 806 } else { 807 // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block 808 // inside. 
809 if ((*mi_32)->bsize == BLOCK_16X16 || 810 (*mi_32)->bsize == BLOCK_32X16 || 811 (*mi_32)->bsize == BLOCK_16X32) { 812 for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) { 813 VPartVar *none_var = &vt->split[lvl1_idx] 814 .split[lvl2_idx] 815 .split[lvl3_idx] 816 .part_variances.none; 817 if (none_var->variance < (thresholds[3] >> 8)) 818 part_info->variance_low[41 + (lvl1_idx << 4) + 819 (lvl2_idx << 2) + lvl3_idx] = 1; 820 } 821 } 822 } 823 } 824 } 825 } 826 } 827 } 828 829 static inline void set_low_temp_var_flag( 830 AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd, 831 VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition, 832 int mi_col, int mi_row, const bool is_small_sb) { 833 AV1_COMMON *const cm = &cpi->common; 834 // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected. 835 // If the temporal variance is small set the flag 836 // variance_low for the block. The variance threshold can be adjusted, the 837 // higher the more aggressive. 838 if (ref_frame_partition == LAST_FRAME) { 839 if (is_small_sb) 840 set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd, 841 &(vt->split[0]), thresholds, mi_col, mi_row); 842 else 843 set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt, 844 thresholds, mi_col, mi_row); 845 } 846 } 847 848 static const int pos_shift_16x16[4][4] = { 849 { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 } 850 }; 851 852 int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low, 853 int mi_row, int mi_col, 854 BLOCK_SIZE bsize) { 855 // Relative indices of MB inside the superblock. 856 const int mi_x = mi_row & 0xF; 857 const int mi_y = mi_col & 0xF; 858 // Relative indices of 16x16 block inside the superblock. 859 const int i = mi_x >> 2; 860 const int j = mi_y >> 2; 861 int force_skip_low_temp_var = 0; 862 // Set force_skip_low_temp_var based on the block size and block offset. 
  switch (bsize) {
    case BLOCK_64X64: force_skip_low_temp_var = variance_low[0]; break;
    case BLOCK_64X32:
      // Top / bottom half of the superblock.
      if (!mi_y && !mi_x) {
        force_skip_low_temp_var = variance_low[1];
      } else if (!mi_y && mi_x) {
        force_skip_low_temp_var = variance_low[2];
      }
      break;
    case BLOCK_32X64:
      // Left / right half of the superblock.
      if (!mi_y && !mi_x) {
        force_skip_low_temp_var = variance_low[3];
      } else if (mi_y && !mi_x) {
        force_skip_low_temp_var = variance_low[4];
      }
      break;
    case BLOCK_32X32:
      // One of the four 32x32 quadrants.
      if (!mi_y && !mi_x) {
        force_skip_low_temp_var = variance_low[5];
      } else if (mi_y && !mi_x) {
        force_skip_low_temp_var = variance_low[6];
      } else if (!mi_y && mi_x) {
        force_skip_low_temp_var = variance_low[7];
      } else if (mi_y && mi_x) {
        force_skip_low_temp_var = variance_low[8];
      }
      break;
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      // 16x16 granularity: table lookup by 16x16 position (indices 9..24).
      force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
      break;
    default: break;
  }

  return force_skip_low_temp_var;
}

// Returns the low-temporal-variance flag for the given block within a 128x128
// superblock, read from the variance_low[] array filled by
// set_low_temp_var_flag_128x128.
int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row,
                                    int mi_col, BLOCK_SIZE bsize) {
  int force_skip_low_temp_var = 0;
  int x, y;
  // The masked forms below are equivalent to the commented-out two-step
  // computations: masking out the bits below the shift makes the extra low
  // bits vanish after the shift, e.g.
  //   (mi_row & 0x17) >> 3 == ((mi_row & 0x1F) >> 4) << 1.
  x = (mi_col & 0x1F) >> 4;
  // y = (mi_row & 0x1F) >> 4;
  // const int idx64 = (y << 1) + x;
  y = (mi_row & 0x17) >> 3;
  const int idx64 = y + x;

  x = (mi_col & 0xF) >> 3;
  // y = (mi_row & 0xF) >> 3;
  // const int idx32 = (y << 1) + x;
  y = (mi_row & 0xB) >> 2;
  const int idx32 = y + x;

  x = (mi_col & 0x7) >> 2;
  // y = (mi_row & 0x7) >> 2;
  // const int idx16 = (y << 1) + x;
  y = (mi_row & 0x5) >> 1;
  const int idx16 = y + x;
  // Set force_skip_low_temp_var based on the block size and block offset.
  switch (bsize) {
    case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break;
    case BLOCK_128X64:
      assert((mi_col & 0x1F) == 0);
      force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)];
      break;
    case BLOCK_64X128:
      assert((mi_row & 0x1F) == 0);
      force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)];
      break;
    case BLOCK_64X64:
      // Location of this 64x64 block inside the 128x128 superblock
      force_skip_low_temp_var = variance_low[5 + idx64];
      break;
    case BLOCK_64X32:
      x = (mi_col & 0x1F) >> 4;
      y = (mi_row & 0x1F) >> 3;
      /*
      .---------------.---------------.
      | x=0,y=0,idx=0 | x=1,y=0,idx=2 |
      :---------------+---------------:
      | x=0,y=1,idx=1 | x=1,y=1,idx=3 |
      :---------------+---------------:
      | x=0,y=2,idx=4 | x=1,y=2,idx=6 |
      :---------------+---------------:
      | x=0,y=3,idx=5 | x=1,y=3,idx=7 |
      '---------------'---------------'
      */
      // Packing per the diagram above: pairs of rows share the same base.
      const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2);
      force_skip_low_temp_var = variance_low[9 + idx64x32];
      break;
    case BLOCK_32X64:
      x = (mi_col & 0x1F) >> 3;
      y = (mi_row & 0x1F) >> 4;
      const int idx32x64 = (y << 2) + x;
      force_skip_low_temp_var = variance_low[17 + idx32x64];
      break;
    case BLOCK_32X32:
      force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32];
      break;
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      force_skip_low_temp_var =
          variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
      break;
    default: break;
  }
  return force_skip_low_temp_var;
}

// Initializes the variance-based-partitioning thresholds for the frame from
// the frame-level qindex. No-op unless VAR_BASED_PARTITION is selected.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int qindex,
                                           int content_lowsumdiff) {
  SPEED_FEATURES *const sf = &cpi->sf;
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION) {
    return;
  } else {
    set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, 0, qindex,
                       content_lowsumdiff, 0, 0, 0, 0);
    // The threshold below is not changed locally.
    cpi->vbp_info.threshold_minmax = 15 + (qindex >> 3);
  }
}

// Computes chroma (U/V) SADs for the superblock against LAST (and, when
// available, GOLDEN/ALTREF), and sets the per-plane color-sensitivity flags
// on the MACROBLOCK by comparing chroma SAD against scaled luma SAD:
//   1 = color-sensitive, 0 = not, 2 = borderline (refined later per block).
// Skipped entirely for key frames and monochrome encodes.
static inline void chroma_check(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                unsigned int y_sad, unsigned int y_sad_g,
                                unsigned int y_sad_alt, bool is_key_frame,
                                bool zero_motion, unsigned int *uv_sad) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  int shift_upper_limit = 1;
  int shift_lower_limit = 3;
  int fac_uv = 6;
  if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;

  // Use lower threshold (more conservative in setting color flag) for
  // higher resolutions non-screen, which tend to have more camera noise.
  // Since this may be used to skip compound mode in nonrd pickmode, which
  // is generally more effective for higher resolutions, better to be more
  // conservative.
  if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
    if (cpi->common.width * cpi->common.height >= RESOLUTION_1080P)
      fac_uv = 3;
    else
      fac_uv = 5;
  }
  // Content/motion-dependent adjustment of the shift limits used below.
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
      cpi->rc.high_source_sad) {
    shift_lower_limit = 7;
  } else if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
             cpi->rc.percent_blocks_with_motion > 90 &&
             cpi->rc.frame_source_sad > 10000 && source_sad_nonrd > kLowSad) {
    shift_lower_limit = 8;
    shift_upper_limit = 3;
  } else if (source_sad_nonrd >= kMedSad && x->source_variance > 500 &&
             cpi->common.width * cpi->common.height >= 640 * 360) {
    shift_upper_limit = 2;
    shift_lower_limit = source_sad_nonrd > kMedSad ? 5 : 4;
  }

  MB_MODE_INFO *mi = xd->mi[0];
  const AV1_COMMON *const cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
  const YV12_BUFFER_CONFIG *yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, LAST_FRAME);
  struct buf_2d dst;
  unsigned int uv_sad_g = 0;
  unsigned int uv_sad_alt = 0;

  for (int plane = AOM_PLANE_U; plane < MAX_MB_PLANE; ++plane) {
    struct macroblock_plane *p = &x->plane[plane];
    struct macroblockd_plane *pd = &xd->plane[plane];
    const BLOCK_SIZE bs =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    if (bs != BLOCK_INVALID) {
      // For last:
      if (zero_motion) {
        if (mi->ref_frame[0] == LAST_FRAME) {
          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
        } else {
          // pre[] points at a different reference; set up the LAST-frame
          // chroma plane explicitly before computing the SAD.
          uint8_t *src = (plane == 1) ? yv12->u_buffer : yv12->v_buffer;
          setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width,
                           yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
                           xd->mi_col, sf, xd->plane[plane].subsampling_x,
                           xd->plane[plane].subsampling_y);

          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, dst.buf, dst.stride);
        }
      } else {
        // Non-zero motion: compare against the built predictor.
        uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
            p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride);
      }

      // For golden:
      if (y_sad_g != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
                         yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
                         xd->mi_col, sf, xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
                                            dst.stride);
      }

      // For altref:
      if (y_sad_alt != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_alt->u_buffer : yv12_alt->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_alt->uv_crop_width,
                         yv12_alt->uv_crop_height, yv12_alt->uv_stride,
                         xd->mi_row, xd->mi_col, sf,
                         xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_alt = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                              dst.buf, dst.stride);
      }
    }

    if (uv_sad[plane - 1] > (y_sad >> shift_upper_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 1;
    else if (uv_sad[plane - 1] < (y_sad >> shift_lower_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 0;
    // Borderline case: to be refined at coding block level in nonrd_pickmode,
    // for coding block size < sb_size.
    else
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 2;

    x->color_sensitivity_sb_g[COLOR_SENS_IDX(plane)] =
        uv_sad_g > y_sad_g / fac_uv;
    x->color_sensitivity_sb_alt[COLOR_SENS_IDX(plane)] =
        uv_sad_alt > y_sad_alt / fac_uv;
  }
}

// Fills the per-16x16 leaves of the variance tree for the superblock and
// seeds the force_split[] decisions: on key frames variance is computed from
// 4x4 averages of the source; otherwise from 8x8 averages of source vs the
// (LAST-frame) prediction, with 16x16 variance above thresholds[3] forcing
// splits up the tree. avg/min/maxvar_16x16 accumulate per-32x32 statistics
// for the caller.
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, PART_EVAL_STATUS *force_split,
    int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4],
    int64_t *thresholds, const uint8_t *src_buf, int src_stride,
    const uint8_t *dst_buf, int dst_stride, bool is_key_frame,
    const bool is_small_sb) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;
  int border_offset_4x4 = 0;
  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
  // dst_buf pointer is not used for is_key_frame, so it should be NULL.
  assert(IMPLIES(is_key_frame, dst_buf == NULL));
  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Clamp to the visible frame area at the right/bottom borders
  // (mb_to_*_edge is negative there, in 1/8-pel units).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
#if CONFIG_AV1_TEMPORAL_DENOISING
  temporal_denoising |= cpi->oxcf.noise_sensitivity;
#endif
  // For temporal filtering or temporal denoiser enabled: since the source
  // is modified we need to avoid 4x4 avg along superblock boundary, since
  // simd code will load 8 pixels for 4x4 avg and so can access source
  // data outside superblock (while it's being modified by temporal filter).
  // Temporal filtering is never done on key frames.
  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
  // Walk 64x64 -> 32x32 -> 16x16 blocks of the superblock.
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; blk64_idx++) {
    const int x64_idx = GET_BLK_IDX_X(blk64_idx, 6);
    const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 6);
    const int blk64_scale_idx = blk64_idx << 2;
    force_split[blk64_idx + 1] = PART_EVAL_ALL;

    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int x32_idx = x64_idx + GET_BLK_IDX_X(lvl1_idx, 5);
      const int y32_idx = y64_idx + GET_BLK_IDX_Y(lvl1_idx, 5);
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ALL;
      avg_16x16[blk64_idx][lvl1_idx] = 0;
      maxvar_16x16[blk64_idx][lvl1_idx] = 0;
      minvar_16x16[blk64_idx][lvl1_idx] = INT_MAX;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        const int x16_idx = x32_idx + GET_BLK_IDX_X(lvl2_idx, 4);
        const int y16_idx = y32_idx + GET_BLK_IDX_Y(lvl2_idx, 4);
        // force_split layout: [0]=SB, [1..4]=64x64, [5..20]=32x32,
        // [21..84]=16x16.
        const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
        VP16x16 *vst = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        force_split[split_index] = PART_EVAL_ALL;
        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
            const int x8_idx = x16_idx + GET_BLK_IDX_X(lvl3_idx, 3);
            const int y8_idx = y16_idx + GET_BLK_IDX_Y(lvl3_idx, 3);
            VP8x8 *vst2 = &vst->split[lvl3_idx];
            fill_variance_4x4avg(src_buf, src_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, border_offset_4x4);
          }
        } else {
          // Inter frame: 8x8-average variance of source vs prediction.
          fill_variance_8x8avg(src_buf, src_stride, dst_buf, dst_stride,
                               x16_idx, y16_idx, vst, is_cur_buf_hbd(xd),
                               pixels_wide, pixels_high);

          fill_variance_tree(vst, BLOCK_16X16);
          VPartVar *none_var = &vt->split[blk64_idx]
                                    .split[lvl1_idx]
                                    .split[lvl2_idx]
                                    .part_variances.none;
          get_variance(none_var);
          const int val_none_var = none_var->variance;
          avg_16x16[blk64_idx][lvl1_idx] += val_none_var;
          minvar_16x16[blk64_idx][lvl1_idx] =
              AOMMIN(minvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          maxvar_16x16[blk64_idx][lvl1_idx] =
              AOMMAX(maxvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          if (val_none_var > thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = PART_EVAL_ONLY_SPLIT;
            force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
            force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
            force_split[0] = PART_EVAL_ONLY_SPLIT;
          } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                     compute_minmax_variance && val_none_var > thresholds[2]) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            // (Currently dead: compute_minmax_variance is hard-wired to 0
            // above — see the TODO(kyslov) note.)
            int minmax = compute_minmax_8x8(src_buf, src_stride, dst_buf,
                                            dst_stride, x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                            xd->cur_buf->flags,
#endif
                                            pixels_wide, pixels_high);
            const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = PART_EVAL_ONLY_SPLIT;
              force_split[5 + blk64_scale_idx + lvl1_idx] =
                  PART_EVAL_ONLY_SPLIT;
              force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
              force_split[0] = PART_EVAL_ONLY_SPLIT;
            }
          }
        }
      }
    }
  }
}

// Picks the reference frame used for variance partitioning: GOLDEN or ALTREF
// replaces LAST only when its luma SAD is clearly lower (biased by `fac`),
// in which case pre planes / mi are re-pointed at that reference and *y_sad
// is updated accordingly.
static inline void set_ref_frame_for_partition(
    AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
    MV_REFERENCE_FRAME *ref_frame_partition, MB_MODE_INFO *mi,
    unsigned int *y_sad, unsigned int *y_sad_g, unsigned int *y_sad_alt,
    const YV12_BUFFER_CONFIG *yv12_g, const YV12_BUFFER_CONFIG *yv12_alt,
    int mi_row, int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  // No LAST bias (fac = 1.0) for enhancement spatial layers with a lower
  // quality layer; otherwise bias toward LAST with 0.9.
  const double fac =
      (cpi->svc.spatial_layer_id > 0 && cpi->svc.has_lower_quality_layer) ? 1.0
                                                                          : 0.9;
  const bool is_set_golden_ref_frame =
      *y_sad_g < fac * *y_sad && *y_sad_g < *y_sad_alt;
  const bool is_set_altref_ref_frame =
      *y_sad_alt < fac * *y_sad && *y_sad_alt < *y_sad_g;

  if (is_set_golden_ref_frame) {
    av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                         get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
    mi->ref_frame[0] = GOLDEN_FRAME;
    mi->mv[0].as_int = 0;
    *y_sad = *y_sad_g;
    *ref_frame_partition = GOLDEN_FRAME;
    x->nonrd_prune_ref_frame_search = 0;
    x->sb_me_partition = 0;
  } else if (is_set_altref_ref_frame) {
    av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
                         get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
    mi->ref_frame[0] = ALTREF_FRAME;
    mi->mv[0].as_int = 0;
    *y_sad = *y_sad_alt;
    *ref_frame_partition = ALTREF_FRAME;
    x->nonrd_prune_ref_frame_search = 0;
    x->sb_me_partition = 0;
  } else {
    *ref_frame_partition = LAST_FRAME;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
  }
}

// Manhattan (L1) distance between two full-pel motion vectors.
static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0,
                                        const FULLPEL_MV *mv1) {
  return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col);
}

// Tests the above/left neighbour superblocks' LAST-frame MVs against the
// current best MV and adopts one if it yields a sufficiently lower luma SAD
// (scaled by `multi`/8). Updates *y_sad and mi->mv[0] in place.
static inline void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
                                          unsigned int *y_sad, bool is_small_sb,
                                          int est_motion) {
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // TODO(yunqingwang@google.com): test if this condition works with other
  // speeds.
  if (est_motion > 2 && source_sad_nonrd > kMedSad) return;

  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];

  unsigned int above_y_sad = UINT_MAX;
  unsigned int left_y_sad = UINT_MAX;
  FULLPEL_MV above_mv = kZeroFullMv;
  FULLPEL_MV left_mv = kZeroFullMv;
  SubpelMvLimits subpel_mv_limits;
  const MV dummy_mv = { 0, 0 };
  av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, &dummy_mv);

  // Current best MV
  FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv);
  // Require a 7/8 SAD improvement at higher speeds with some motion.
  const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 7 : 8;

  if (xd->up_available) {
    const MB_MODE_INFO *above_mbmi = xd->above_mbmi;
    // Only consider inter neighbours predicted from LAST.
    if (above_mbmi->mode >= INTRA_MODE_END &&
        above_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = above_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      above_mv = get_fullmv_from_mv(&temp);

      if (mv_distance(&best_mv, &above_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv);
        above_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }
  if (xd->left_available) {
    const MB_MODE_INFO *left_mbmi = xd->left_mbmi;
    if (left_mbmi->mode >= INTRA_MODE_END &&
        left_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = left_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      left_mv = get_fullmv_from_mv(&temp);

      // Skip if identical to best or to the already-tested above MV.
      if (mv_distance(&best_mv, &left_mv) > 0 &&
          mv_distance(&above_mv, &left_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv);
        left_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }

  if (above_y_sad < ((multi * *y_sad) >> 3) && above_y_sad < left_y_sad) {
    *y_sad = above_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
  if (left_y_sad < ((multi * *y_sad) >> 3) && left_y_sad < above_y_sad) {
    *y_sad = left_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
}

// Runs integral-projection motion estimation for the superblock, updating
// *y_sad and mi->mv[0]. For screen content the result may additionally be
// promoted to the SB-level ME partition decision (sb_me_partition/sb_me_mv)
// and scroll counters.
static void do_int_pro_motion_estimation(AV1_COMP *cpi, MACROBLOCK *x,
                                         unsigned int *y_sad, int mi_row,
                                         int mi_col, int source_sad_nonrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mi = xd->mi[0];
  const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
  const int increase_col_sw = source_sad_nonrd > kMedSad &&
                              !cpi->rc.high_motion_content_screen_rtc &&
                              (cpi->svc.temporal_layer_id == 0 ||
                               cpi->rc.num_col_blscroll_last_tl0 > 2);
  int me_search_size_col = is_screen
                               ? increase_col_sw ? 512 : 96
                               : block_size_wide[cm->seq_params->sb_size] >> 1;
  // For screen use larger search size row motion to capture
  // vertical scroll, which can be larger motion.
  int me_search_size_row = is_screen
                               ? source_sad_nonrd > kMedSad ? 512 : 192
                               : block_size_high[cm->seq_params->sb_size] >> 1;
  // Double the search range for 4K+ base temporal layer frames.
  if (cm->width * cm->height >= 3840 * 2160 &&
      cpi->svc.temporal_layer_id == 0 && cpi->svc.number_temporal_layers > 1) {
    me_search_size_row = me_search_size_row << 1;
    me_search_size_col = me_search_size_col << 1;
  }
  unsigned int y_sad_zero;
  *y_sad = av1_int_pro_motion_estimation(
      cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, &y_sad_zero,
      me_search_size_col, me_search_size_row);
  // The logic below selects whether the motion estimated in the
  // int_pro_motion() will be used in nonrd_pickmode. Only do this
  // for screen for now.
  if (is_screen) {
    unsigned int thresh_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
    if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
      x->sb_me_partition = 1;
      x->sb_me_mv.as_int = mi->mv[0].as_int;
      // Track pure horizontal/vertical scroll on the base temporal layer.
      if (cpi->svc.temporal_layer_id == 0) {
        if (abs(mi->mv[0].as_mv.col) > 16 && abs(mi->mv[0].as_mv.row) == 0)
          x->sb_col_scroll++;
        else if (abs(mi->mv[0].as_mv.row) > 16 && abs(mi->mv[0].as_mv.col) == 0)
          x->sb_row_scroll++;
      }
    } else {
      x->sb_me_partition = 0;
      // Fall back to using zero motion.
      *y_sad = y_sad_zero;
      mi->mv[0].as_int = 0;
    }
  }
}

// Sets up the reference planes for the superblock, computes luma SADs against
// LAST (with optional motion estimation) plus GOLDEN/ALTREF candidates,
// selects the partitioning reference via set_ref_frame_for_partition, and
// builds the luma inter predictor when the chosen MV is non-zero.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g, unsigned int *y_sad_alt,
                         unsigned int *y_sad_last,
                         MV_REFERENCE_FRAME *ref_frame_partition,
                         struct scale_factors *sf_no_scale, int mi_row,
                         int mi_col, bool is_small_sb, bool scaled_ref_last) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  bool scaled_ref_golden = false;
  bool scaled_ref_alt = false;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];
  const YV12_BUFFER_CONFIG *yv12 =
      scaled_ref_last ? av1_get_scaled_ref_frame(cpi, LAST_FRAME)
                      : get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;
  const YV12_BUFFER_CONFIG *yv12_alt = NULL;
  // Check if LAST is a reference. For spatial layers always use it as
  // reference scaling.
  int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
                     cpi->svc.number_spatial_layers > 1;
  int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
  int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
                    cpi->sf.rt_sf.use_nonrd_altref_frame ||
                    (cpi->sf.rt_sf.use_comp_ref_nonrd &&
                     cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 1);

  // Check if GOLDEN should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_golden_ref &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    // Fall back to the scaled reference when resolutions differ.
    if (yv12_g && (yv12_g->y_crop_height != cm->height ||
                   yv12_g->y_crop_width != cm->width)) {
      yv12_g = av1_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
      scaled_ref_golden = true;
    }
    if (yv12_g && (yv12_g != yv12 || !use_last_ref)) {
      av1_setup_pre_planes(
          xd, 0, yv12_g, mi_row, mi_col,
          scaled_ref_golden ? NULL : get_ref_scale_factors(cm, GOLDEN_FRAME),
          num_planes);
      // Zero-MV luma SAD against GOLDEN.
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  // Check if ALTREF should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_alt_ref && (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
    if (yv12_alt && (yv12_alt->y_crop_height != cm->height ||
                     yv12_alt->y_crop_width != cm->width)) {
      yv12_alt = av1_get_scaled_ref_frame(cpi, ALTREF_FRAME);
      scaled_ref_alt = true;
    }
    if (yv12_alt && (yv12_alt != yv12 || !use_last_ref)) {
      av1_setup_pre_planes(
          xd, 0, yv12_alt, mi_row, mi_col,
          scaled_ref_alt ? NULL : get_ref_scale_factors(cm, ALTREF_FRAME),
          num_planes);
      // Zero-MV luma SAD against ALTREF.
      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  if (use_last_ref) {
    const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
    av1_setup_pre_planes(
        xd, 0, yv12, mi_row, mi_col,
        scaled_ref_last ? NULL : get_ref_scale_factors(cm, LAST_FRAME),
        num_planes);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = cm->seq_params->sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    int est_motion = cpi->sf.rt_sf.estimate_motion_for_var_based_partition;
    // TODO(b/290596301): Look into adjusting this condition.
    // There is regression on color content when
    // estimate_motion_for_var_based_partition = 3 and high motion,
    // so for now force it to 2 based on superblock sad.
    if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2;

    if ((est_motion == 1 || est_motion == 2) && xd->mb_to_right_edge >= 0 &&
        xd->mb_to_bottom_edge >= 0 && x->source_variance > 100 &&
        source_sad_nonrd > kLowSad) {
      do_int_pro_motion_estimation(cpi, x, y_sad, mi_row, mi_col,
                                   source_sad_nonrd);
    }

    // No motion estimation was run: compute the zero-MV SAD against LAST.
    if (*y_sad == UINT_MAX) {
      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }

    // Evaluate if neighbours' MVs give better predictions. Zero MV is tested
    // already, so only non-zero MVs are tested here. Here the neighbour blocks
    // are the first block above or left to this superblock.
    if (est_motion >= 2 && (xd->up_available || xd->left_available))
      evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion);

    *y_sad_last = *y_sad;
  }

  // Pick the ref frame for partitioning, use golden or altref frame only if
  // it has lower SAD, bias to LAST with factor 0.9.
  set_ref_frame_for_partition(cpi, x, xd, ref_frame_partition, mi, y_sad,
                              y_sad_g, y_sad_alt, yv12_g, yv12_alt, mi_row,
                              mi_col, num_planes);

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    if (!scaled_ref_last) {
      set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    } else {
      xd->block_ref_scale_factors[0] = sf_no_scale;
      xd->block_ref_scale_factors[1] = sf_no_scale;
    }
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  num_planes - 1);
  }
}

// Decides whether to split or merge a 16x16 partition block in variance based
// partitioning based on the 8x8 sub-block variances.
static inline PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
    VP16x16 *var_16x16_info, int64_t threshold16) {
  int max_8x8_var = 0, min_8x8_var = INT_MAX;
  // Scan the four 8x8 sub-blocks for their variance extremes.
  for (int split_idx = 0; split_idx < 4; split_idx++) {
    get_variance(&var_16x16_info->split[split_idx].part_variances.none);
    int this_8x8_var =
        var_16x16_info->split[split_idx].part_variances.none.variance;
    max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
    min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
  }
  // If the difference between maximum and minimum sub-block variances is high,
  // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
  // only PARTITION_NONE. The shift factor for threshold16 has been derived
  // empirically.
  return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
             ? PART_EVAL_ONLY_SPLIT
             : PART_EVAL_ONLY_NONE;
}

// Maps the set_zeromv_skip_based_on_source_sad speed-feature level to the
// maximum source SAD class for which the zeromv-skip shortcut may apply:
// higher levels accept progressively larger SAD (more aggressive skipping).
static inline bool is_set_force_zeromv_skip_based_on_src_sad(
    int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
  if (set_zeromv_skip_based_on_source_sad == 0) return false;

  if (set_zeromv_skip_based_on_source_sad >= 3)
    return source_sad_nonrd <= kLowSad;
  else if (set_zeromv_skip_based_on_source_sad >= 2)
    return source_sad_nonrd <= kVeryLowSad;
  else if (set_zeromv_skip_based_on_source_sad >= 1)
    return source_sad_nonrd == kZeroSad;

  return false;
}

// For an essentially static superblock (low source SAD and small luma/chroma
// SAD vs LAST), sets the whole-SB partition, flags force_zeromv_skip, frees
// the variance tree, and returns true so the caller can exit early from
// av1_choose_var_based_partitioning(). Note: on the true path this function
// takes ownership of `vt` and frees it.
static inline bool set_force_zeromv_skip_for_sb(
    AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt,
    unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad,
    BLOCK_SIZE bsize) {
  AV1_COMMON *const cm = &cpi->common;
  if (!is_set_force_zeromv_skip_based_on_src_sad(
          cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
          x->content_state_sb.source_sad_nonrd))
    return false;
  // Optionally double the exit thresholds via the speed feature.
  int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
  const int block_width = mi_size_wide[cm->seq_params->sb_size];
  const int block_height = mi_size_high[cm->seq_params->sb_size];
  const unsigned int thresh_exit_part_y =
      cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
  unsigned int thresh_exit_part_uv =
      CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
  // Be more aggressive in UV threshold if source_sad >= VeryLowSad
  // to suppress visual artifact caused by the speed feature:
  // set_zeromv_skip_based_on_source_sad = 2. For now only for
  // part_early_exit_zeromv = 1.
  if (x->content_state_sb.source_sad_nonrd >= kVeryLowSad &&
      cpi->sf.rt_sf.part_early_exit_zeromv == 1)
    thresh_exit_part_uv = thresh_exit_part_uv >> 3;
  // Only take the early exit for full (non-boundary) superblocks.
  if (mi_col + block_width <= tile->mi_col_end &&
      mi_row + block_height <= tile->mi_row_end && y_sad < thresh_exit_part_y &&
      uv_sad[0] < thresh_exit_part_uv && uv_sad[1] < thresh_exit_part_uv) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    aom_free(vt);
    // Partition shape is set here at SB level.
    // Exit needs to happen from av1_choose_var_based_partitioning().
1589 return true; 1590 } else if (x->content_state_sb.source_sad_nonrd == kZeroSad && 1591 cpi->sf.rt_sf.part_early_exit_zeromv >= 2) 1592 x->force_zeromv_skip_for_sb = 2; 1593 return false; 1594 } 1595 1596 int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile, 1597 ThreadData *td, MACROBLOCK *x, int mi_row, 1598 int mi_col) { 1599 #if CONFIG_COLLECT_COMPONENT_TIMING 1600 start_timing(cpi, choose_var_based_partitioning_time); 1601 #endif 1602 AV1_COMMON *const cm = &cpi->common; 1603 MACROBLOCKD *xd = &x->e_mbd; 1604 const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds; 1605 PART_EVAL_STATUS force_split[85]; 1606 int avg_64x64; 1607 int max_var_32x32[4]; 1608 int min_var_32x32[4]; 1609 int var_32x32; 1610 int var_64x64; 1611 int min_var_64x64 = INT_MAX; 1612 int max_var_64x64 = 0; 1613 int avg_16x16[4][4]; 1614 int maxvar_16x16[4][4]; 1615 int minvar_16x16[4][4]; 1616 const uint8_t *src_buf; 1617 const uint8_t *dst_buf; 1618 int dst_stride; 1619 unsigned int uv_sad[MAX_MB_PLANE - 1]; 1620 NOISE_LEVEL noise_level = kLow; 1621 bool is_zero_motion = true; 1622 bool scaled_ref_last = false; 1623 struct scale_factors sf_no_scale; 1624 av1_setup_scale_factors_for_frame(&sf_no_scale, cm->width, cm->height, 1625 cm->width, cm->height); 1626 1627 bool is_key_frame = 1628 (frame_is_intra_only(cm) || 1629 (cpi->ppi->use_svc && 1630 cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); 1631 1632 assert(cm->seq_params->sb_size == BLOCK_64X64 || 1633 cm->seq_params->sb_size == BLOCK_128X128); 1634 const bool is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64); 1635 const int num_64x64_blocks = is_small_sb ? 1 : 4; 1636 1637 unsigned int y_sad = UINT_MAX; 1638 unsigned int y_sad_g = UINT_MAX; 1639 unsigned int y_sad_alt = UINT_MAX; 1640 unsigned int y_sad_last = UINT_MAX; 1641 BLOCK_SIZE bsize = is_small_sb ? 
BLOCK_64X64 : BLOCK_128X128; 1642 1643 // Force skip encoding for all superblocks on slide change for 1644 // non_reference_frames. 1645 if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change && 1646 cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) { 1647 MB_MODE_INFO **mi = cm->mi_params.mi_grid_base + 1648 get_mi_grid_idx(&cm->mi_params, mi_row, mi_col); 1649 av1_set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize); 1650 x->force_zeromv_skip_for_sb = 1; 1651 return 0; 1652 } 1653 1654 // Ref frame used in partitioning. 1655 MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME; 1656 1657 int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1], 1658 vbp_thresholds[2], vbp_thresholds[3], 1659 vbp_thresholds[4] }; 1660 1661 const int segment_id = xd->mi[0]->segment_id; 1662 uint64_t blk_sad = 0; 1663 if (cpi->src_sad_blk_64x64 != NULL && 1664 cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { 1665 const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128) 1666 ? (cm->seq_params->mib_size >> 1) 1667 : cm->seq_params->mib_size; 1668 const int sb_cols = 1669 (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb; 1670 const int sbi_col = mi_col / sb_size_by_mb; 1671 const int sbi_row = mi_row / sb_size_by_mb; 1672 blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols]; 1673 } 1674 1675 const bool is_segment_id_boosted = 1676 cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && 1677 cyclic_refresh_segment_id_boosted(segment_id); 1678 const int sb_qindex = 1679 clamp(cm->delta_q_info.delta_q_present_flag 1680 ? cm->quant_params.base_qindex + x->delta_qindex 1681 : cm->quant_params.base_qindex, 1682 0, QINDEX_RANGE - 1); 1683 const int qindex = is_segment_id_boosted || cpi->roi.delta_qp_enabled 1684 ? 
av1_get_qindex(&cm->seg, segment_id, sb_qindex) 1685 : sb_qindex; 1686 set_vbp_thresholds( 1687 cpi, thresholds, blk_sad, qindex, x->content_state_sb.low_sumdiff, 1688 x->content_state_sb.source_sad_nonrd, x->content_state_sb.source_sad_rd, 1689 is_segment_id_boosted, x->content_state_sb.lighting_change); 1690 1691 src_buf = x->plane[AOM_PLANE_Y].src.buf; 1692 int src_stride = x->plane[AOM_PLANE_Y].src.stride; 1693 1694 // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, 1695 // 5-20 for the 16x16 blocks. 1696 force_split[0] = PART_EVAL_ALL; 1697 memset(x->part_search_info.variance_low, 0, 1698 sizeof(x->part_search_info.variance_low)); 1699 1700 // Check if LAST frame is NULL, and if so, treat this frame 1701 // as a key frame, for the purpose of the superblock partitioning. 1702 // LAST == NULL can happen in cases where enhancement spatial layers are 1703 // enabled dyanmically and the only reference is the spatial(GOLDEN). 1704 // If LAST frame has a different resolution: set the scaled_ref_last flag 1705 // and check if ref_scaled is NULL. 1706 if (!frame_is_intra_only(cm)) { 1707 const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, LAST_FRAME); 1708 if (ref == NULL) { 1709 is_key_frame = true; 1710 } else if (ref->y_crop_height != cm->height || 1711 ref->y_crop_width != cm->width) { 1712 scaled_ref_last = true; 1713 const YV12_BUFFER_CONFIG *ref_scaled = 1714 av1_get_scaled_ref_frame(cpi, LAST_FRAME); 1715 if (ref_scaled == NULL) is_key_frame = true; 1716 } 1717 } 1718 1719 x->source_variance = UINT_MAX; 1720 // For nord_pickmode: compute source_variance, only for superblocks with 1721 // some motion for now. This input can then be used to bias the partitioning 1722 // or the chroma_check. 
1723 if (cpi->sf.rt_sf.use_nonrd_pick_mode && 1724 x->content_state_sb.source_sad_nonrd > kLowSad) 1725 x->source_variance = av1_get_perpixel_variance_facade( 1726 cpi, xd, &x->plane[0].src, cm->seq_params->sb_size, AOM_PLANE_Y); 1727 1728 if (!is_key_frame) { 1729 setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last, 1730 &ref_frame_partition, &sf_no_scale, mi_row, mi_col, 1731 is_small_sb, scaled_ref_last); 1732 1733 MB_MODE_INFO *mi = xd->mi[0]; 1734 // Use reference SB directly for zero mv. 1735 if (mi->mv[0].as_int != 0) { 1736 dst_buf = xd->plane[AOM_PLANE_Y].dst.buf; 1737 dst_stride = xd->plane[AOM_PLANE_Y].dst.stride; 1738 is_zero_motion = false; 1739 } else { 1740 dst_buf = xd->plane[AOM_PLANE_Y].pre[0].buf; 1741 dst_stride = xd->plane[AOM_PLANE_Y].pre[0].stride; 1742 } 1743 } else { 1744 dst_buf = NULL; 1745 dst_stride = 0; 1746 } 1747 1748 // check and set the color sensitivity of sb. 1749 av1_zero(uv_sad); 1750 chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, y_sad_alt, is_key_frame, 1751 is_zero_motion, uv_sad); 1752 1753 x->force_zeromv_skip_for_sb = 0; 1754 1755 VP128x128 *vt; 1756 AOM_CHECK_MEM_ERROR(xd->error_info, vt, aom_malloc(sizeof(*vt))); 1757 vt->split = td->vt64x64; 1758 1759 // If the superblock is completely static (zero source sad) and 1760 // the y_sad (relative to LAST ref) is very small, take the sb_size partition 1761 // and exit, and force zeromv_last skip mode for nonrd_pickmode. 1762 // Only do this on the base segment (so the QP-boosted segment, if applied, 1763 // can still continue cleaning/ramping up the quality). 1764 // Condition on color uv_sad is also added. 1765 if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv && 1766 cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE && 1767 ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) { 1768 // Exit here, if zero mv skip flag is set at SB level. 
1769 if (set_force_zeromv_skip_for_sb(cpi, x, tile, vt, uv_sad, mi_row, mi_col, 1770 y_sad, bsize)) 1771 return 0; 1772 } 1773 1774 if (cpi->noise_estimate.enabled) 1775 noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate); 1776 1777 // Fill in the entire tree of 8x8 (for inter frames) or 4x4 (for key frames) 1778 // variances for splits. 1779 fill_variance_tree_leaves(cpi, x, vt, force_split, avg_16x16, maxvar_16x16, 1780 minvar_16x16, thresholds, src_buf, src_stride, 1781 dst_buf, dst_stride, is_key_frame, is_small_sb); 1782 1783 avg_64x64 = 0; 1784 for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) { 1785 max_var_32x32[blk64_idx] = 0; 1786 min_var_32x32[blk64_idx] = INT_MAX; 1787 const int blk64_scale_idx = blk64_idx << 2; 1788 for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) { 1789 const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2; 1790 for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) { 1791 if (!is_key_frame) continue; 1792 VP16x16 *vtemp = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx]; 1793 for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) 1794 fill_variance_tree(&vtemp->split[lvl3_idx], BLOCK_8X8); 1795 fill_variance_tree(vtemp, BLOCK_16X16); 1796 // If variance of this 16x16 block is above the threshold, force block 1797 // to split. This also forces a split on the upper levels. 1798 get_variance(&vtemp->part_variances.none); 1799 if (vtemp->part_variances.none.variance > thresholds[3]) { 1800 const int split_index = 21 + lvl1_scale_idx + lvl2_idx; 1801 force_split[split_index] = 1802 cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var 1803 ? 
get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3]) 1804 : PART_EVAL_ONLY_SPLIT; 1805 force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT; 1806 force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT; 1807 force_split[0] = PART_EVAL_ONLY_SPLIT; 1808 } 1809 } 1810 fill_variance_tree(&vt->split[blk64_idx].split[lvl1_idx], BLOCK_32X32); 1811 // If variance of this 32x32 block is above the threshold, or if its above 1812 // (some threshold of) the average variance over the sub-16x16 blocks, 1813 // then force this block to split. This also forces a split on the upper 1814 // (64x64) level. 1815 uint64_t frame_sad_thresh = 20000; 1816 const int is_360p_or_smaller = cm->width * cm->height <= RESOLUTION_360P; 1817 if (cpi->svc.number_temporal_layers > 2 && 1818 cpi->svc.temporal_layer_id == 0) 1819 frame_sad_thresh = frame_sad_thresh << 1; 1820 if (force_split[5 + blk64_scale_idx + lvl1_idx] == PART_EVAL_ALL) { 1821 get_variance(&vt->split[blk64_idx].split[lvl1_idx].part_variances.none); 1822 var_32x32 = 1823 vt->split[blk64_idx].split[lvl1_idx].part_variances.none.variance; 1824 max_var_32x32[blk64_idx] = AOMMAX(var_32x32, max_var_32x32[blk64_idx]); 1825 min_var_32x32[blk64_idx] = AOMMIN(var_32x32, min_var_32x32[blk64_idx]); 1826 const int max_min_var_16X16_diff = (maxvar_16x16[blk64_idx][lvl1_idx] - 1827 minvar_16x16[blk64_idx][lvl1_idx]); 1828 1829 if (var_32x32 > thresholds[2] || 1830 (!is_key_frame && var_32x32 > (thresholds[2] >> 1) && 1831 var_32x32 > (avg_16x16[blk64_idx][lvl1_idx] >> 1))) { 1832 force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT; 1833 force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT; 1834 force_split[0] = PART_EVAL_ONLY_SPLIT; 1835 } else if (!is_key_frame && is_360p_or_smaller && 1836 ((max_min_var_16X16_diff > (thresholds[2] >> 1) && 1837 maxvar_16x16[blk64_idx][lvl1_idx] > thresholds[2]) || 1838 (cpi->sf.rt_sf.prefer_large_partition_blocks && 1839 x->content_state_sb.source_sad_nonrd > kLowSad && 1840 
cpi->rc.frame_source_sad < frame_sad_thresh && 1841 maxvar_16x16[blk64_idx][lvl1_idx] > (thresholds[2] >> 4) && 1842 maxvar_16x16[blk64_idx][lvl1_idx] > 1843 (minvar_16x16[blk64_idx][lvl1_idx] << 2)))) { 1844 force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT; 1845 force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT; 1846 force_split[0] = PART_EVAL_ONLY_SPLIT; 1847 } 1848 } 1849 } 1850 if (force_split[1 + blk64_idx] == PART_EVAL_ALL) { 1851 fill_variance_tree(&vt->split[blk64_idx], BLOCK_64X64); 1852 get_variance(&vt->split[blk64_idx].part_variances.none); 1853 var_64x64 = vt->split[blk64_idx].part_variances.none.variance; 1854 max_var_64x64 = AOMMAX(var_64x64, max_var_64x64); 1855 min_var_64x64 = AOMMIN(var_64x64, min_var_64x64); 1856 // If the difference of the max-min variances of sub-blocks or max 1857 // variance of a sub-block is above some threshold of then force this 1858 // block to split. Only checking this for noise level >= medium, if 1859 // encoder is in SVC or if we already forced large blocks. 
1860 const int max_min_var_32x32_diff = 1861 max_var_32x32[blk64_idx] - min_var_32x32[blk64_idx]; 1862 const int check_max_var = max_var_32x32[blk64_idx] > thresholds[1] >> 1; 1863 const bool check_noise_lvl = noise_level >= kMedium || 1864 cpi->ppi->use_svc || 1865 cpi->sf.rt_sf.prefer_large_partition_blocks; 1866 const int64_t set_threshold = 3 * (thresholds[1] >> 3); 1867 1868 if (!is_key_frame && max_min_var_32x32_diff > set_threshold && 1869 check_max_var && check_noise_lvl) { 1870 force_split[1 + blk64_idx] = PART_EVAL_ONLY_SPLIT; 1871 force_split[0] = PART_EVAL_ONLY_SPLIT; 1872 } 1873 avg_64x64 += var_64x64; 1874 } 1875 if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT; 1876 } 1877 1878 if (force_split[0] == PART_EVAL_ALL) { 1879 fill_variance_tree(vt, BLOCK_128X128); 1880 get_variance(&vt->part_variances.none); 1881 const int set_avg_64x64 = (9 * avg_64x64) >> 5; 1882 if (!is_key_frame && vt->part_variances.none.variance > set_avg_64x64) 1883 force_split[0] = PART_EVAL_ONLY_SPLIT; 1884 1885 if (!is_key_frame && 1886 (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) && 1887 max_var_64x64 > thresholds[0] >> 1) 1888 force_split[0] = PART_EVAL_ONLY_SPLIT; 1889 } 1890 1891 if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end || 1892 !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col, 1893 thresholds[0], BLOCK_16X16, force_split[0])) { 1894 for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) { 1895 const int x64_idx = GET_BLK_IDX_X(blk64_idx, 4); 1896 const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 4); 1897 const int blk64_scale_idx = blk64_idx << 2; 1898 1899 // Now go through the entire structure, splitting every block size until 1900 // we get to one that's got a variance lower than our threshold. 
1901 if (set_vt_partitioning(cpi, xd, tile, &vt->split[blk64_idx], BLOCK_64X64, 1902 mi_row + y64_idx, mi_col + x64_idx, thresholds[1], 1903 BLOCK_16X16, force_split[1 + blk64_idx])) 1904 continue; 1905 for (int lvl1_idx = 0; lvl1_idx < 4; ++lvl1_idx) { 1906 const int x32_idx = GET_BLK_IDX_X(lvl1_idx, 3); 1907 const int y32_idx = GET_BLK_IDX_Y(lvl1_idx, 3); 1908 const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2; 1909 if (set_vt_partitioning( 1910 cpi, xd, tile, &vt->split[blk64_idx].split[lvl1_idx], 1911 BLOCK_32X32, (mi_row + y64_idx + y32_idx), 1912 (mi_col + x64_idx + x32_idx), thresholds[2], BLOCK_16X16, 1913 force_split[5 + blk64_scale_idx + lvl1_idx])) 1914 continue; 1915 for (int lvl2_idx = 0; lvl2_idx < 4; ++lvl2_idx) { 1916 const int x16_idx = GET_BLK_IDX_X(lvl2_idx, 2); 1917 const int y16_idx = GET_BLK_IDX_Y(lvl2_idx, 2); 1918 const int split_index = 21 + lvl1_scale_idx + lvl2_idx; 1919 VP16x16 *vtemp = 1920 &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx]; 1921 if (set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16, 1922 mi_row + y64_idx + y32_idx + y16_idx, 1923 mi_col + x64_idx + x32_idx + x16_idx, 1924 thresholds[3], BLOCK_8X8, 1925 force_split[split_index])) 1926 continue; 1927 for (int lvl3_idx = 0; lvl3_idx < 4; ++lvl3_idx) { 1928 const int x8_idx = GET_BLK_IDX_X(lvl3_idx, 1); 1929 const int y8_idx = GET_BLK_IDX_Y(lvl3_idx, 1); 1930 set_block_size(cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx), 1931 (mi_col + x64_idx + x32_idx + x16_idx + x8_idx), 1932 BLOCK_8X8); 1933 } 1934 } 1935 } 1936 } 1937 } 1938 1939 if (cpi->sf.rt_sf.short_circuit_low_temp_var) { 1940 set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds, 1941 ref_frame_partition, mi_col, mi_row, is_small_sb); 1942 } 1943 1944 aom_free(vt); 1945 #if CONFIG_COLLECT_COMPONENT_TIMING 1946 end_timing(cpi, choose_var_based_partitioning_time); 1947 #endif 1948 return 0; 1949 }