partition_strategy.c (106719B)
1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <float.h> 13 14 #include "config/aom_config.h" 15 16 #include "av1/encoder/encodeframe_utils.h" 17 #if CONFIG_THREE_PASS 18 #include "av1/encoder/thirdpass.h" 19 #endif 20 #include "config/aom_dsp_rtcd.h" 21 22 #include "av1/common/enums.h" 23 #include "av1/common/reconinter.h" 24 25 #if !CONFIG_REALTIME_ONLY 26 #include "av1/encoder/cnn.h" 27 #include "av1/encoder/partition_model_weights.h" 28 #include "av1/encoder/partition_cnn_weights.h" 29 #endif 30 #include "av1/encoder/encoder.h" 31 32 #include "av1/encoder/motion_search_facade.h" 33 #include "av1/encoder/partition_strategy.h" 34 #include "av1/encoder/partition_search.h" 35 #include "av1/encoder/rdopt.h" 36 37 #if !CONFIG_REALTIME_ONLY 38 static inline void simple_motion_search_prune_part_features( 39 AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, 40 int mi_row, int mi_col, BLOCK_SIZE bsize, float *features, 41 int features_to_get); 42 43 static bool ext_ml_model_decision_before_none( 44 AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT], 45 int *partition_none_allowed, int *partition_horz_allowed, 46 int *partition_vert_allowed, int *do_rectangular_split, 47 int *do_square_split); 48 49 static bool ext_ml_model_decision_before_none_part2( 50 AV1_COMP *cpi, 51 const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART], 52 int *prune_horz, int *prune_vert); 53 54 static bool ext_ml_model_decision_after_none( 55 ExtPartController *const ext_part_controller, const int is_intra_frame, 56 const float *const features_after_none, int *do_square_split, 57 int *do_rectangular_split); 58 59 static bool ext_ml_model_decision_after_none_part2( 60 AV1_COMP *const cpi, const float *const features_terminate, 61 int *terminate_partition_search); 62 63 static bool ext_ml_model_decision_after_split( 64 AV1_COMP *const cpi, const float *const features_terminate, 65 int *terminate_partition_search); 66 67 static bool ext_ml_model_decision_after_split_part2( 68 ExtPartController *const ext_part_controller, const int is_intra_frame, 69 const float *const features_prune, int *prune_rect_part_horz, 70 int *prune_rect_part_vert); 71 72 static bool ext_ml_model_decision_after_rect( 73 ExtPartController *const ext_part_controller, const int is_intra_frame, 74 const float *const features_after_rect, int *horza_partition_allowed, 75 int *horzb_partition_allowed, int *verta_partition_allowed, 76 int *vertb_partition_allowed); 77 78 static bool ext_ml_model_decision_after_part_ab( 79 AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx, 80 int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], 81 int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed, 82 int *const partition_vert4_allowed, unsigned int pb_source_variance, 83 int mi_row, int mi_col); 84 85 static inline int convert_bsize_to_idx(BLOCK_SIZE bsize) { 86 switch (bsize) { 87 case BLOCK_128X128: return 0; 88 case BLOCK_64X64: return 1; 89 case BLOCK_32X32: return 2; 90 case BLOCK_16X16: return 3; 91 case BLOCK_8X8: return 4; 92 default: assert(0 && "Invalid bsize"); return -1; 93 } 94 } 95 96 static char *get_feature_file_name(int id) { 97 static char *feature_file_names[] = { 98 "feature_before_partition_none", 99 "feature_before_partition_none_prune_rect", 100 "feature_after_partition_none_prune", 101 "feature_after_partition_none_terminate", 102 "feature_after_partition_split_terminate", 103 "feature_after_partition_split_prune_rect", 104 "feature_after_partition_rect", 105 "feature_after_partition_ab", 106 }; 107 108 return feature_file_names[id]; 109 } 110 111 static void write_features_to_file(const char *const path, 112 const bool is_test_mode, 113 const float *features, 114 const int feature_size, const int id, 115 const BLOCK_SIZE bsize, const int mi_row, 116 const int mi_col) { 117 if (!WRITE_FEATURE_TO_FILE && !is_test_mode) return; 118 119 char filename[256]; 120 snprintf(filename, sizeof(filename), "%s/%s", path, 121 get_feature_file_name(id)); 122 FILE *pfile = fopen(filename, "a"); 123 if (pfile == NULL) return; 124 if (!is_test_mode) { 125 fprintf(pfile, "%d,%d,%d,%d,%d\n", id, (int)bsize, mi_row, mi_col, 126 feature_size); 127 } 128 for (int i = 0; i < feature_size; ++i) { 129 fprintf(pfile, "%.6f", features[i]); 130 if (i < feature_size - 1) fprintf(pfile, ","); 131 } 132 fprintf(pfile, "\n"); 133 fclose(pfile); 134 } 135 136 // TODO(chiyotsai@google.com): This is very much a work in progress. We still 137 // need to the following: 138 // -- add support for hdres 139 // -- add support for pruning rectangular partitions 140 // -- use reconstructed pixels instead of source pixels for padding 141 // -- use chroma pixels in addition to luma pixels 142 static void intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x, 143 int quad_tree_idx, 144 int intra_cnn_based_part_prune_level, 145 PartitionSearchState *part_state) { 146 assert(cm->seq_params->sb_size >= BLOCK_64X64 && 147 "Invalid sb_size for intra_cnn!"); 148 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 149 const BLOCK_SIZE bsize = blk_params->bsize; 150 151 const int bsize_idx = convert_bsize_to_idx(bsize); 152 153 if (bsize == BLOCK_128X128) { 154 return; 155 } 156 157 PartitionSearchInfo *part_info = &x->part_search_info; 158 159 // Precompute the CNN part and cache the result in MACROBLOCK 160 if (bsize == BLOCK_64X64 && !part_info->cnn_output_valid) { 161 const CNN_CONFIG *cnn_config = &av1_intra_mode_cnn_partition_cnn_config; 162 163 // Prepare the output 164 const CNN_THREAD_DATA thread_data = { .num_workers = 1, .workers = NULL }; 165 const int num_outputs = 4; 166 const int output_dims[4] = { 1, 2, 4, 8 }; 167 const int out_chs[4] = { CNN_BRANCH_0_OUT_CH, CNN_BRANCH_1_OUT_CH, 168 CNN_BRANCH_2_OUT_CH, CNN_BRANCH_3_OUT_CH }; 169 float *output_buffer[CNN_TOT_OUT_CH]; 170 171 float **cur_output_buf = output_buffer; 172 float *curr_buf_ptr = part_info->cnn_buffer; 173 for (int output_idx = 0; output_idx < num_outputs; output_idx++) { 174 const int num_chs = out_chs[output_idx]; 175 const int ch_size = output_dims[output_idx] * output_dims[output_idx]; 176 for (int ch = 0; ch < num_chs; ch++) { 177 cur_output_buf[ch] = curr_buf_ptr; 178 curr_buf_ptr += ch_size; 179 } 180 cur_output_buf += num_chs; 181 } 182 183 CNN_MULTI_OUT output = { 184 .num_outputs = 4, 185 .output_channels = out_chs, 186 .output_strides = output_dims, 187 .output_buffer = output_buffer, 188 }; 189 190 // Prepare the input 191 const MACROBLOCKD *xd = &x->e_mbd; 192 const int bit_depth = xd->bd; 193 const int dc_q = 194 av1_dc_quant_QTX(x->qindex, 0, bit_depth) >> (bit_depth - 8); 195 part_info->log_q = log1pf((float)(dc_q * dc_q) / 256.0f); 196 part_info->log_q = 197 (part_info->log_q - av1_intra_mode_cnn_partition_mean[0]) / 198 av1_intra_mode_cnn_partition_std[0]; 199 200 const int width = 65, height = 65, 201 stride = x->plane[AOM_PLANE_Y].src.stride; 202 203 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { 204 uint16_t *image[1] = { 205 CONVERT_TO_SHORTPTR(x->plane[AOM_PLANE_Y].src.buf) - stride - 1 206 }; 207 208 if (!av1_cnn_predict_img_multi_out_highbd(image, width, height, stride, 209 cnn_config, &thread_data, 210 bit_depth, &output)) { 211 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 212 "Error allocating CNN data"); 213 return; 214 } 215 } else { 216 uint8_t *image[1] = { x->plane[AOM_PLANE_Y].src.buf - stride - 1 }; 217 218 if (!av1_cnn_predict_img_multi_out(image, width, height, stride, 219 cnn_config, &thread_data, &output)) { 220 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR, 221 "Error allocating CNN data"); 222 return; 223 } 224 } 225 226 part_info->cnn_output_valid = 1; 227 } 228 229 if (!part_info->cnn_output_valid) { 230 return; 231 } 232 233 const NN_CONFIG *dnn_configs[5] = { 234 NULL, 235 &av1_intra_mode_cnn_partition_branch_0_dnn_config, 236 &av1_intra_mode_cnn_partition_branch_1_dnn_config, 237 &av1_intra_mode_cnn_partition_branch_2_dnn_config, 238 &av1_intra_mode_cnn_partition_branch_3_dnn_config, 239 }; 240 241 const NN_CONFIG *dnn_config = dnn_configs[bsize_idx]; 242 243 float dnn_features[100]; 244 float logits[4] = { 0.0f }; 245 246 const float *branch_0 = part_info->cnn_buffer; 247 const float *branch_1 = branch_0 + CNN_BRANCH_0_OUT_SIZE; 248 const float *branch_2 = branch_1 + CNN_BRANCH_1_OUT_SIZE; 249 const float *branch_3 = branch_2 + CNN_BRANCH_2_OUT_SIZE; 250 251 if (bsize == BLOCK_64X64) { 252 int f_idx = 0; 253 for (int ch_idx = 0; ch_idx < CNN_BRANCH_0_OUT_CH; ch_idx++) { 254 dnn_features[f_idx++] = branch_0[ch_idx]; 255 } 256 257 const int spa_stride = 2 * 2; 258 for (int lin_idx = 0; lin_idx < spa_stride; lin_idx++) { 259 for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { 260 dnn_features[f_idx++] = branch_1[lin_idx + ch_idx * spa_stride]; 261 } 262 } 263 dnn_features[f_idx++] = part_info->log_q; 264 } else if (bsize == BLOCK_32X32) { 265 int f_idx = 0; 266 for (int idx = 0; idx < CNN_BRANCH_0_OUT_CH; idx++) { 267 dnn_features[f_idx++] = branch_0[idx]; 268 } 269 270 const int curr_lin_idx = quad_to_linear_1[quad_tree_idx - 1]; 271 const int spa_stride = 2 * 2; 272 for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { 273 dnn_features[f_idx++] = branch_1[curr_lin_idx + ch_idx * spa_stride]; 274 } 275 dnn_features[f_idx++] = part_info->log_q; 276 } else if (bsize == BLOCK_16X16) { 277 int f_idx = 0; 278 const int prev_quad_idx = (quad_tree_idx - 1) / 4; 279 const int prev_lin_idx = quad_to_linear_1[prev_quad_idx - 1]; 280 const int prev_spa_stride = 2 * 2; 281 for (int ch_idx = 0; ch_idx < CNN_BRANCH_1_OUT_CH; ch_idx++) { 282 dnn_features[f_idx++] = branch_1[prev_lin_idx + ch_idx * prev_spa_stride]; 283 } 284 285 const int curr_lin_idx = quad_to_linear_2[quad_tree_idx - 5]; 286 const int spa_stride = 4 * 4; 287 for (int ch_idx = 0; ch_idx < CNN_BRANCH_2_OUT_CH; ch_idx++) { 288 dnn_features[f_idx++] = branch_2[curr_lin_idx + ch_idx * spa_stride]; 289 } 290 dnn_features[f_idx++] = part_info->log_q; 291 } else if (bsize == BLOCK_8X8) { 292 int f_idx = 0; 293 const int prev_quad_idx = (quad_tree_idx - 1) / 4; 294 const int prev_lin_idx = quad_to_linear_2[prev_quad_idx - 5]; 295 const int prev_spa_stride = 4 * 4; 296 for (int ch_idx = 0; ch_idx < CNN_BRANCH_2_OUT_CH; ch_idx++) { 297 dnn_features[f_idx++] = branch_2[prev_lin_idx + ch_idx * prev_spa_stride]; 298 } 299 300 const int curr_lin_idx = quad_to_linear_3[quad_tree_idx - 21]; 301 const int spa_stride = 8 * 8; 302 for (int ch_idx = 0; ch_idx < CNN_BRANCH_3_OUT_CH; ch_idx++) { 303 dnn_features[f_idx++] = branch_3[curr_lin_idx + ch_idx * spa_stride]; 304 } 305 dnn_features[f_idx++] = part_info->log_q; 306 } else { 307 assert(0 && "Invalid bsize in intra_cnn partition"); 308 } 309 310 // Make decision 311 av1_nn_predict(dnn_features, dnn_config, 1, logits); 312 313 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 314 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 315 float split_only_thresh = 100.0f, no_split_thresh = -100.0f; 316 if (is_720p_or_larger) { 317 split_only_thresh = 318 av1_intra_mode_cnn_partition_split_thresh_hdres[bsize_idx]; 319 no_split_thresh = 320 av1_intra_mode_cnn_partition_no_split_thresh_hdres[bsize_idx]; 321 } else if (is_480p_or_larger) { 322 split_only_thresh = 323 av1_intra_mode_cnn_partition_split_thresh_midres[bsize_idx]; 324 no_split_thresh = 325 av1_intra_mode_cnn_partition_no_split_thresh_midres[bsize_idx]; 326 } else { 327 split_only_thresh = 328 av1_intra_mode_cnn_partition_split_thresh_lowres[bsize_idx]; 329 no_split_thresh = 330 av1_intra_mode_cnn_partition_no_split_thresh_lowres[bsize_idx]; 331 } 332 333 if (logits[0] > split_only_thresh) { 334 // As screen contents tend to choose larger partitions, do not prune 335 // PARTITION_NONE when intra_cnn_based_part_prune_level=1. 336 if (intra_cnn_based_part_prune_level != 1) { 337 part_state->partition_none_allowed = 0; 338 } 339 part_state->do_square_split = 1; 340 av1_disable_rect_partitions(part_state); 341 } 342 343 if (logits[0] < no_split_thresh) { 344 av1_disable_square_split_partition(part_state); 345 } 346 } 347 348 static inline int get_simple_motion_search_prune_agg(int qindex, 349 int prune_level, 350 int is_rect_part) { 351 assert(prune_level < TOTAL_AGG_LVLS); 352 if (prune_level == NO_PRUNING) { 353 return -1; 354 } 355 356 // Aggressiveness value for SIMPLE_MOTION_SEARCH_PRUNE_LEVEL except 357 // QIDX_BASED_AGG_LVL 358 const int sms_prune_agg_levels[TOTAL_SIMPLE_AGG_LVLS] = { 0, 1, 2, 3, 4, 5 }; 359 if (prune_level < TOTAL_SIMPLE_AGG_LVLS) { 360 return sms_prune_agg_levels[prune_level]; 361 } 362 363 // Map the QIDX_BASED_AGG_LVL to corresponding aggressiveness value. 364 // Aggressive pruning for lower quantizers in non-boosted frames to prune 365 // rectangular partitions. 366 const int qband = is_rect_part ? (qindex <= 90 ? 1 : 0) : 0; 367 const int sms_prune_agg_qindex_based[2] = { 3, 4 }; 368 return sms_prune_agg_qindex_based[qband]; 369 } 370 371 // Performs a simple_motion_search with a single reference frame and extract 372 // the variance of residues. Then use the features to determine whether we want 373 // to go straight to splitting without trying PARTITION_NONE 374 static void simple_motion_search_based_split(AV1_COMP *const cpi, MACROBLOCK *x, 375 SIMPLE_MOTION_DATA_TREE *sms_tree, 376 PartitionSearchState *part_state) { 377 const AV1_COMMON *const cm = &cpi->common; 378 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 379 const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; 380 const BLOCK_SIZE bsize = blk_params->bsize; 381 382 const int bsize_idx = convert_bsize_to_idx(bsize); 383 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 384 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 385 // res_idx is 0 for res < 480p, 1 for 480p, 2 for 720p+ 386 const int res_idx = is_480p_or_larger + is_720p_or_larger; 387 388 assert(bsize_idx >= 0 && bsize_idx <= 4 && 389 "Invalid bsize in simple_motion_search_based_split"); 390 391 const int agg = get_simple_motion_search_prune_agg( 392 x->qindex, cpi->sf.part_sf.simple_motion_search_prune_agg, 0); 393 if (agg < 0) { 394 return; 395 } 396 397 int ml_model_index = (agg == SIMPLE_AGG_LVL1 || agg == SIMPLE_AGG_LVL2); 398 399 const float *ml_mean = 400 av1_simple_motion_search_split_mean[ml_model_index][bsize_idx]; 401 const float *ml_std = 402 av1_simple_motion_search_split_std[ml_model_index][bsize_idx]; 403 const NN_CONFIG *nn_config = 404 av1_simple_motion_search_split_nn_config[ml_model_index][bsize_idx]; 405 406 const float split_only_thresh = 407 av1_simple_motion_search_split_thresh[agg][res_idx][bsize_idx]; 408 const float no_split_thresh = 409 av1_simple_motion_search_no_split_thresh[agg][res_idx][bsize_idx]; 410 411 float features[FEATURE_SIZE_SMS_SPLIT] = { 0.0f }; 412 simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, 413 bsize, features, 414 FEATURE_SMS_SPLIT_MODEL_FLAG); 415 416 // Write features to file 417 write_features_to_file(cpi->oxcf.partition_info_path, 418 cpi->ext_part_controller.test_mode, features, 419 FEATURE_SIZE_SMS_SPLIT, 0, bsize, mi_row, mi_col); 420 421 // Note: it is intended to not normalize the features here, to keep it 422 // consistent for all features collected and passed to the external model. 423 if (ext_ml_model_decision_before_none( 424 cpi, features, &part_state->partition_none_allowed, 425 &part_state->partition_rect_allowed[HORZ], 426 &part_state->partition_rect_allowed[VERT], 427 &part_state->do_rectangular_split, &part_state->do_square_split)) { 428 return; 429 } 430 431 for (int idx = 0; idx < FEATURE_SIZE_SMS_SPLIT; idx++) { 432 features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx]; 433 } 434 435 float score = 0.0f; 436 437 av1_nn_predict(features, nn_config, 1, &score); 438 439 if (score > split_only_thresh) { 440 av1_set_square_split_only(part_state); 441 } 442 443 if (cpi->sf.part_sf.simple_motion_search_split >= 2 && 444 score < no_split_thresh) { 445 av1_disable_square_split_partition(part_state); 446 } 447 448 // If the score is very low, prune rectangular split since it is unlikely to 449 // occur. 450 if (cpi->sf.part_sf.simple_motion_search_rect_split) { 451 const float scale = res_idx >= 2 ? 3.0f : 2.0f; 452 const float rect_split_thresh = 453 scale * av1_simple_motion_search_no_split_thresh[SIMPLE_AGG_LVL3] 454 [res_idx][bsize_idx]; 455 if (score < rect_split_thresh) { 456 part_state->do_rectangular_split = 0; 457 } 458 } 459 } 460 461 // Given a list of ref frames in refs, performs simple_motion_search on each of 462 // the refs and returns the ref with the smallest sse. Returns -1 if none of the 463 // ref in the list is available. Also stores the best sse and var in best_sse, 464 // best_var, respectively. If save_mv is 0, don't update mv_ref_fulls in 465 // sms_tree. If save_mv is 1, update mv_ref_fulls under sms_tree and the 466 // subtrees. 467 static int simple_motion_search_get_best_ref( 468 AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, 469 int mi_row, int mi_col, BLOCK_SIZE bsize, const int *const refs, 470 int num_refs, int use_subpixel, int save_mv, unsigned int *best_sse, 471 unsigned int *best_var) { 472 const AV1_COMMON *const cm = &cpi->common; 473 int best_ref = -1; 474 475 if (mi_col >= cm->mi_params.mi_cols || mi_row >= cm->mi_params.mi_rows) { 476 // If the whole block is outside of the image, set the var and sse to 0. 477 *best_var = 0; 478 *best_sse = 0; 479 480 return best_ref; 481 } 482 483 // Otherwise do loop through the reference frames and find the one with the 484 // minimum SSE 485 const int num_planes = 1; 486 487 *best_sse = INT_MAX; 488 489 for (int ref_idx = 0; ref_idx < num_refs; ref_idx++) { 490 const int ref = refs[ref_idx]; 491 492 if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref]) { 493 const FULLPEL_MV *start_mvs = sms_tree->start_mvs; 494 unsigned int curr_sse = 0, curr_var = 0; 495 const int_mv best_mv = av1_simple_motion_search_sse_var( 496 cpi, x, mi_row, mi_col, bsize, ref, start_mvs[ref], num_planes, 497 use_subpixel, &curr_sse, &curr_var); 498 if (curr_sse < *best_sse) { 499 *best_sse = curr_sse; 500 *best_var = curr_var; 501 best_ref = ref; 502 } 503 504 if (save_mv) { 505 sms_tree->start_mvs[ref].row = best_mv.as_mv.row / 8; 506 sms_tree->start_mvs[ref].col = best_mv.as_mv.col / 8; 507 508 if (bsize >= BLOCK_8X8) { 509 for (int r_idx = 0; r_idx < SUB_PARTITIONS_SPLIT; r_idx++) { 510 // Propagate the new motion vectors to a lower level 511 SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx]; 512 sub_tree->start_mvs[ref] = sms_tree->start_mvs[ref]; 513 } 514 } 515 } 516 } 517 } 518 519 return best_ref; 520 } 521 522 // Collects features using simple_motion_search and store them in features. The 523 // features are also cached in SIMPLE_MOTION_DATA_TREE. By default, the features 524 // collected are the sse and var from the subblocks flagged by features_to_get. 525 // Furthermore, if features is not NULL, then 7 more features are appended to 526 // the end of features: 527 // - log(1.0 + dc_q ** 2) 528 // - whether an above macroblock exists 529 // - width of above macroblock 530 // - height of above macroblock 531 // - whether a left marcoblock exists 532 // - width of left macroblock 533 // - height of left macroblock 534 static inline void simple_motion_search_prune_part_features( 535 AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, 536 int mi_row, int mi_col, BLOCK_SIZE bsize, float *features, 537 int features_to_get) { 538 const int w_mi = mi_size_wide[bsize]; 539 const int h_mi = mi_size_high[bsize]; 540 assert(mi_size_wide[bsize] == mi_size_high[bsize]); 541 assert(bsize >= BLOCK_8X8); 542 assert(cpi->ref_frame_flags & av1_ref_frame_flag_list[LAST_FRAME] || 543 cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME]); 544 545 // Setting up motion search 546 const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME 547 : LAST_FRAME }; 548 const int num_refs = 1; 549 const int use_subpixel = 1; 550 551 // Doing whole block first to update the mv 552 if (!sms_tree->sms_none_valid && features_to_get & FEATURE_SMS_NONE_FLAG) { 553 simple_motion_search_get_best_ref(cpi, x, sms_tree, mi_row, mi_col, bsize, 554 ref_list, num_refs, use_subpixel, 1, 555 &sms_tree->sms_none_feat[0], 556 &sms_tree->sms_none_feat[1]); 557 sms_tree->sms_none_valid = 1; 558 } 559 560 // Split subblocks 561 if (features_to_get & FEATURE_SMS_SPLIT_FLAG) { 562 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 563 for (int r_idx = 0; r_idx < SUB_PARTITIONS_SPLIT; r_idx++) { 564 const int sub_mi_col = mi_col + (r_idx & 1) * w_mi / 2; 565 const int sub_mi_row = mi_row + (r_idx >> 1) * h_mi / 2; 566 SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[r_idx]; 567 568 if (!sub_tree->sms_none_valid) { 569 simple_motion_search_get_best_ref( 570 cpi, x, sub_tree, sub_mi_row, sub_mi_col, subsize, ref_list, 571 num_refs, use_subpixel, 1, &sub_tree->sms_none_feat[0], 572 &sub_tree->sms_none_feat[1]); 573 sub_tree->sms_none_valid = 1; 574 } 575 } 576 } 577 578 // Rectangular subblocks 579 if (!sms_tree->sms_rect_valid && features_to_get & FEATURE_SMS_RECT_FLAG) { 580 // Horz subblock 581 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); 582 for (int r_idx = 0; r_idx < SUB_PARTITIONS_RECT; r_idx++) { 583 const int sub_mi_col = mi_col + 0; 584 const int sub_mi_row = mi_row + r_idx * h_mi / 2; 585 586 simple_motion_search_get_best_ref( 587 cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, 588 use_subpixel, 0, &sms_tree->sms_rect_feat[2 * r_idx], 589 &sms_tree->sms_rect_feat[2 * r_idx + 1]); 590 } 591 592 // Vert subblock 593 subsize = get_partition_subsize(bsize, PARTITION_VERT); 594 for (int r_idx = 0; r_idx < SUB_PARTITIONS_RECT; r_idx++) { 595 const int sub_mi_col = mi_col + r_idx * w_mi / 2; 596 const int sub_mi_row = mi_row + 0; 597 598 simple_motion_search_get_best_ref( 599 cpi, x, sms_tree, sub_mi_row, sub_mi_col, subsize, ref_list, num_refs, 600 use_subpixel, 0, &sms_tree->sms_rect_feat[4 + 2 * r_idx], 601 &sms_tree->sms_rect_feat[4 + 2 * r_idx + 1]); 602 } 603 sms_tree->sms_rect_valid = 1; 604 } 605 606 if (!features) return; 607 608 int f_idx = 0; 609 if (features_to_get & FEATURE_SMS_NONE_FLAG) { 610 for (int sub_idx = 0; sub_idx < 2; sub_idx++) { 611 features[f_idx++] = log1pf((float)sms_tree->sms_none_feat[sub_idx]); 612 } 613 } 614 615 if (features_to_get & FEATURE_SMS_SPLIT_FLAG) { 616 for (int sub_idx = 0; sub_idx < SUB_PARTITIONS_SPLIT; sub_idx++) { 617 SIMPLE_MOTION_DATA_TREE *sub_tree = sms_tree->split[sub_idx]; 618 features[f_idx++] = log1pf((float)sub_tree->sms_none_feat[0]); 619 features[f_idx++] = log1pf((float)sub_tree->sms_none_feat[1]); 620 } 621 } 622 623 if (features_to_get & FEATURE_SMS_RECT_FLAG) { 624 for (int sub_idx = 0; sub_idx < 8; sub_idx++) { 625 features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[sub_idx]); 626 } 627 } 628 629 const MACROBLOCKD *xd = &x->e_mbd; 630 set_offsets_for_motion_search(cpi, x, mi_row, mi_col, bsize); 631 632 // Q_INDEX 633 const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); 634 features[f_idx++] = log1pf((float)(dc_q * dc_q) / 256.0f); 635 636 // Neighbor stuff 637 const int has_above = !!xd->above_mbmi; 638 const int has_left = !!xd->left_mbmi; 639 const BLOCK_SIZE above_bsize = has_above ? xd->above_mbmi->bsize : bsize; 640 const BLOCK_SIZE left_bsize = has_left ? xd->left_mbmi->bsize : bsize; 641 features[f_idx++] = (float)has_above; 642 features[f_idx++] = (float)mi_size_wide_log2[above_bsize]; 643 features[f_idx++] = (float)mi_size_high_log2[above_bsize]; 644 features[f_idx++] = (float)has_left; 645 features[f_idx++] = (float)mi_size_wide_log2[left_bsize]; 646 features[f_idx++] = (float)mi_size_high_log2[left_bsize]; 647 } 648 649 // Performs a simple_motion_search with two reference frames and extract 650 // the variance of residues. Then use the features to determine whether we want 651 // to prune some partitions. 652 static void simple_motion_search_prune_rect(AV1_COMP *const cpi, MACROBLOCK *x, 653 SIMPLE_MOTION_DATA_TREE *sms_tree, 654 PartitionSearchState *part_state) { 655 const AV1_COMMON *const cm = &cpi->common; 656 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 657 const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; 658 const BLOCK_SIZE bsize = blk_params->bsize; 659 660 const int bsize_idx = convert_bsize_to_idx(bsize); 661 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 662 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 663 // res_idx is 0 for lowres, 1 for 48p, 2 for 720p+ 664 const int res_idx = is_480p_or_larger + is_720p_or_larger; 665 666 // Get model parameters 667 const NN_CONFIG *nn_config = 668 av1_simple_motion_search_prune_rect_nn_config[bsize_idx]; 669 const float *ml_mean = av1_simple_motion_search_prune_rect_mean[bsize_idx], 670 *ml_std = av1_simple_motion_search_prune_rect_std[bsize_idx]; 671 672 const int agg = get_simple_motion_search_prune_agg( 673 x->qindex, cpi->sf.part_sf.simple_motion_search_prune_agg, 1); 674 if (agg < 0) { 675 return; 676 } 677 678 const float prune_thresh = 679 av1_simple_motion_search_prune_rect_thresh[agg][res_idx][bsize_idx]; 680 681 // If there is no valid threshold, return immediately. 682 if (!nn_config || prune_thresh == 0.0f) { 683 return; 684 } 685 686 // Get features 687 float features[FEATURE_SIZE_SMS_PRUNE_PART] = { 0.0f }; 688 simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, 689 bsize, features, 690 FEATURE_SMS_PRUNE_PART_FLAG); 691 692 // Note: it is intended to not normalize the features here, to keep it 693 // consistent for all features collected and passed to the external model. 694 if (cpi->sf.part_sf.simple_motion_search_prune_rect && 695 !frame_is_intra_only(cm) && 696 (part_state->partition_rect_allowed[HORZ] || 697 part_state->partition_rect_allowed[VERT]) && 698 bsize >= BLOCK_8X8 && !av1_superres_scaled(cm)) { 699 // Write features to file 700 write_features_to_file( 701 cpi->oxcf.partition_info_path, cpi->ext_part_controller.test_mode, 702 features, FEATURE_SIZE_SMS_PRUNE_PART, 1, bsize, mi_row, mi_col); 703 704 if (ext_ml_model_decision_before_none_part2( 705 cpi, features, &part_state->prune_rect_part[HORZ], 706 &part_state->prune_rect_part[VERT])) { 707 return; 708 } 709 } 710 711 for (int f_idx = 0; f_idx < FEATURE_SIZE_SMS_PRUNE_PART; f_idx++) { 712 features[f_idx] = (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; 713 } 714 715 // Get probabilities 716 float scores[EXT_PARTITION_TYPES] = { 0.0f }, 717 probs[EXT_PARTITION_TYPES] = { 0.0f }; 718 const int num_classes = (bsize == BLOCK_128X128 || bsize == BLOCK_8X8) 719 ? PARTITION_TYPES 720 : EXT_PARTITION_TYPES; 721 722 av1_nn_predict(features, nn_config, 1, scores); 723 724 av1_nn_softmax(scores, probs, num_classes); 725 726 // Determine if we should prune rectangular partitions. 727 if (probs[PARTITION_HORZ] <= prune_thresh) { 728 part_state->prune_rect_part[HORZ] = 1; 729 } 730 if (probs[PARTITION_VERT] <= prune_thresh) { 731 part_state->prune_rect_part[VERT] = 1; 732 } 733 } 734 735 // Early terminates PARTITION_NONE using simple_motion_search features and the 736 // rate, distortion, and rdcost of PARTITION_NONE. This is only called when: 737 // - The frame is a show frame 738 // - The frame is not intra only 739 // - The current bsize is > BLOCK_8X8 740 // - blk_row + blk_height/2 < total_rows and blk_col + blk_width/2 < total_cols 741 void av1_simple_motion_search_early_term_none( 742 AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, 743 const RD_STATS *none_rdc, PartitionSearchState *part_state) { 744 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 745 const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; 746 const BLOCK_SIZE bsize = blk_params->bsize; 747 748 float features[FEATURE_SIZE_SMS_TERM_NONE] = { 0.0f }; 749 simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, 750 bsize, features, 751 FEATURE_SMS_PRUNE_PART_FLAG); 752 int f_idx = FEATURE_SIZE_SMS_PRUNE_PART; 753 754 features[f_idx++] = log1pf((float)none_rdc->rate); 755 features[f_idx++] = log1pf((float)none_rdc->dist); 756 features[f_idx++] = log1pf((float)none_rdc->rdcost); 757 758 assert(f_idx == FEATURE_SIZE_SMS_TERM_NONE); 759 760 const float *ml_mean = NULL; 761 const float *ml_std = NULL; 762 const float *ml_model = NULL; 763 764 if (bsize == BLOCK_128X128) { 765 ml_mean = av1_simple_motion_search_term_none_mean_128; 766 ml_std = av1_simple_motion_search_term_none_std_128; 767 ml_model = av1_simple_motion_search_term_none_model_128; 768 } else if (bsize == BLOCK_64X64) { 769 ml_mean = av1_simple_motion_search_term_none_mean_64; 770 ml_std = av1_simple_motion_search_term_none_std_64; 771 ml_model = av1_simple_motion_search_term_none_model_64; 772 } else if (bsize == BLOCK_32X32) { 773 ml_mean = av1_simple_motion_search_term_none_mean_32; 774 ml_std = av1_simple_motion_search_term_none_std_32; 775 ml_model = av1_simple_motion_search_term_none_model_32; 776 } else if (bsize == BLOCK_16X16) { 777 ml_mean = av1_simple_motion_search_term_none_mean_16; 778 ml_std = av1_simple_motion_search_term_none_std_16; 779 ml_model = av1_simple_motion_search_term_none_model_16; 780 } else { 781 assert(0 && "Unexpected block size in simple_motion_term_none"); 782 } 783 784 // Write features to file 785 write_features_to_file(cpi->oxcf.partition_info_path, 786 cpi->ext_part_controller.test_mode, features, 787 FEATURE_SIZE_SMS_TERM_NONE, 3, bsize, mi_row, mi_col); 788 789 if (ext_ml_model_decision_after_none_part2( 790 cpi, features, &part_state->terminate_partition_search)) { 791 return; 792 } 793 794 if (ml_model) { 795 float score = 0.0f; 796 for (f_idx = 0; f_idx < FEATURE_SIZE_SMS_TERM_NONE; f_idx++) { 797 score += 798 ml_model[f_idx] * (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx]; 799 } 800 score += ml_model[FEATURE_SIZE_SMS_TERM_NONE]; 801 802 if (score >= 0.0f) { 803 part_state->terminate_partition_search = 1; 804 } 805 } 806 } 807 808 void av1_get_max_min_partition_features(AV1_COMP *const cpi, MACROBLOCK *x, 809 int mi_row, int mi_col, 810 float *features) { 811 AV1_COMMON *const cm = &cpi->common; 812 MACROBLOCKD *xd = &x->e_mbd; 813 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 814 815 // Currently this only allows 128X128 SB size. May extend it to 64X64 SB size. 816 assert(sb_size == BLOCK_128X128); 817 818 int f_idx = 0; 819 820 const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); 821 const float log_q_sq = log1pf((float)(dc_q * dc_q) / 256.0f); 822 823 // Perform full-pixel single motion search in Y plane of 16x16 mbs in the sb 824 float sum_mv_row_sq = 0; 825 float sum_mv_row = 0; 826 float min_abs_mv_row = FLT_MAX; 827 float max_abs_mv_row = 0; 828 829 float sum_mv_col_sq = 0; 830 float sum_mv_col = 0; 831 float min_abs_mv_col = FLT_MAX; 832 float max_abs_mv_col = 0; 833 834 float sum_log_sse_sq = 0; 835 float sum_log_sse = 0; 836 float min_log_sse = FLT_MAX; 837 float max_log_sse = 0; 838 839 const BLOCK_SIZE mb_size = BLOCK_16X16; 840 const int mb_rows = block_size_high[sb_size] / block_size_high[mb_size]; 841 const int mb_cols = block_size_wide[sb_size] / block_size_wide[mb_size]; 842 const int mb_in_mi_size_high_log2 = mi_size_high_log2[mb_size]; 843 const int mb_in_mi_size_wide_log2 = mi_size_wide_log2[mb_size]; 844 845 for (int mb_row = 0; mb_row < mb_rows; mb_row++) 846 for (int mb_col = 0; mb_col < mb_cols; mb_col++) { 847 const int this_mi_row = mi_row + (mb_row << mb_in_mi_size_high_log2); 848 const int this_mi_col = mi_col + (mb_col << mb_in_mi_size_wide_log2); 849 unsigned int sse = 0; 850 unsigned int var = 0; 851 const FULLPEL_MV start_mv = kZeroFullMv; 852 const MV_REFERENCE_FRAME ref = 853 cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; 854 const int_mv best_mv = av1_simple_motion_search_sse_var( 855 cpi, x, this_mi_row, this_mi_col, mb_size, ref, start_mv, 1, 0, &sse, 856 &var); 857 858 const float mv_row = (float)(best_mv.as_mv.row / 8); 859 const float mv_col = (float)(best_mv.as_mv.col / 8); 860 const float log_sse = log1pf((float)sse); 861 const float abs_mv_row = fabsf(mv_row); 862 const float abs_mv_col = fabsf(mv_col); 863 864 sum_mv_row_sq += mv_row * mv_row; 865 sum_mv_row += mv_row; 866 sum_mv_col_sq += mv_col * mv_col; 867 sum_mv_col += mv_col; 868 869 if (abs_mv_row < min_abs_mv_row) min_abs_mv_row = abs_mv_row; 870 if (abs_mv_row > max_abs_mv_row) max_abs_mv_row = abs_mv_row; 871 if (abs_mv_col < min_abs_mv_col) min_abs_mv_col = abs_mv_col; 872 if (abs_mv_col > max_abs_mv_col) max_abs_mv_col = abs_mv_col; 873 874 sum_log_sse_sq += log_sse * log_sse; 875 sum_log_sse += log_sse; 876 if (log_sse < min_log_sse) min_log_sse = log_sse; 877 if (log_sse > max_log_sse) max_log_sse = log_sse; 878 } 879 const int blks = mb_rows * mb_cols; 880 const float avg_mv_row = sum_mv_row / (float)blks; 881 const float var_mv_row = 882 sum_mv_row_sq / (float)blks - avg_mv_row * avg_mv_row; 883 884 const float avg_mv_col = sum_mv_col / (float)blks; 885 const float var_mv_col = 886 sum_mv_col_sq / (float)blks - avg_mv_col * avg_mv_col; 887 888 const float avg_log_sse = sum_log_sse / (float)blks; 889 const float var_log_sse = 890 sum_log_sse_sq / (float)blks - avg_log_sse * avg_log_sse; 891 892 features[f_idx++] = avg_log_sse; 893 features[f_idx++] = avg_mv_col; 894 features[f_idx++] = avg_mv_row; 895 features[f_idx++] = log_q_sq; 896 features[f_idx++] = max_abs_mv_col; 897 features[f_idx++] = max_abs_mv_row; 898 features[f_idx++] = max_log_sse; 899 features[f_idx++] = min_abs_mv_col; 900 features[f_idx++] = min_abs_mv_row; 901 features[f_idx++] = min_log_sse; 902 features[f_idx++] = var_log_sse; 903 features[f_idx++] = var_mv_col; 904 features[f_idx++] = var_mv_row; 905 906 assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED); 907 } 908 909 // Convert result index to block size. 910 // result idx block size 911 // 0 BLOCK_16X16 912 // 1 BLOCK_32X32 913 // 2 BLOCK_64X64 914 // 3 BLOCK_128X128 915 static BLOCK_SIZE get_block_size(int idx) { 916 return (BLOCK_SIZE)((idx + 2) * 3); 917 } 918 919 BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi, 920 const MACROBLOCK *const x, 921 const float *features) { 922 float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; 923 const NN_CONFIG *nn_config = &av1_max_part_pred_nn_config; 924 925 assert(cpi->sf.part_sf.auto_max_partition_based_on_simple_motion != 926 NOT_IN_USE); 927 928 av1_nn_predict(features, nn_config, 1, scores); 929 930 int result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; 931 if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == 932 DIRECT_PRED) { 933 result = 0; 934 float max_score = scores[0]; 935 for (int i = 1; i < MAX_NUM_CLASSES_MAX_MIN_PART_PRED; ++i) { 936 if (scores[i] > max_score) { 937 max_score = scores[i]; 938 result = i; 939 } 940 } 941 return get_block_size(result); 942 } 943 944 float probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f }; 945 av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED); 946 947 if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == 948 RELAXED_PRED) { 949 for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; 950 --result) { 951 if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { 952 probs[result] += probs[result + 1]; 953 } 954 if (probs[result] > 0.2) break; 955 } 956 } else if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion == 957 ADAPT_PRED) { 958 const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size; 959 // TODO(debargha): x->source_variance is unavailable at this point, 960 // so compute. The redundant recomputation later can be removed. 961 const unsigned int source_variance = av1_get_perpixel_variance_facade( 962 cpi, &x->e_mbd, &x->plane[0].src, sb_size, AOM_PLANE_Y); 963 if (source_variance > 16) { 964 const double thresh = source_variance < 128 ? 0.05 : 0.1; 965 for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0; 966 --result) { 967 if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) { 968 probs[result] += probs[result + 1]; 969 } 970 if (probs[result] > thresh) break; 971 } 972 } 973 } 974 975 return get_block_size(result); 976 } 977 978 // Get the minimum partition block width and height(in log scale) under a 979 // SIMPLE_MOTION_DATA_TREE. 980 static inline void get_min_bsize(const SIMPLE_MOTION_DATA_TREE *sms_tree, 981 int *min_bw, int *min_bh) { 982 if (!sms_tree) return; 983 984 const BLOCK_SIZE bsize = sms_tree->block_size; 985 if (bsize == BLOCK_4X4) { 986 *min_bw = 0; 987 *min_bh = 0; 988 return; 989 } 990 991 PARTITION_TYPE part_type = sms_tree->partitioning; 992 if (part_type == PARTITION_INVALID) return; 993 994 if (part_type == PARTITION_SPLIT) { 995 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 996 get_min_bsize(sms_tree->split[i], min_bw, min_bh); 997 } 998 } else { 999 if (part_type == PARTITION_HORZ_A || part_type == PARTITION_HORZ_B || 1000 part_type == PARTITION_VERT_A || part_type == PARTITION_VERT_B) 1001 part_type = PARTITION_SPLIT; 1002 const BLOCK_SIZE subsize = get_partition_subsize(bsize, part_type); 1003 if (subsize != BLOCK_INVALID) { 1004 *min_bw = AOMMIN(*min_bw, mi_size_wide_log2[subsize]); 1005 *min_bh = AOMMIN(*min_bh, mi_size_high_log2[subsize]); 1006 } 1007 } 1008 } 1009 1010 static inline void add_rd_feature(int64_t rd, int64_t best_rd, float *features, 1011 int *feature_idx) { 1012 const int rd_valid = rd > 0 && rd < INT64_MAX; 1013 const float rd_ratio = rd_valid ? (float)rd / best_rd : 1.0f; 1014 features[(*feature_idx)++] = (float)rd_valid; 1015 features[(*feature_idx)++] = rd_ratio; 1016 } 1017 1018 #define FEATURES 31 1019 void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x, 1020 SIMPLE_MOTION_DATA_TREE *const sms_tree, 1021 int64_t best_rd, int64_t part_none_rd, 1022 int64_t part_split_rd, 1023 int64_t *split_block_rd, 1024 PartitionSearchState *part_state) { 1025 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 1026 const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; 1027 const BLOCK_SIZE bsize = blk_params->bsize; 1028 1029 if (best_rd <= 0 || best_rd == INT64_MAX || 1030 part_state->terminate_partition_search) 1031 return; 1032 1033 const AV1_COMMON *const cm = &cpi->common; 1034 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 1035 const NN_CONFIG *nn_config = NULL; 1036 float thresh = -1e6; 1037 switch (bsize) { 1038 case BLOCK_128X128: 1039 nn_config = &av1_early_term_after_split_nnconfig_64; 1040 thresh = is_480p_or_larger ? -2.0f : -1.2f; 1041 break; 1042 case BLOCK_64X64: 1043 nn_config = &av1_early_term_after_split_nnconfig_64; 1044 thresh = is_480p_or_larger ? -2.0f : -1.2f; 1045 break; 1046 case BLOCK_32X32: 1047 nn_config = &av1_early_term_after_split_nnconfig_32; 1048 thresh = is_480p_or_larger ? -2.6f : -2.3f; 1049 break; 1050 case BLOCK_16X16: 1051 nn_config = &av1_early_term_after_split_nnconfig_16; 1052 thresh = is_480p_or_larger ? -2.0f : -2.4f; 1053 break; 1054 case BLOCK_8X8: 1055 nn_config = &av1_early_term_after_split_nnconfig_8; 1056 thresh = is_480p_or_larger ? -1.0f : -1.4f; 1057 break; 1058 case BLOCK_4X4: break; 1059 default: 1060 assert(0 && "Invalid block size in av1_ml_early_term_after_split()."); 1061 break; 1062 } 1063 if (!nn_config) return; 1064 1065 // Use more conservative threshold for level 1. 1066 if (cpi->sf.part_sf.ml_early_term_after_part_split_level < 2) thresh -= 0.3f; 1067 1068 const MACROBLOCKD *const xd = &x->e_mbd; 1069 const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8); 1070 const int bs = block_size_wide[bsize]; 1071 int f_idx = 0; 1072 float features[FEATURES] = { 0.0f }; 1073 1074 features[f_idx++] = log1pf((float)dc_q / 4.0f); 1075 features[f_idx++] = log1pf((float)best_rd / bs / bs / 1024.0f); 1076 1077 add_rd_feature(part_none_rd, best_rd, features, &f_idx); 1078 add_rd_feature(part_split_rd, best_rd, features, &f_idx); 1079 1080 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 1081 add_rd_feature(split_block_rd[i], best_rd, features, &f_idx); 1082 int min_bw = MAX_SB_SIZE_LOG2; 1083 int min_bh = MAX_SB_SIZE_LOG2; 1084 get_min_bsize(sms_tree->split[i], &min_bw, &min_bh); 1085 features[f_idx++] = (float)min_bw; 1086 features[f_idx++] = (float)min_bh; 1087 } 1088 1089 simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col, 1090 bsize, NULL, 1091 FEATURE_SMS_PRUNE_PART_FLAG); 1092 1093 features[f_idx++] = log1pf((float)sms_tree->sms_none_feat[1]); 1094 1095 features[f_idx++] = log1pf((float)sms_tree->split[0]->sms_none_feat[1]); 1096 features[f_idx++] = log1pf((float)sms_tree->split[1]->sms_none_feat[1]); 1097 features[f_idx++] = log1pf((float)sms_tree->split[2]->sms_none_feat[1]); 1098 features[f_idx++] = log1pf((float)sms_tree->split[3]->sms_none_feat[1]); 1099 1100 features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[1]); 1101 features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[3]); 1102 features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[5]); 1103 features[f_idx++] = log1pf((float)sms_tree->sms_rect_feat[7]); 1104 1105 assert(f_idx == FEATURES); 1106 1107 // Write features to file 1108 write_features_to_file(cpi->oxcf.partition_info_path, 1109 cpi->ext_part_controller.test_mode, features, FEATURES, 1110 4, bsize, mi_row, mi_col); 1111 1112 if (ext_ml_model_decision_after_split( 1113 cpi, features, &part_state->terminate_partition_search)) { 1114 return; 1115 } 1116 1117 float score = 0.0f; 1118 av1_nn_predict(features, nn_config, 1, &score); 1119 // Score is indicator of confidence that we should NOT terminate. 1120 if (score < thresh) { 1121 part_state->terminate_partition_search = 1; 1122 } 1123 } 1124 #undef FEATURES 1125 1126 void av1_ml_prune_rect_partition(AV1_COMP *const cpi, const MACROBLOCK *const x, 1127 int64_t best_rd, int64_t none_rd, 1128 const int64_t *split_rd, 1129 PartitionSearchState *part_state) { 1130 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 1131 const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; 1132 const BLOCK_SIZE bsize = blk_params->bsize; 1133 1134 if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; 1135 best_rd = AOMMAX(best_rd, 1); 1136 const NN_CONFIG *nn_config = NULL; 1137 const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f }; 1138 float cur_thresh = 0.0f; 1139 switch (bsize) { 1140 case BLOCK_8X8: 1141 nn_config = &av1_rect_partition_nnconfig_8; 1142 cur_thresh = prob_thresholds[0]; 1143 break; 1144 case BLOCK_16X16: 1145 nn_config = &av1_rect_partition_nnconfig_16; 1146 cur_thresh = prob_thresholds[1]; 1147 break; 1148 case BLOCK_32X32: 1149 nn_config = &av1_rect_partition_nnconfig_32; 1150 cur_thresh = prob_thresholds[2]; 1151 break; 1152 case BLOCK_64X64: 1153 nn_config = &av1_rect_partition_nnconfig_64; 1154 cur_thresh = prob_thresholds[3]; 1155 break; 1156 case BLOCK_128X128: 1157 nn_config = &av1_rect_partition_nnconfig_128; 1158 cur_thresh = prob_thresholds[4]; 1159 break; 1160 default: assert(0 && "Unexpected bsize."); 1161 } 1162 if (!nn_config) return; 1163 1164 // 1. Compute input features 1165 float features[9]; 1166 1167 // RD cost ratios 1168 for (int i = 0; i < 5; i++) features[i] = 1.0f; 1169 if (none_rd > 0 && none_rd < 1000000000) 1170 features[0] = (float)none_rd / (float)best_rd; 1171 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { 1172 if (split_rd[i] > 0 && split_rd[i] < 1000000000) 1173 features[1 + i] = (float)split_rd[i] / (float)best_rd; 1174 } 1175 1176 // Variance ratios 1177 const MACROBLOCKD *const xd = &x->e_mbd; 1178 int whole_block_variance; 1179 whole_block_variance = av1_get_perpixel_variance_facade( 1180 cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y); 1181 whole_block_variance = AOMMAX(whole_block_variance, 1); 1182 1183 int split_variance[SUB_PARTITIONS_SPLIT]; 1184 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 1185 struct buf_2d buf; 1186 buf.stride = x->plane[0].src.stride; 1187 const int bw = block_size_wide[bsize]; 1188 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 1189 const int x_idx = (i & 1) * bw / 2; 1190 const int y_idx = (i >> 1) * bw / 2; 1191 buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride; 1192 split_variance[i] = 1193 av1_get_perpixel_variance_facade(cpi, xd, &buf, subsize, AOM_PLANE_Y); 1194 } 1195 1196 for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) 1197 features[5 + i] = (float)split_variance[i] / (float)whole_block_variance; 1198 1199 // Write features to file 1200 write_features_to_file(cpi->oxcf.partition_info_path, 1201 cpi->ext_part_controller.test_mode, features, 1202 /*feature_size=*/9, 5, bsize, mi_row, mi_col); 1203 1204 if (ext_ml_model_decision_after_split_part2( 1205 &cpi->ext_part_controller, frame_is_intra_only(&cpi->common), 1206 features, &part_state->prune_rect_part[HORZ], 1207 &part_state->prune_rect_part[VERT])) { 1208 return; 1209 } 1210 1211 // 2. Do the prediction and prune 0-2 partitions based on their probabilities 1212 float raw_scores[3] = { 0.0f }; 1213 av1_nn_predict(features, nn_config, 1, raw_scores); 1214 float probs[3] = { 0.0f }; 1215 av1_nn_softmax(raw_scores, probs, 3); 1216 1217 // probs[0] is the probability of the fact that both rectangular partitions 1218 // are worse than current best_rd 1219 if (probs[1] <= cur_thresh) part_state->prune_rect_part[HORZ] = 1; 1220 if (probs[2] <= cur_thresh) part_state->prune_rect_part[VERT] = 1; 1221 } 1222 1223 // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be 1224 // considered. 1225 static void ml_prune_ab_partition(AV1_COMP *const cpi, int part_ctx, 1226 int var_ctx, int64_t best_rd, 1227 PartitionSearchState *part_state, 1228 int *ab_partitions_allowed) { 1229 const PartitionBlkParams blk_params = part_state->part_blk_params; 1230 const int mi_row = blk_params.mi_row; 1231 const int mi_col = blk_params.mi_col; 1232 const BLOCK_SIZE bsize = blk_params.bsize; 1233 1234 if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; 1235 const NN_CONFIG *nn_config = NULL; 1236 switch (bsize) { 1237 case BLOCK_8X8: nn_config = NULL; break; 1238 case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break; 1239 case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break; 1240 case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break; 1241 case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break; 1242 default: assert(0 && "Unexpected bsize."); 1243 } 1244 if (!nn_config) return; 1245 1246 // Generate features. 1247 float features[10]; 1248 int feature_index = 0; 1249 features[feature_index++] = (float)part_ctx; 1250 features[feature_index++] = (float)var_ctx; 1251 const int rdcost = (int)AOMMIN(INT_MAX, best_rd); 1252 int sub_block_rdcost[8] = { 0 }; 1253 int rd_index = 0; 1254 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 1255 const int64_t *horz_rd = part_state->rect_part_rd[HORZ]; 1256 if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) 1257 sub_block_rdcost[rd_index] = (int)horz_rd[i]; 1258 ++rd_index; 1259 } 1260 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 1261 const int64_t *vert_rd = part_state->rect_part_rd[VERT]; 1262 if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) 1263 sub_block_rdcost[rd_index] = (int)vert_rd[i]; 1264 ++rd_index; 1265 } 1266 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 1267 const int64_t *split_rd = part_state->split_rd; 1268 if (split_rd[i] > 0 && split_rd[i] < 1000000000) 1269 sub_block_rdcost[rd_index] = (int)split_rd[i]; 1270 ++rd_index; 1271 } 1272 for (int i = 0; i < 8; ++i) { 1273 // Ratio between the sub-block RD and the whole-block RD. 1274 float rd_ratio = 1.0f; 1275 if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) 1276 rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; 1277 features[feature_index++] = rd_ratio; 1278 } 1279 assert(feature_index == 10); 1280 1281 // Write features to file 1282 if (!frame_is_intra_only(&cpi->common)) { 1283 write_features_to_file(cpi->oxcf.partition_info_path, 1284 cpi->ext_part_controller.test_mode, features, 1285 /*feature_size=*/10, 6, bsize, mi_row, mi_col); 1286 } 1287 1288 if (ext_ml_model_decision_after_rect( 1289 &cpi->ext_part_controller, frame_is_intra_only(&cpi->common), 1290 features, &ab_partitions_allowed[HORZ_A], 1291 &ab_partitions_allowed[HORZ_B], &ab_partitions_allowed[VERT_A], 1292 &ab_partitions_allowed[VERT_B])) { 1293 return; 1294 } 1295 1296 // Calculate scores using the NN model. 1297 float score[16] = { 0.0f }; 1298 av1_nn_predict(features, nn_config, 1, score); 1299 int int_score[16]; 1300 int max_score = -1000; 1301 for (int i = 0; i < 16; ++i) { 1302 int_score[i] = (int)(100 * score[i]); 1303 max_score = AOMMAX(int_score[i], max_score); 1304 } 1305 1306 // Make decisions based on the model scores. 1307 int thresh = max_score; 1308 switch (bsize) { 1309 case BLOCK_16X16: thresh -= 150; break; 1310 case BLOCK_32X32: thresh -= 100; break; 1311 default: break; 1312 } 1313 av1_zero_array(ab_partitions_allowed, NUM_AB_PARTS); 1314 for (int i = 0; i < 16; ++i) { 1315 if (int_score[i] >= thresh) { 1316 if ((i >> 0) & 1) ab_partitions_allowed[HORZ_A] = 1; 1317 if ((i >> 1) & 1) ab_partitions_allowed[HORZ_B] = 1; 1318 if ((i >> 2) & 1) ab_partitions_allowed[VERT_A] = 1; 1319 if ((i >> 3) & 1) ab_partitions_allowed[VERT_B] = 1; 1320 } 1321 } 1322 } 1323 1324 #define FEATURES 18 1325 #define LABELS 4 1326 #define NEW_LABELS 3 1327 // Use a ML model to predict if horz4 and vert4 should be considered. 1328 void av1_ml_prune_4_partition(AV1_COMP *const cpi, MACROBLOCK *const x, 1329 int part_ctx, int64_t best_rd, 1330 PartitionSearchState *part_state, 1331 int *part4_allowed, 1332 unsigned int pb_source_variance) { 1333 const AV1_COMMON *const cm = &cpi->common; 1334 const PartitionBlkParams blk_params = part_state->part_blk_params; 1335 const int mi_row = blk_params.mi_row; 1336 const int mi_col = blk_params.mi_col; 1337 const BLOCK_SIZE bsize = blk_params.bsize; 1338 1339 int64_t(*rect_part_rd)[SUB_PARTITIONS_RECT] = part_state->rect_part_rd; 1340 int64_t *split_rd = part_state->split_rd; 1341 if (ext_ml_model_decision_after_part_ab( 1342 cpi, x, bsize, part_ctx, best_rd, rect_part_rd, split_rd, 1343 &part4_allowed[HORZ4], &part4_allowed[VERT4], pb_source_variance, 1344 mi_row, mi_col)) 1345 return; 1346 1347 if (best_rd >= 1000000000) return; 1348 int64_t *horz_rd = rect_part_rd[HORZ4]; 1349 int64_t *vert_rd = rect_part_rd[VERT4]; 1350 1351 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; 1352 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; 1353 // res_idx is 0 for res < 480p, 1 for 480p, 2 for 720p+ 1354 const int res_idx = is_480p_or_larger + is_720p_or_larger; 1355 1356 const int bsize_idx = convert_bsize_to_idx(bsize); 1357 if (bsize_idx < 0) return; 1358 const float *ml_mean = av1_partition4_nn_mean[bsize_idx]; 1359 const float *ml_std = av1_partition4_nn_std[bsize_idx]; 1360 1361 int ml_model_index = (cpi->sf.part_sf.ml_4_partition_search_level_index < 3); 1362 1363 const NN_CONFIG *nn_config = NULL; 1364 // 4-way partitions are only allowed for these three square block sizes. 1365 switch (bsize) { 1366 case BLOCK_16X16: 1367 nn_config = &av1_4_partition_nnconfig_16[ml_model_index]; 1368 break; 1369 case BLOCK_32X32: 1370 nn_config = &av1_4_partition_nnconfig_32[ml_model_index]; 1371 break; 1372 case BLOCK_64X64: 1373 nn_config = &av1_4_partition_nnconfig_64[ml_model_index]; 1374 break; 1375 default: assert(0 && "Unexpected bsize."); 1376 } 1377 if (!nn_config || !ml_mean || !ml_std) return; 1378 1379 // Generate features. 1380 float features[FEATURES]; 1381 int feature_index = 0; 1382 features[feature_index++] = (float)part_ctx; 1383 features[feature_index++] = (float)get_unsigned_bits(pb_source_variance); 1384 1385 const int rdcost = (int)AOMMIN(INT_MAX, best_rd); 1386 int sub_block_rdcost[8] = { 0 }; 1387 int rd_index = 0; 1388 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 1389 if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) 1390 sub_block_rdcost[rd_index] = (int)horz_rd[i]; 1391 ++rd_index; 1392 } 1393 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 1394 if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) 1395 sub_block_rdcost[rd_index] = (int)vert_rd[i]; 1396 ++rd_index; 1397 } 1398 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 1399 if (split_rd[i] > 0 && split_rd[i] < 1000000000) 1400 sub_block_rdcost[rd_index] = (int)split_rd[i]; 1401 ++rd_index; 1402 } 1403 for (int i = 0; i < 8; ++i) { 1404 // Ratio between the sub-block RD and the whole-block RD. 1405 float rd_ratio = 1.0f; 1406 if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) 1407 rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; 1408 features[feature_index++] = rd_ratio; 1409 } 1410 1411 // Get variance of the 1:4 and 4:1 sub-blocks. 1412 unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; 1413 unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; 1414 { 1415 BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4); 1416 BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4); 1417 1418 assert(horz_4_bs != BLOCK_INVALID); 1419 assert(vert_4_bs != BLOCK_INVALID); 1420 1421 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 1422 av1_num_planes(&cpi->common), bsize); 1423 const int src_stride = x->plane[0].src.stride; 1424 uint8_t *src = x->plane[0].src.buf; 1425 const MACROBLOCKD *const xd = &x->e_mbd; 1426 1427 struct buf_2d horz_4_src, vert_4_src; 1428 horz_4_src.stride = src_stride; 1429 vert_4_src.stride = src_stride; 1430 1431 for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { 1432 horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride; 1433 vert_4_src.buf = src + i * block_size_wide[vert_4_bs]; 1434 1435 horz_4_source_var[i] = av1_get_perpixel_variance_facade( 1436 cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y); 1437 vert_4_source_var[i] = av1_get_perpixel_variance_facade( 1438 cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y); 1439 } 1440 } 1441 1442 const float denom = (float)(pb_source_variance + 1); 1443 const float low_b = 0.1f; 1444 const float high_b = 10.0f; 1445 for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { 1446 // Ratio between the 4:1 sub-block variance and the whole-block variance. 1447 float var_ratio = (float)(horz_4_source_var[i] + 1) / denom; 1448 if (var_ratio < low_b) var_ratio = low_b; 1449 if (var_ratio > high_b) var_ratio = high_b; 1450 features[feature_index++] = var_ratio; 1451 } 1452 for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { 1453 // Ratio between the 1:4 sub-block RD and the whole-block RD. 1454 float var_ratio = (float)(vert_4_source_var[i] + 1) / denom; 1455 if (var_ratio < low_b) var_ratio = low_b; 1456 if (var_ratio > high_b) var_ratio = high_b; 1457 features[feature_index++] = var_ratio; 1458 } 1459 assert(feature_index == FEATURES); 1460 1461 if (ml_model_index) { 1462 for (int idx = 0; idx < FEATURES; idx++) { 1463 features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx]; 1464 } 1465 } 1466 1467 // Write features to file 1468 if (!frame_is_intra_only(&cpi->common)) { 1469 write_features_to_file(cpi->oxcf.partition_info_path, 1470 cpi->ext_part_controller.test_mode, features, 1471 FEATURES, 7, bsize, mi_row, mi_col); 1472 } 1473 1474 if (ml_model_index == 0) { 1475 // Calculate scores using the NN model. 1476 float score[LABELS] = { 0.0f }; 1477 av1_nn_predict(features, nn_config, 1, score); 1478 int int_score[LABELS]; 1479 int max_score = -1000; 1480 for (int i = 0; i < LABELS; ++i) { 1481 int_score[i] = (int)(100 * score[i]); 1482 max_score = AOMMAX(int_score[i], max_score); 1483 } 1484 1485 // Make decisions based on the model scores. 1486 int thresh = max_score; 1487 switch (bsize) { 1488 case BLOCK_16X16: thresh -= 500; break; 1489 case BLOCK_32X32: thresh -= 500; break; 1490 case BLOCK_64X64: thresh -= 200; break; 1491 default: break; 1492 } 1493 av1_zero_array(part4_allowed, NUM_PART4_TYPES); 1494 for (int i = 0; i < LABELS; ++i) { 1495 if (int_score[i] >= thresh) { 1496 if ((i >> 0) & 1) part4_allowed[HORZ4] = 1; 1497 if ((i >> 1) & 1) part4_allowed[VERT4] = 1; 1498 } 1499 } 1500 } else { 1501 // Calculate scores using the NN model. 1502 float score[NEW_LABELS] = { 0.0f }; 1503 float probs[NEW_LABELS] = { 0.0f }; 1504 av1_nn_predict(features, nn_config, 1, score); 1505 1506 av1_nn_softmax(score, probs, NEW_LABELS); 1507 1508 // Make decisions based on the model scores. 1509 const float search_thresh = av1_partition4_search_thresh 1510 [cpi->sf.part_sf.ml_4_partition_search_level_index][res_idx][bsize_idx]; 1511 const float not_search_thresh = av1_partition4_not_search_thresh 1512 [cpi->sf.part_sf.ml_4_partition_search_level_index][res_idx][bsize_idx]; 1513 1514 for (int i = 1; i < NEW_LABELS; ++i) { 1515 if (probs[i] >= search_thresh) { 1516 if (i == 1) part4_allowed[HORZ4] = 1; 1517 if (i == 2) part4_allowed[VERT4] = 1; 1518 } 1519 if (probs[i] < not_search_thresh) { 1520 if (i == 1) part4_allowed[HORZ4] = 0; 1521 if (i == 2) part4_allowed[VERT4] = 0; 1522 } 1523 } 1524 } 1525 } 1526 #undef FEATURES 1527 #undef LABELS 1528 #undef NEW_LABELS 1529 1530 #define FEATURES 4 1531 void av1_ml_predict_breakout(AV1_COMP *const cpi, const MACROBLOCK *const x, 1532 const RD_STATS *const rd_stats, 1533 unsigned int pb_source_variance, int bit_depth, 1534 PartitionSearchState *part_state) { 1535 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 1536 const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; 1537 const BLOCK_SIZE bsize = blk_params->bsize; 1538 1539 const int bsize_idx = convert_bsize_to_idx(bsize); 1540 if (bsize_idx < 0) return; 1541 const float *ml_mean = av1_hd_partition_breakout_nn_mean[bsize_idx]; 1542 const float *ml_std = av1_hd_partition_breakout_nn_std[bsize_idx]; 1543 1544 const NN_CONFIG *nn_config = NULL; 1545 float thresh = 0; 1546 switch (bsize) { 1547 case BLOCK_8X8: 1548 nn_config = 1549 &av1_partition_breakout_nnconfig_8 1550 [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; 1551 thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; 1552 break; 1553 case BLOCK_16X16: 1554 nn_config = 1555 &av1_partition_breakout_nnconfig_16 1556 [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; 1557 thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; 1558 break; 1559 case BLOCK_32X32: 1560 nn_config = 1561 &av1_partition_breakout_nnconfig_32 1562 [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; 1563 thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; 1564 break; 1565 case BLOCK_64X64: 1566 nn_config = 1567 &av1_partition_breakout_nnconfig_64 1568 [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; 1569 thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; 1570 break; 1571 case BLOCK_128X128: 1572 nn_config = 1573 &av1_partition_breakout_nnconfig_128 1574 [cpi->sf.part_sf.ml_partition_search_breakout_model_index]; 1575 thresh = cpi->sf.part_sf.ml_partition_search_breakout_thresh[bsize_idx]; 1576 break; 1577 default: assert(0 && "Unexpected bsize."); 1578 } 1579 if (!nn_config || thresh < 0) return; 1580 1581 const float ml_predict_breakout_thresh_scale[3] = { 1.15f, 1.05f, 1.0f }; 1582 thresh = thresh * ml_predict_breakout_thresh_scale 1583 [cpi->sf.part_sf.ml_predict_breakout_level - 1]; 1584 1585 // Generate feature values. 1586 float features[FEATURES]; 1587 int feature_index = 0; 1588 1589 const int num_pels_log2 = num_pels_log2_lookup[bsize]; 1590 float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX); 1591 rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * 1592 rate_f; 1593 features[feature_index++] = rate_f; 1594 1595 const float dist_f = 1596 (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2); 1597 features[feature_index++] = dist_f; 1598 1599 features[feature_index++] = (float)pb_source_variance; 1600 1601 const int dc_q = (int)x->plane[0].dequant_QTX[0] >> (bit_depth - 8); 1602 features[feature_index++] = (float)(dc_q * dc_q) / 256.0f; 1603 assert(feature_index == FEATURES); 1604 1605 if (cpi->sf.part_sf.ml_partition_search_breakout_model_index) { 1606 for (int idx = 0; idx < FEATURES; idx++) { 1607 features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx]; 1608 } 1609 } 1610 1611 // Write features to file 1612 write_features_to_file(cpi->oxcf.partition_info_path, 1613 cpi->ext_part_controller.test_mode, features, FEATURES, 1614 2, bsize, mi_row, mi_col); 1615 1616 if (ext_ml_model_decision_after_none(&cpi->ext_part_controller, 1617 frame_is_intra_only(&cpi->common), 1618 features, &part_state->do_square_split, 1619 &part_state->do_rectangular_split)) { 1620 return; 1621 } 1622 1623 // Calculate score using the NN model. 1624 float score = 0.0f; 1625 av1_nn_predict(features, nn_config, 1, &score); 1626 1627 float thresh_score = (float)log(thresh / (1 - thresh)); 1628 1629 // Make decision. 1630 if (score >= thresh_score) { 1631 part_state->do_square_split = 0; 1632 part_state->do_rectangular_split = 0; 1633 } 1634 } 1635 #undef FEATURES 1636 1637 void av1_prune_partitions_before_search(AV1_COMP *const cpi, 1638 MACROBLOCK *const x, 1639 SIMPLE_MOTION_DATA_TREE *const sms_tree, 1640 PartitionSearchState *part_state) { 1641 const AV1_COMMON *const cm = &cpi->common; 1642 const CommonModeInfoParams *const mi_params = &cm->mi_params; 1643 1644 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 1645 const BLOCK_SIZE bsize = blk_params->bsize; 1646 1647 #if CONFIG_THREE_PASS 1648 if (cpi->third_pass_ctx) { 1649 int mi_row = blk_params->mi_row; 1650 int mi_col = blk_params->mi_col; 1651 double ratio_h, ratio_w; 1652 av1_get_third_pass_ratio(cpi->third_pass_ctx, 0, cm->height, cm->width, 1653 &ratio_h, &ratio_w); 1654 THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( 1655 cpi->third_pass_ctx, 0, mi_row, mi_col, ratio_h, ratio_w); 1656 BLOCK_SIZE third_pass_bsize = 1657 av1_get_third_pass_adjusted_blk_size(this_mi, ratio_h, ratio_w); 1658 // check the actual partition of this block in the second pass 1659 PARTITION_TYPE third_pass_part = 1660 av1_third_pass_get_sb_part_type(cpi->third_pass_ctx, this_mi); 1661 1662 int is_edge = (mi_row + mi_size_high[bsize] >= cm->mi_params.mi_rows) || 1663 (mi_col + mi_size_wide[bsize] >= cm->mi_params.mi_cols); 1664 1665 if (!is_edge && block_size_wide[bsize] >= 16) { 1666 // If in second pass we used rectangular partition, then do not search for 1667 // rectangular partition in the different direction. 1668 if (third_pass_part != PARTITION_NONE) { 1669 if (third_pass_part == PARTITION_HORZ || 1670 third_pass_part == PARTITION_HORZ_4 || 1671 third_pass_part == PARTITION_HORZ_A || 1672 third_pass_part == PARTITION_HORZ_B) { 1673 part_state->partition_rect_allowed[VERT] = 0; 1674 } else if (third_pass_part == PARTITION_VERT || 1675 third_pass_part == PARTITION_VERT_4 || 1676 third_pass_part == PARTITION_VERT_A || 1677 third_pass_part == PARTITION_VERT_B) { 1678 part_state->partition_rect_allowed[HORZ] = 0; 1679 } 1680 } 1681 1682 int minSize = AOMMIN(block_size_wide[third_pass_bsize], 1683 block_size_high[third_pass_bsize]); 1684 int maxSize = AOMMAX(block_size_wide[third_pass_bsize], 1685 block_size_high[third_pass_bsize]); 1686 if (block_size_wide[bsize] < minSize / 4) { 1687 // Current partition is too small, just terminate 1688 part_state->terminate_partition_search = 1; 1689 return; 1690 } else if (block_size_wide[bsize] < minSize / 2) { 1691 if (third_pass_part != PARTITION_NONE) { 1692 // Current partition is very small, and in second pass we used 1693 // rectangular partition. Terminate the search here then. 1694 part_state->terminate_partition_search = 1; 1695 return; 1696 } else { 1697 // Partition is small, but we still check this partition, only disable 1698 // further splits. 1699 // TODO(any): check why this is not covered by the termination for < 1700 // minSize/4. 1701 av1_disable_square_split_partition(part_state); 1702 av1_disable_rect_partitions(part_state); 1703 return; 1704 } 1705 } else if (block_size_wide[bsize] > maxSize) { 1706 // Partition is larger than in the second pass. Only allow split. 1707 av1_set_square_split_only(part_state); 1708 return; 1709 } else if (block_size_wide[bsize] >= minSize && 1710 block_size_wide[bsize] <= maxSize) { 1711 // Partition is within a range where it is very likely to find a good 1712 // choice, so do not prune anything. 1713 return; 1714 } 1715 } 1716 } 1717 #endif // CONFIG_THREE_PASS 1718 1719 // Prune rectangular partitions for larger blocks. 1720 if (bsize > cpi->sf.part_sf.rect_partition_eval_thresh) { 1721 part_state->do_rectangular_split = 0; 1722 part_state->partition_rect_allowed[HORZ] = 0; 1723 part_state->partition_rect_allowed[VERT] = 0; 1724 } 1725 1726 // Prune rectangular, AB and 4-way partition based on q index and block size 1727 if (cpi->sf.part_sf.prune_rectangular_split_based_on_qidx == 1) { 1728 if (bsize == BLOCK_8X8 && x->qindex < 35) 1729 av1_disable_rect_partitions(part_state); 1730 1731 } else if (cpi->sf.part_sf.prune_rectangular_split_based_on_qidx == 2) { 1732 // Enumeration difference between two square partitions 1733 const int sqr_bsize_step = BLOCK_32X32 - BLOCK_16X16; 1734 int max_bsize = 1735 BLOCK_32X32 - (x->qindex * 3 / QINDEX_RANGE) * sqr_bsize_step; 1736 max_bsize = AOMMAX(max_bsize, BLOCK_4X4); 1737 const BLOCK_SIZE max_prune_bsize = 1738 (BLOCK_SIZE)AOMMIN(max_bsize, BLOCK_32X32); 1739 1740 // Prune partition 1741 // qidx 0 to 85: prune bsize below BLOCK_32X32 1742 // qidx 86 to 170: prune bsize below BLOCK_16X16 1743 // qidx 171 to 255: prune bsize below BLOCK_8X8 1744 if (bsize < max_prune_bsize) { 1745 av1_disable_rect_partitions(part_state); 1746 } 1747 } 1748 1749 if (cpi->sf.part_sf.prune_sub_8x8_partition_level && (bsize == BLOCK_8X8)) { 1750 const MACROBLOCKD *const xd = &x->e_mbd; 1751 int prune_sub_8x8; 1752 if (cpi->sf.part_sf.prune_sub_8x8_partition_level == 2) { 1753 prune_sub_8x8 = 1; 1754 } else { 1755 assert(cpi->sf.part_sf.prune_sub_8x8_partition_level == 1); 1756 // Prune if both neighbors are available and either is > BLOCK_8X8 1757 prune_sub_8x8 = xd->left_available && xd->up_available && 1758 (xd->left_mbmi->bsize > BLOCK_8X8 || 1759 xd->above_mbmi->bsize > BLOCK_8X8); 1760 } 1761 if (prune_sub_8x8) { 1762 av1_disable_all_splits(part_state); 1763 } 1764 } 1765 1766 // A CNN-based speed feature pruning out either split or all non-split 1767 // partition in INTRA frame coding. 1768 const int try_intra_cnn_based_part_prune = 1769 frame_is_intra_only(cm) && 1770 cpi->sf.part_sf.intra_cnn_based_part_prune_level && 1771 cm->seq_params->sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 && 1772 blk_params->bsize_at_least_8x8 && 1773 av1_is_whole_blk_in_frame(blk_params, mi_params); 1774 1775 if (try_intra_cnn_based_part_prune) { 1776 intra_mode_cnn_partition(&cpi->common, x, x->part_search_info.quad_tree_idx, 1777 cpi->sf.part_sf.intra_cnn_based_part_prune_level, 1778 part_state); 1779 } 1780 1781 // Use simple motion search to prune out split or non-split partitions. This 1782 // must be done prior to PARTITION_SPLIT to propagate the initial mvs to a 1783 // smaller blocksize. 1784 const int try_split_only = 1785 cpi->sf.part_sf.simple_motion_search_split && 1786 part_state->do_square_split && blk_params->bsize_at_least_8x8 && 1787 av1_is_whole_blk_in_frame(blk_params, mi_params) && 1788 !frame_is_intra_only(cm) && !av1_superres_scaled(cm); 1789 1790 if (try_split_only) { 1791 simple_motion_search_based_split(cpi, x, sms_tree, part_state); 1792 } 1793 1794 // Use simple motion search to prune out rectangular partition in some 1795 // direction. The results are stored in prune_horz and prune_vert in order to 1796 // bypass future related pruning checks if a pruning decision has been made. 1797 1798 // We want to search at least one partition mode, so don't prune if NONE and 1799 // SPLIT are disabled. 1800 const int non_rect_part_allowed = 1801 part_state->do_square_split || part_state->partition_none_allowed; 1802 // Only run the model if the partitions are not already pruned. 1803 const int rect_part_allowed = part_state->do_rectangular_split && 1804 ((part_state->partition_rect_allowed[HORZ] && 1805 !part_state->prune_rect_part[HORZ]) || 1806 (part_state->partition_rect_allowed[VERT] && 1807 !part_state->prune_rect_part[VERT])); 1808 1809 const int try_prune_rect = cpi->sf.part_sf.simple_motion_search_prune_rect && 1810 !frame_is_intra_only(cm) && 1811 non_rect_part_allowed && rect_part_allowed && 1812 !av1_superres_scaled(cm); 1813 1814 if (try_prune_rect) { 1815 simple_motion_search_prune_rect(cpi, x, sms_tree, part_state); 1816 } 1817 } 1818 1819 #ifndef NDEBUG 1820 static inline int is_bsize_square(BLOCK_SIZE bsize) { 1821 return block_size_wide[bsize] == block_size_high[bsize]; 1822 } 1823 #endif // NDEBUG 1824 1825 void av1_prune_partitions_by_max_min_bsize(SuperBlockEnc *sb_enc, 1826 PartitionSearchState *part_state) { 1827 assert(is_bsize_square(sb_enc->max_partition_size)); 1828 assert(is_bsize_square(sb_enc->min_partition_size)); 1829 assert(sb_enc->min_partition_size <= sb_enc->max_partition_size); 1830 const PartitionBlkParams *blk_params = &part_state->part_blk_params; 1831 const BLOCK_SIZE bsize = blk_params->bsize; 1832 assert(is_bsize_square(bsize)); 1833 const int max_partition_size_1d = block_size_wide[sb_enc->max_partition_size]; 1834 const int min_partition_size_1d = block_size_wide[sb_enc->min_partition_size]; 1835 const int bsize_1d = block_size_wide[bsize]; 1836 assert(min_partition_size_1d <= max_partition_size_1d); 1837 const int is_le_min_sq_part = bsize_1d <= min_partition_size_1d; 1838 const int is_gt_max_sq_part = bsize_1d > max_partition_size_1d; 1839 if (is_gt_max_sq_part) { 1840 // If current block size is larger than max, only allow split. 1841 av1_set_square_split_only(part_state); 1842 } else if (is_le_min_sq_part) { 1843 // If current block size is less or equal to min, only allow none if valid 1844 // block large enough; only allow split otherwise. 1845 av1_disable_rect_partitions(part_state); 1846 1847 // only disable square split when current block is not at the picture 1848 // boundary. otherwise, inherit the square split flag from previous logic 1849 if (av1_blk_has_rows_and_cols(blk_params)) { 1850 part_state->do_square_split = 0; 1851 } 1852 part_state->partition_none_allowed = !(part_state->do_square_split); 1853 } 1854 } 1855 1856 // Decide whether to evaluate the AB partition specified by part_type based on 1857 // split and HORZ/VERT info 1858 static int evaluate_ab_partition_based_on_split( 1859 const PC_TREE *pc_tree, PARTITION_TYPE rect_part, 1860 const RD_RECT_PART_WIN_INFO *rect_part_win_info, int qindex, int split_idx1, 1861 int split_idx2) { 1862 int num_win = 0; 1863 // Threshold for number of winners 1864 // Conservative pruning for high quantizers 1865 const int num_win_thresh = AOMMIN(3 * (2 * (MAXQ - qindex) / MAXQ), 3); 1866 int sub_part_win = 1867 (rect_part_win_info == NULL) ? (pc_tree->partitioning == rect_part) 1868 : (rect_part == PARTITION_HORZ) ? rect_part_win_info->rect_part_win[HORZ] 1869 : rect_part_win_info->rect_part_win[VERT]; 1870 num_win += (sub_part_win) ? 1 : 0; 1871 if (pc_tree->split[split_idx1]) { 1872 num_win += 1873 (pc_tree->split[split_idx1]->partitioning == PARTITION_NONE) ? 1 : 0; 1874 } else { 1875 num_win += 1; 1876 } 1877 if (pc_tree->split[split_idx2]) { 1878 num_win += 1879 (pc_tree->split[split_idx2]->partitioning == PARTITION_NONE) ? 1 : 0; 1880 } else { 1881 num_win += 1; 1882 } 1883 if (num_win < num_win_thresh) { 1884 return 0; 1885 } 1886 return 1; 1887 } 1888 1889 void av1_prune_ab_partitions(AV1_COMP *cpi, const MACROBLOCK *x, 1890 const PC_TREE *pc_tree, int pb_source_variance, 1891 int64_t best_rdcost, 1892 const RD_RECT_PART_WIN_INFO *rect_part_win_info, 1893 bool ext_partition_allowed, 1894 PartitionSearchState *part_state, 1895 int *ab_partitions_allowed) { 1896 int64_t *horz_rd = part_state->rect_part_rd[HORZ]; 1897 int64_t *vert_rd = part_state->rect_part_rd[VERT]; 1898 int64_t *split_rd = part_state->split_rd; 1899 const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg; 1900 // The standard AB partitions are allowed initially if ext-partition-types are 1901 // allowed. 1902 int horzab_partition_allowed = ext_partition_allowed && 1903 part_cfg->enable_ab_partitions && 1904 part_state->partition_rect_allowed[HORZ]; 1905 int vertab_partition_allowed = ext_partition_allowed && 1906 part_cfg->enable_ab_partitions && 1907 part_state->partition_rect_allowed[VERT]; 1908 1909 // Pruning: pruning out AB partitions on one main direction based on the 1910 // current best partition and source variance. 1911 if (cpi->sf.part_sf.prune_ext_partition_types_search_level) { 1912 if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 1) { 1913 // TODO(debargha,huisu@google.com): may need to tune the threshold for 1914 // pb_source_variance. 1915 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ || 1916 (pc_tree->partitioning == PARTITION_NONE && 1917 pb_source_variance < 32) || 1918 pc_tree->partitioning == PARTITION_SPLIT); 1919 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT || 1920 (pc_tree->partitioning == PARTITION_NONE && 1921 pb_source_variance < 32) || 1922 pc_tree->partitioning == PARTITION_SPLIT); 1923 } else { 1924 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ || 1925 pc_tree->partitioning == PARTITION_SPLIT); 1926 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT || 1927 pc_tree->partitioning == PARTITION_SPLIT); 1928 } 1929 horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0); 1930 horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0); 1931 vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0); 1932 vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0); 1933 split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0); 1934 split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0); 1935 split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0); 1936 split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0); 1937 } 1938 1939 // Pruning: pruning out horz_a or horz_b if the combined rdcost of its 1940 // subblocks estimated from previous partitions is much higher than the best 1941 // rd so far. 1942 ab_partitions_allowed[HORZ_A] = horzab_partition_allowed; 1943 ab_partitions_allowed[HORZ_B] = horzab_partition_allowed; 1944 if (cpi->sf.part_sf.prune_ext_partition_types_search_level) { 1945 const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1]; 1946 const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3]; 1947 switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) { 1948 case 1: 1949 ab_partitions_allowed[HORZ_A] &= (horz_a_rd / 16 * 14 < best_rdcost); 1950 ab_partitions_allowed[HORZ_B] &= (horz_b_rd / 16 * 14 < best_rdcost); 1951 break; 1952 case 2: 1953 default: 1954 ab_partitions_allowed[HORZ_A] &= (horz_a_rd / 16 * 15 < best_rdcost); 1955 ab_partitions_allowed[HORZ_B] &= (horz_b_rd / 16 * 15 < best_rdcost); 1956 break; 1957 } 1958 } 1959 1960 // Pruning: pruning out vert_a or vert_b if the combined rdcost of its 1961 // subblocks estimated from previous partitions is much higher than the best 1962 // rd so far. 1963 ab_partitions_allowed[VERT_A] = vertab_partition_allowed; 1964 ab_partitions_allowed[VERT_B] = vertab_partition_allowed; 1965 if (cpi->sf.part_sf.prune_ext_partition_types_search_level) { 1966 const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2]; 1967 const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3]; 1968 switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) { 1969 case 1: 1970 ab_partitions_allowed[VERT_A] &= (vert_a_rd / 16 * 14 < best_rdcost); 1971 ab_partitions_allowed[VERT_B] &= (vert_b_rd / 16 * 14 < best_rdcost); 1972 break; 1973 case 2: 1974 default: 1975 ab_partitions_allowed[VERT_A] &= (vert_a_rd / 16 * 15 < best_rdcost); 1976 ab_partitions_allowed[VERT_B] &= (vert_b_rd / 16 * 15 < best_rdcost); 1977 break; 1978 } 1979 } 1980 1981 // Pruning: pruning out some ab partitions using a DNN taking rd costs of 1982 // sub-blocks from previous basic partition types. 1983 if (cpi->sf.part_sf.ml_prune_partition && ext_partition_allowed && 1984 part_state->partition_rect_allowed[HORZ] && 1985 part_state->partition_rect_allowed[VERT]) { 1986 // TODO(huisu@google.com): x->source_variance may not be the current 1987 // block's variance. The correct one to use is pb_source_variance. Need to 1988 // re-train the model to fix it. 1989 ml_prune_ab_partition(cpi, pc_tree->partitioning, 1990 get_unsigned_bits(x->source_variance), best_rdcost, 1991 part_state, ab_partitions_allowed); 1992 } 1993 1994 // Pruning: pruning AB partitions based on the number of horz/vert wins 1995 // in the current block and sub-blocks in PARTITION_SPLIT. 1996 if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && 1997 ab_partitions_allowed[HORZ_A]) { 1998 ab_partitions_allowed[HORZ_A] &= evaluate_ab_partition_based_on_split( 1999 pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 0, 1); 2000 } 2001 if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && 2002 ab_partitions_allowed[HORZ_B]) { 2003 ab_partitions_allowed[HORZ_B] &= evaluate_ab_partition_based_on_split( 2004 pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 2, 3); 2005 } 2006 if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && 2007 ab_partitions_allowed[VERT_A]) { 2008 ab_partitions_allowed[VERT_A] &= evaluate_ab_partition_based_on_split( 2009 pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 0, 2); 2010 } 2011 if (cpi->sf.part_sf.prune_ext_part_using_split_info >= 2 && 2012 ab_partitions_allowed[VERT_B]) { 2013 ab_partitions_allowed[VERT_B] &= evaluate_ab_partition_based_on_split( 2014 pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 1, 3); 2015 } 2016 } 2017 2018 // Prepare features for the external model. Specifically, features after 2019 // ab partition is searched. 2020 static void prepare_features_after_part_ab( 2021 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, 2022 int part_ctx, int64_t best_rd, 2023 int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], 2024 int64_t split_rd[SUB_PARTITIONS_SPLIT], unsigned int pb_source_variance, 2025 int mi_row, int mi_col, aom_partition_features_t *const features) { 2026 int64_t *horz_rd = rect_part_rd[HORZ]; 2027 int64_t *vert_rd = rect_part_rd[VERT]; 2028 2029 // Generate features. 2030 int feature_index = 0; 2031 features->after_part_ab.f[feature_index++] = (float)part_ctx; 2032 features->after_part_ab.f[feature_index++] = 2033 (float)get_unsigned_bits(pb_source_variance); 2034 2035 const int rdcost = (int)AOMMIN(INT_MAX, best_rd); 2036 int sub_block_rdcost[8] = { 0 }; 2037 int rd_index = 0; 2038 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 2039 if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) 2040 sub_block_rdcost[rd_index] = (int)horz_rd[i]; 2041 ++rd_index; 2042 } 2043 for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { 2044 if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) 2045 sub_block_rdcost[rd_index] = (int)vert_rd[i]; 2046 ++rd_index; 2047 } 2048 for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { 2049 if (split_rd[i] > 0 && split_rd[i] < 1000000000) 2050 sub_block_rdcost[rd_index] = (int)split_rd[i]; 2051 ++rd_index; 2052 } 2053 for (int i = 0; i < 8; ++i) { 2054 // Ratio between the sub-block RD and the whole-block RD. 2055 float rd_ratio = 1.0f; 2056 if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) 2057 rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; 2058 features->after_part_ab.f[feature_index++] = rd_ratio; 2059 } 2060 2061 // 4-way partitions are only allowed for these three square block sizes. 2062 assert(bsize == BLOCK_16X16 || bsize == BLOCK_32X32 || bsize == BLOCK_64X64); 2063 2064 // Get variance of the 1:4 and 4:1 sub-blocks. 2065 unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; 2066 unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 }; 2067 { 2068 BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4); 2069 BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4); 2070 2071 assert(horz_4_bs != BLOCK_INVALID); 2072 assert(vert_4_bs != BLOCK_INVALID); 2073 2074 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, 2075 av1_num_planes(&cpi->common), bsize); 2076 const int src_stride = x->plane[0].src.stride; 2077 uint8_t *src = x->plane[0].src.buf; 2078 const MACROBLOCKD *const xd = &x->e_mbd; 2079 2080 struct buf_2d horz_4_src, vert_4_src; 2081 horz_4_src.stride = src_stride; 2082 vert_4_src.stride = src_stride; 2083 2084 for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { 2085 horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride; 2086 vert_4_src.buf = src + i * block_size_wide[vert_4_bs]; 2087 2088 horz_4_source_var[i] = av1_get_perpixel_variance_facade( 2089 cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y); 2090 vert_4_source_var[i] = av1_get_perpixel_variance_facade( 2091 cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y); 2092 } 2093 } 2094 2095 const float denom = (float)(pb_source_variance + 1); 2096 const float low_b = 0.1f; 2097 const float high_b = 10.0f; 2098 for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { 2099 // Ratio between the 4:1 sub-block variance and the whole-block variance. 2100 float var_ratio = (float)(horz_4_source_var[i] + 1) / denom; 2101 if (var_ratio < low_b) var_ratio = low_b; 2102 if (var_ratio > high_b) var_ratio = high_b; 2103 features->after_part_ab.f[feature_index++] = var_ratio; 2104 } 2105 for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) { 2106 // Ratio between the 1:4 sub-block RD and the whole-block RD. 2107 float var_ratio = (float)(vert_4_source_var[i] + 1) / denom; 2108 if (var_ratio < low_b) var_ratio = low_b; 2109 if (var_ratio > high_b) var_ratio = high_b; 2110 features->after_part_ab.f[feature_index++] = var_ratio; 2111 } 2112 assert(feature_index == 18); 2113 } 2114 2115 // If the external partition model is used, we let it determine partition 2116 // decisions before partition none. Specifically, these parameters: 2117 // partition_none_allowed 2118 // partition_horz_allowed 2119 // partition_vert_allowed 2120 // do_rectangular_split 2121 // do_square_split 2122 static bool ext_ml_model_decision_before_none( 2123 AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT], 2124 int *partition_none_allowed, int *partition_horz_allowed, 2125 int *partition_vert_allowed, int *do_rectangular_split, 2126 int *do_square_split) { 2127 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 2128 if (!ext_part_controller->ready) return false; 2129 2130 // Setup features. 2131 aom_partition_features_t features; 2132 features.id = AOM_EXT_PART_FEATURE_BEFORE_NONE; 2133 for (int i = 0; i < FEATURE_SIZE_SMS_SPLIT; ++i) { 2134 features.before_part_none.f[i] = features_from_motion[i]; 2135 } 2136 2137 // Send necessary features to the external model. 2138 av1_ext_part_send_features(ext_part_controller, &features); 2139 2140 // Get partition decisions from the external model. 2141 aom_partition_decision_t decision; 2142 const bool valid_decision = 2143 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2144 if (!valid_decision) return false; 2145 2146 // Populate decisions 2147 *partition_none_allowed = decision.partition_none_allowed; 2148 *partition_horz_allowed = decision.partition_rect_allowed[HORZ]; 2149 *partition_vert_allowed = decision.partition_rect_allowed[VERT]; 2150 *do_rectangular_split = decision.do_rectangular_split; 2151 *do_square_split = decision.do_square_split; 2152 2153 return true; 2154 } 2155 2156 // If the external partition model is used, we let it determine partition 2157 // decisions before partition none. Specifically, these parameters: 2158 // prune_horz 2159 // prune_vert 2160 static bool ext_ml_model_decision_before_none_part2( 2161 AV1_COMP *cpi, 2162 const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART], 2163 int *prune_horz, int *prune_vert) { 2164 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 2165 if (!ext_part_controller->ready) return false; 2166 2167 // Setup features. 2168 aom_partition_features_t features; 2169 features.id = AOM_EXT_PART_FEATURE_BEFORE_NONE_PART2; 2170 for (int i = 0; i < FEATURE_SIZE_SMS_PRUNE_PART; ++i) { 2171 features.before_part_none.f_part2[i] = features_from_motion[i]; 2172 } 2173 2174 // Send necessary features to the external model. 2175 av1_ext_part_send_features(ext_part_controller, &features); 2176 2177 // Get partition decisions from the external model. 2178 aom_partition_decision_t decision; 2179 const bool valid_decision = 2180 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2181 if (!valid_decision) return false; 2182 2183 // Populate decisions 2184 *prune_horz = decision.prune_rect_part[HORZ]; 2185 *prune_vert = decision.prune_rect_part[VERT]; 2186 2187 return true; 2188 } 2189 2190 // If the external partition model is used, we let it determine partition 2191 // decisions after none partition. Specifically, these parameters: 2192 // do_square_split 2193 // do_rectangular_split 2194 bool ext_ml_model_decision_after_none( 2195 ExtPartController *const ext_part_controller, const int is_intra_frame, 2196 const float *const features_after_none, int *do_square_split, 2197 int *do_rectangular_split) { 2198 if (!ext_part_controller->ready || is_intra_frame) return false; 2199 2200 // Setup features. 2201 aom_partition_features_t features; 2202 features.id = AOM_EXT_PART_FEATURE_AFTER_NONE; 2203 for (int i = 0; i < 4; ++i) { 2204 features.after_part_none.f[i] = features_after_none[i]; 2205 } 2206 2207 // Send necessary features to the external model. 2208 av1_ext_part_send_features(ext_part_controller, &features); 2209 2210 // Get partition decisions from the external model. 2211 aom_partition_decision_t decision; 2212 const bool valid_decision = 2213 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2214 if (!valid_decision) return false; 2215 2216 // Populate decisions 2217 *do_square_split = decision.do_square_split; 2218 *do_rectangular_split = decision.do_rectangular_split; 2219 2220 return true; 2221 } 2222 2223 // If the external partition model is used, we let it determine partition 2224 // decisions after none partition. Specifically, these parameters: 2225 // terminate_partition_search 2226 bool ext_ml_model_decision_after_none_part2( 2227 AV1_COMP *const cpi, const float *const features_terminate, 2228 int *terminate_partition_search) { 2229 AV1_COMMON *const cm = &cpi->common; 2230 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 2231 if (!ext_part_controller->ready || frame_is_intra_only(cm)) return false; 2232 2233 // Setup features. 2234 aom_partition_features_t features; 2235 features.id = AOM_EXT_PART_FEATURE_AFTER_NONE_PART2; 2236 for (int i = 0; i < FEATURE_SIZE_SMS_TERM_NONE; ++i) { 2237 features.after_part_none.f_terminate[i] = features_terminate[i]; 2238 } 2239 2240 // Send necessary features to the external model. 2241 av1_ext_part_send_features(ext_part_controller, &features); 2242 2243 // Get partition decisions from the external model. 2244 aom_partition_decision_t decision; 2245 const bool valid_decision = 2246 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2247 if (!valid_decision) return false; 2248 2249 // Populate decisions 2250 *terminate_partition_search = decision.terminate_partition_search; 2251 2252 return true; 2253 } 2254 2255 // If the external partition model is used, we let it determine partition 2256 // decisions after none partition. Specifically, these parameters: 2257 // terminate_partition_search 2258 bool ext_ml_model_decision_after_split(AV1_COMP *const cpi, 2259 const float *const features_terminate, 2260 int *terminate_partition_search) { 2261 const AV1_COMMON *const cm = &cpi->common; 2262 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 2263 if (frame_is_intra_only(cm) || !cpi->ext_part_controller.ready) { 2264 return false; 2265 } 2266 2267 // Setup features. 2268 aom_partition_features_t features; 2269 features.id = AOM_EXT_PART_FEATURE_AFTER_SPLIT; 2270 for (int i = 0; i < 31; ++i) { 2271 features.after_part_split.f_terminate[i] = features_terminate[i]; 2272 } 2273 2274 // Send necessary features to the external model. 2275 av1_ext_part_send_features(ext_part_controller, &features); 2276 2277 // Get partition decisions from the external model. 2278 aom_partition_decision_t decision; 2279 const bool valid_decision = 2280 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2281 if (!valid_decision) return false; 2282 2283 // Populate decisions 2284 *terminate_partition_search = decision.terminate_partition_search; 2285 2286 return true; 2287 } 2288 2289 // If the external partition model is used, we let it determine partition 2290 // decisions after none partition. Specifically, these parameters: 2291 // prune_rect_part[HORZ] 2292 // prune_rect_part[VERT] 2293 bool ext_ml_model_decision_after_split_part2( 2294 ExtPartController *const ext_part_controller, const int is_intra_frame, 2295 const float *const features_prune, int *prune_rect_part_horz, 2296 int *prune_rect_part_vert) { 2297 if (is_intra_frame || !ext_part_controller->ready) { 2298 return false; 2299 } 2300 2301 // Setup features. 2302 aom_partition_features_t features; 2303 features.id = AOM_EXT_PART_FEATURE_AFTER_SPLIT_PART2; 2304 for (int i = 0; i < 9; ++i) { 2305 features.after_part_split.f_prune_rect[i] = features_prune[i]; 2306 } 2307 2308 // Send necessary features to the external model. 2309 av1_ext_part_send_features(ext_part_controller, &features); 2310 2311 // Get partition decisions from the external model. 2312 aom_partition_decision_t decision; 2313 const bool valid_decision = 2314 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2315 if (!valid_decision) return false; 2316 2317 // Populate decisions 2318 *prune_rect_part_horz = decision.prune_rect_part[0]; 2319 *prune_rect_part_vert = decision.prune_rect_part[1]; 2320 2321 return true; 2322 } 2323 2324 // If the external partition model is used, we let it determine partition 2325 // decisions after rectangular partition. Specifically, these parameters: 2326 // horza_partition_allowed 2327 // horzb_partition_allowed 2328 // verta_partition_allowed 2329 // vertb_partition_allowed 2330 static bool ext_ml_model_decision_after_rect( 2331 ExtPartController *const ext_part_controller, const int is_intra_frame, 2332 const float *const features_after_rect, int *horza_partition_allowed, 2333 int *horzb_partition_allowed, int *verta_partition_allowed, 2334 int *vertb_partition_allowed) { 2335 if (is_intra_frame || !ext_part_controller->ready) return false; 2336 2337 // Setup features. 2338 aom_partition_features_t features; 2339 features.id = AOM_EXT_PART_FEATURE_AFTER_RECT; 2340 for (int i = 0; i < 10; ++i) { 2341 features.after_part_rect.f[i] = features_after_rect[i]; 2342 } 2343 2344 // Send necessary features to the external model. 2345 av1_ext_part_send_features(ext_part_controller, &features); 2346 2347 // Get partition decisions from the external model. 2348 aom_partition_decision_t decision; 2349 const bool valid_decision = 2350 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2351 if (!valid_decision) return false; 2352 2353 // Populate decisions 2354 *horza_partition_allowed = decision.horza_partition_allowed; 2355 *horzb_partition_allowed = decision.horzb_partition_allowed; 2356 *verta_partition_allowed = decision.verta_partition_allowed; 2357 *vertb_partition_allowed = decision.vertb_partition_allowed; 2358 2359 return true; 2360 } 2361 2362 // If the external partition model is used, we let it determine partition 2363 // decisions after AB partition. Specifically, these parameters: 2364 // partition_vert4_allowed 2365 // partition_horz4_allowed 2366 static bool ext_ml_model_decision_after_part_ab( 2367 AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx, 2368 int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT], 2369 int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed, 2370 int *const partition_vert4_allowed, unsigned int pb_source_variance, 2371 int mi_row, int mi_col) { 2372 const AV1_COMMON *const cm = &cpi->common; 2373 ExtPartController *const ext_part_controller = &cpi->ext_part_controller; 2374 2375 if (!frame_is_intra_only(cm) && ext_part_controller->ready) { 2376 // Setup features. 2377 aom_partition_features_t features; 2378 features.id = AOM_EXT_PART_FEATURE_AFTER_AB; 2379 prepare_features_after_part_ab(cpi, x, bsize, part_ctx, best_rd, 2380 rect_part_rd, split_rd, pb_source_variance, 2381 mi_row, mi_col, &features); 2382 2383 // Send necessary features to the external model. 2384 av1_ext_part_send_features(ext_part_controller, &features); 2385 2386 // Get partition decisions from the external model. 2387 aom_partition_decision_t decision; 2388 const bool valid_decision = 2389 av1_ext_part_get_partition_decision(ext_part_controller, &decision); 2390 if (!valid_decision) return false; 2391 2392 // Populate decisions 2393 *partition_horz4_allowed = decision.partition_horz4_allowed; 2394 *partition_vert4_allowed = decision.partition_vert4_allowed; 2395 2396 return true; 2397 } 2398 2399 return false; 2400 } 2401 2402 // This function resembles "av1_setup_sms_tree()" in context_tree.c 2403 // with function signature change. 2404 static SIMPLE_MOTION_DATA_TREE *setup_sms_tree( 2405 AV1_COMP *const cpi, SIMPLE_MOTION_DATA_TREE *sms_tree) { 2406 AV1_COMMON *const cm = &cpi->common; 2407 const int stat_generation_stage = is_stat_generation_stage(cpi); 2408 const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; 2409 const int tree_nodes = 2410 av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); 2411 int sms_tree_index = 0; 2412 SIMPLE_MOTION_DATA_TREE *this_sms; 2413 int square_index = 1; 2414 int nodes; 2415 this_sms = &sms_tree[0]; 2416 2417 if (!stat_generation_stage) { 2418 const int leaf_factor = is_sb_size_128 ? 4 : 1; 2419 const int leaf_nodes = 256 * leaf_factor; 2420 2421 // Sets up all the leaf nodes in the tree. 2422 for (sms_tree_index = 0; sms_tree_index < leaf_nodes; ++sms_tree_index) { 2423 SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; 2424 tree->block_size = square[0]; 2425 } 2426 2427 // Each node has 4 leaf nodes, fill each block_size level of the tree 2428 // from leafs to the root. 2429 for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { 2430 for (int i = 0; i < nodes; ++i) { 2431 SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; 2432 tree->block_size = square[square_index]; 2433 for (int j = 0; j < 4; j++) tree->split[j] = this_sms++; 2434 ++sms_tree_index; 2435 } 2436 ++square_index; 2437 } 2438 } else { 2439 // Allocation for firstpass/LAP stage 2440 // TODO(Mufaddal): refactor square_index to use a common block_size macro 2441 // from firstpass.c 2442 SIMPLE_MOTION_DATA_TREE *const tree = &sms_tree[sms_tree_index]; 2443 square_index = 2; 2444 tree->block_size = square[square_index]; 2445 } 2446 2447 // Set up the root node for the largest superblock size 2448 return &sms_tree[tree_nodes - 1]; 2449 } 2450 2451 static void write_motion_feature_to_file( 2452 const char *const path, const int sb_counter, const unsigned int *block_sse, 2453 const unsigned int *block_var, const int num_blocks, const BLOCK_SIZE bsize, 2454 const BLOCK_SIZE fixed_block_size, const int mi_row, const int mi_col) { 2455 char filename[256]; 2456 snprintf(filename, sizeof(filename), "%s/motion_search_feature_sb%d", path, 2457 sb_counter); 2458 FILE *pfile = fopen(filename, "w"); 2459 fprintf(pfile, "%d,%d,%d,%d,%d\n", mi_row, mi_col, bsize, 2460 block_size_wide[fixed_block_size], num_blocks); 2461 for (int i = 0; i < num_blocks; ++i) { 2462 fprintf(pfile, "%d", block_sse[i]); 2463 if (i < num_blocks - 1) fprintf(pfile, ","); 2464 } 2465 fprintf(pfile, "\n"); 2466 for (int i = 0; i < num_blocks; ++i) { 2467 fprintf(pfile, "%d", block_var[i]); 2468 if (i < num_blocks - 1) fprintf(pfile, ","); 2469 } 2470 fprintf(pfile, "\n"); 2471 fclose(pfile); 2472 } 2473 2474 void av1_collect_motion_search_features_sb(AV1_COMP *const cpi, ThreadData *td, 2475 TileDataEnc *tile_data, 2476 const int mi_row, const int mi_col, 2477 const BLOCK_SIZE bsize, 2478 aom_partition_features_t *features) { 2479 const AV1_COMMON *const cm = &cpi->common; 2480 if (frame_is_intra_only(cm)) return; 2481 2482 MACROBLOCK *const x = &td->mb; 2483 const BLOCK_SIZE fixed_block_size = BLOCK_16X16; 2484 const int col_step = mi_size_wide[fixed_block_size]; 2485 const int row_step = mi_size_high[fixed_block_size]; 2486 SIMPLE_MOTION_DATA_TREE *sms_tree = NULL; 2487 const int stat_generation_stage = is_stat_generation_stage(cpi); 2488 const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; 2489 const int tree_nodes = 2490 av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); 2491 CHECK_MEM_ERROR(cm, sms_tree, aom_calloc(tree_nodes, sizeof(*sms_tree))); 2492 SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree); 2493 TileInfo *const tile_info = &tile_data->tile_info; 2494 av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, bsize); 2495 av1_init_simple_motion_search_mvs_for_sb(cpi, NULL, x, sms_root, mi_row, 2496 mi_col); 2497 av1_reset_simple_motion_tree_partition(sms_root, bsize); 2498 const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME 2499 : LAST_FRAME }; 2500 const int mi_width = 2501 AOMMIN(mi_size_wide[bsize], cm->mi_params.mi_cols - mi_col); 2502 const int mi_height = 2503 AOMMIN(mi_size_high[bsize], cm->mi_params.mi_rows - mi_row); 2504 const int col_steps = (mi_width / col_step) + ((mi_width % col_step) > 0); 2505 const int row_steps = (mi_height / row_step) + ((mi_height % row_step) > 0); 2506 const int num_blocks = col_steps * row_steps; 2507 unsigned int *block_sse = aom_calloc(num_blocks, sizeof(*block_sse)); 2508 unsigned int *block_var = aom_calloc(num_blocks, sizeof(*block_var)); 2509 if (!(block_sse && block_var)) { 2510 aom_free(sms_tree); 2511 aom_free(block_sse); 2512 aom_free(block_var); 2513 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, 2514 "Error allocating block_sse & block_var"); 2515 } 2516 int idx = 0; 2517 2518 for (int row = mi_row; 2519 row < AOMMIN(mi_row + mi_size_high[bsize], cm->mi_params.mi_rows); 2520 row += row_step) { 2521 for (int col = mi_col; 2522 col < AOMMIN(mi_col + mi_size_wide[bsize], cm->mi_params.mi_cols); 2523 col += col_step) { 2524 simple_motion_search_get_best_ref( 2525 cpi, x, sms_root, row, col, fixed_block_size, ref_list, 2526 /*num_refs=*/1, /*use_subpixel=*/1, 2527 /*save_mv=*/1, &block_sse[idx], &block_var[idx]); 2528 ++idx; 2529 } 2530 } 2531 if (features == NULL) { 2532 write_motion_feature_to_file(cpi->oxcf.partition_info_path, cpi->sb_counter, 2533 block_sse, block_var, idx, bsize, 2534 fixed_block_size, mi_row, mi_col); 2535 } else { 2536 features->sb_features.motion_features.unit_length = 2537 block_size_wide[fixed_block_size]; 2538 features->sb_features.motion_features.num_units = idx; 2539 for (int i = 0; i < idx; ++i) { 2540 features->sb_features.motion_features.block_sse[i] = block_sse[i]; 2541 features->sb_features.motion_features.block_var[i] = block_var[i]; 2542 } 2543 } 2544 2545 aom_free(block_sse); 2546 aom_free(block_var); 2547 aom_free(sms_tree); 2548 } 2549 2550 #if CONFIG_PARTITION_SEARCH_ORDER 2551 void av1_prepare_motion_search_features_block( 2552 AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, 2553 const int mi_row, const int mi_col, const BLOCK_SIZE bsize, 2554 const int valid_partition_types, unsigned int *block_sse, 2555 unsigned int *block_var, unsigned int sub_block_sse[4], 2556 unsigned int sub_block_var[4], unsigned int horz_block_sse[2], 2557 unsigned int horz_block_var[2], unsigned int vert_block_sse[2], 2558 unsigned int vert_block_var[2]) { 2559 const AV1_COMMON *const cm = &cpi->common; 2560 if (frame_is_intra_only(cm)) return; 2561 MACROBLOCK *const x = &td->mb; 2562 SIMPLE_MOTION_DATA_TREE *sms_tree = NULL; 2563 const int stat_generation_stage = is_stat_generation_stage(cpi); 2564 const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128; 2565 const int tree_nodes = 2566 av1_get_pc_tree_nodes(is_sb_size_128, stat_generation_stage); 2567 CHECK_MEM_ERROR(cm, sms_tree, aom_calloc(tree_nodes, sizeof(*sms_tree))); 2568 SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree); 2569 TileInfo *const tile_info = &tile_data->tile_info; 2570 av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, bsize); 2571 av1_reset_simple_motion_tree_partition(sms_root, bsize); 2572 const int ref_list[] = { cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME 2573 : LAST_FRAME }; 2574 const int sub_mi_width = mi_size_wide[bsize] / 2; 2575 const int sub_mi_height = sub_mi_width; 2576 simple_motion_search_get_best_ref( 2577 cpi, x, sms_root, mi_row, mi_col, bsize, ref_list, /*num_refs=*/1, 2578 /*use_subpixel=*/1, /*save_mv=*/1, block_sse, block_var); 2579 // Split to 4 sub blocks. 2580 if (valid_partition_types & (1 << PARTITION_SPLIT)) { 2581 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); 2582 for (int i = 0; i < 4; ++i) { 2583 const int row = mi_row + (i >> 1) * sub_mi_height; 2584 const int col = mi_col + (i & 1) * sub_mi_width; 2585 simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, 2586 ref_list, /*num_refs=*/1, 2587 /*use_subpixel=*/1, /*save_mv=*/1, 2588 &sub_block_sse[i], &sub_block_var[i]); 2589 } 2590 } 2591 // Horizontal split 2592 if (valid_partition_types & (1 << PARTITION_HORZ)) { 2593 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); 2594 for (int i = 0; i < 2; ++i) { 2595 const int row = mi_row + (i & 1) * sub_mi_height; 2596 const int col = mi_col; 2597 simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, 2598 ref_list, /*num_refs=*/1, 2599 /*use_subpixel=*/1, /*save_mv=*/1, 2600 &horz_block_sse[i], &horz_block_var[i]); 2601 } 2602 } 2603 // Vertical split 2604 if (valid_partition_types & (1 << PARTITION_VERT)) { 2605 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); 2606 for (int i = 0; i < 2; ++i) { 2607 const int row = mi_row; 2608 const int col = mi_col + (i & 1) * sub_mi_width; 2609 simple_motion_search_get_best_ref(cpi, x, sms_root, row, col, subsize, 2610 ref_list, /*num_refs=*/1, 2611 /*use_subpixel=*/1, /*save_mv=*/1, 2612 &vert_block_sse[i], &vert_block_var[i]); 2613 } 2614 } 2615 2616 aom_free(sms_tree); 2617 } 2618 #endif // CONFIG_PARTITION_SEARCH_ORDER 2619 #endif // !CONFIG_REALTIME_ONLY 2620 2621 static inline void init_simple_motion_search_mvs( 2622 SIMPLE_MOTION_DATA_TREE *sms_tree, const FULLPEL_MV *start_mvs) { 2623 memcpy(sms_tree->start_mvs, start_mvs, sizeof(sms_tree->start_mvs)); 2624 av1_zero(sms_tree->sms_none_feat); 2625 av1_zero(sms_tree->sms_rect_feat); 2626 av1_zero(sms_tree->sms_none_valid); 2627 av1_zero(sms_tree->sms_rect_valid); 2628 2629 if (sms_tree->block_size >= BLOCK_8X8) { 2630 init_simple_motion_search_mvs(sms_tree->split[0], start_mvs); 2631 init_simple_motion_search_mvs(sms_tree->split[1], start_mvs); 2632 init_simple_motion_search_mvs(sms_tree->split[2], start_mvs); 2633 init_simple_motion_search_mvs(sms_tree->split[3], start_mvs); 2634 } 2635 } 2636 2637 void av1_init_simple_motion_search_mvs_for_sb(const AV1_COMP *cpi, 2638 const TileInfo *tile_info, 2639 MACROBLOCK *x, 2640 SIMPLE_MOTION_DATA_TREE *sms_root, 2641 int mi_row, int mi_col) { 2642 // Use the NEARESTMV of the sb as the start mv 2643 const AV1_COMMON *cm = &cpi->common; 2644 MACROBLOCKD *const xd = &x->e_mbd; 2645 FULLPEL_MV ref_mvs[REF_FRAMES]; 2646 const BLOCK_SIZE sb_size = cm->seq_params->sb_size; 2647 av1_zero(ref_mvs); 2648 // If tile_info is NULL, assume that the offsets have already been set. 2649 if (tile_info) { 2650 av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, 2651 sb_size); 2652 } 2653 2654 MB_MODE_INFO_EXT mbmi_ext; 2655 const int ref_frame = 2656 cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; 2657 av1_find_mv_refs(cm, xd, xd->mi[0], ref_frame, mbmi_ext.ref_mv_count, 2658 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext.global_mvs, 2659 mbmi_ext.mode_context); 2660 if (mbmi_ext.ref_mv_count[ref_frame] > 0) { 2661 ref_mvs[ref_frame] = 2662 get_fullmv_from_mv(&xd->ref_mv_stack[ref_frame][0].this_mv.as_mv); 2663 } else { 2664 ref_mvs[ref_frame] = 2665 get_fullmv_from_mv(&mbmi_ext.global_mvs[ref_frame].as_mv); 2666 } 2667 2668 init_simple_motion_search_mvs(sms_root, ref_mvs); 2669 }