svc_encoder_rtc.cc (103928B)
1 /* 2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved. 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 // This is an example demonstrating how to implement a multi-layer AOM 13 // encoding scheme for RTC video applications. 14 15 #include <assert.h> 16 #include <inttypes.h> 17 #include <limits.h> 18 #include <math.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include <memory> 24 25 #include "config/aom_config.h" 26 27 #if CONFIG_AV1_DECODER 28 #include "aom/aom_decoder.h" 29 #endif 30 #include "aom/aom_encoder.h" 31 #include "aom/aom_image.h" 32 #include "aom/aom_integer.h" 33 #include "aom/aomcx.h" 34 #include "aom_dsp/bitwriter_buffer.h" 35 #include "aom_ports/aom_timer.h" 36 #include "av1/ratectrl_rtc.h" 37 #include "common/args.h" 38 #include "common/tools_common.h" 39 #include "common/video_writer.h" 40 #include "examples/encoder_util.h" 41 #include "examples/multilayer_metadata.h" 42 43 #define OPTION_BUFFER_SIZE 1024 44 #define MAX_NUM_SPATIAL_LAYERS 4 45 46 #define GOOD_QUALITY 0 47 48 typedef struct { 49 const char *output_filename; 50 char options[OPTION_BUFFER_SIZE]; 51 struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS]; 52 int speed; 53 int aq_mode; 54 int layering_mode; 55 int output_obu; 56 int decode; 57 int tune_content; 58 int show_psnr; 59 bool use_external_rc; 60 bool scale_factors_explicitly_set; 61 const char *multilayer_metadata_file; 62 } AppInput; 63 64 typedef enum { 65 QUANTIZER = 0, 66 BITRATE, 67 SCALE_FACTOR, 68 AUTO_ALT_REF, 69 ALL_OPTION_TYPES 70 } LAYER_OPTION_TYPE; 71 72 enum { kSkip = 0, kDeltaQ = 1, kDeltaLF = 2, kReference = 3 }; 73 74 static const arg_def_t outputfile = 75 ARG_DEF("o", "output", 1, "Output filename"); 76 static const arg_def_t frames_arg = 77 ARG_DEF("f", "frames", 1, "Number of frames to encode"); 78 static const arg_def_t threads_arg = 79 ARG_DEF("th", "threads", 1, "Number of threads to use"); 80 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width"); 81 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height"); 82 static const arg_def_t timebase_arg = 83 ARG_DEF("t", "timebase", 1, "Timebase (num/den)"); 84 static const arg_def_t bitrate_arg = ARG_DEF( 85 "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second"); 86 static const arg_def_t spatial_layers_arg = 87 ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers"); 88 static const arg_def_t temporal_layers_arg = 89 ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers"); 90 static const arg_def_t layering_mode_arg = 91 ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme."); 92 static const arg_def_t kf_dist_arg = 93 ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes"); 94 static const arg_def_t scale_factors_arg = 95 ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)"); 96 static const arg_def_t min_q_arg = 97 ARG_DEF(NULL, "min-q", 1, "Minimum quantizer"); 98 static const arg_def_t max_q_arg = 99 ARG_DEF(NULL, "max-q", 1, "Maximum quantizer"); 100 static const arg_def_t speed_arg = 101 ARG_DEF("sp", "speed", 1, "Speed configuration"); 102 static const arg_def_t aqmode_arg = 103 ARG_DEF("aq", "aqmode", 1, "AQ mode off/on"); 104 static const arg_def_t bitrates_arg = 105 ARG_DEF("bl", "bitrates", 1, 106 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]"); 107 static const arg_def_t dropframe_thresh_arg = 108 ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)"); 109 static const arg_def_t error_resilient_arg = 110 ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag"); 111 static const arg_def_t output_obu_arg = 112 ARG_DEF(NULL, "output-obu", 1, 113 "Write OBUs when set to 1. Otherwise write IVF files."); 114 static const arg_def_t test_decode_arg = 115 ARG_DEF(NULL, "test-decode", 1, 116 "Attempt to test decoding the output when set to 1. Default is 1."); 117 static const arg_def_t psnr_arg = 118 ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line."); 119 static const arg_def_t ext_rc_arg = 120 ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control."); 121 static const struct arg_enum_list tune_content_enum[] = { 122 { "default", AOM_CONTENT_DEFAULT }, 123 { "screen", AOM_CONTENT_SCREEN }, 124 { "film", AOM_CONTENT_FILM }, 125 { NULL, 0 } 126 }; 127 static const arg_def_t tune_content_arg = ARG_DEF_ENUM( 128 NULL, "tune-content", 1, "Tune content type", tune_content_enum); 129 #if CONFIG_CWG_E050 130 static const arg_def_t multilayer_metadata_file_arg = 131 ARG_DEF("ml", "multilayer_metadata_file", 1, 132 "Experimental: path to multilayer metadata file"); 133 #endif 134 135 #if CONFIG_AV1_HIGHBITDEPTH 136 static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 }, 137 { "10", AOM_BITS_10 }, 138 { NULL, 0 } }; 139 140 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM( 141 "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum); 142 #endif // CONFIG_AV1_HIGHBITDEPTH 143 144 static const arg_def_t *svc_args[] = { 145 &frames_arg, 146 &outputfile, 147 &width_arg, 148 &height_arg, 149 &timebase_arg, 150 &bitrate_arg, 151 &spatial_layers_arg, 152 &kf_dist_arg, 153 &scale_factors_arg, 154 &min_q_arg, 155 &max_q_arg, 156 &temporal_layers_arg, 157 &layering_mode_arg, 158 &threads_arg, 159 &aqmode_arg, 160 #if CONFIG_AV1_HIGHBITDEPTH 161 &bitdepth_arg, 162 #endif 163 &speed_arg, 164 &bitrates_arg, 165 &dropframe_thresh_arg, 166 &error_resilient_arg, 167 &output_obu_arg, 168 &test_decode_arg, 169 &tune_content_arg, 170 &psnr_arg, 171 #if CONFIG_CWG_E050 172 &multilayer_metadata_file_arg, 173 #endif 174 NULL, 175 }; 176 177 #define zero(Dest) memset(&(Dest), 0, sizeof(Dest)) 178 179 static const char *exec_name; 180 181 void usage_exit(void) { 182 fprintf(stderr, 183 "Usage: %s <options> input_filename [input_filename ...] -o " 184 "output_filename\n", 185 exec_name); 186 fprintf(stderr, "Options:\n"); 187 arg_show_usage(stderr, svc_args); 188 fprintf( 189 stderr, 190 "Input files must be y4m or yuv.\n" 191 "If multiple input files are specified, they correspond to spatial " 192 "layers, and there should be as many as there are spatial layers.\n" 193 "All input files must have the same width, height, frame rate and number " 194 "of frames.\n" 195 "If only one file is specified, it is used for all spatial layers.\n"); 196 exit(EXIT_FAILURE); 197 } 198 199 static int file_is_y4m(const char detect[4]) { 200 return memcmp(detect, "YUV4", 4) == 0; 201 } 202 203 static int fourcc_is_ivf(const char detect[4]) { 204 if (memcmp(detect, "DKIF", 4) == 0) { 205 return 1; 206 } 207 return 0; 208 } 209 210 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX, 211 1 }; 212 213 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 }; 214 215 static void open_input_file(struct AvxInputContext *input, 216 aom_chroma_sample_position_t csp) { 217 /* Parse certain options from the input file, if possible */ 218 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") 219 : set_binary_mode(stdin); 220 221 if (!input->file) fatal("Failed to open input file"); 222 223 if (!fseeko(input->file, 0, SEEK_END)) { 224 /* Input file is seekable. Figure out how long it is, so we can get 225 * progress info. 226 */ 227 input->length = ftello(input->file); 228 rewind(input->file); 229 } 230 231 /* Default to 1:1 pixel aspect ratio. */ 232 input->pixel_aspect_ratio.numerator = 1; 233 input->pixel_aspect_ratio.denominator = 1; 234 235 /* For RAW input sources, these bytes will applied on the first frame 236 * in read_frame(). 237 */ 238 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); 239 input->detect.position = 0; 240 241 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { 242 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp, 243 input->only_i420) >= 0) { 244 input->file_type = FILE_TYPE_Y4M; 245 input->width = input->y4m.pic_w; 246 input->height = input->y4m.pic_h; 247 input->pixel_aspect_ratio.numerator = input->y4m.par_n; 248 input->pixel_aspect_ratio.denominator = input->y4m.par_d; 249 input->framerate.numerator = input->y4m.fps_n; 250 input->framerate.denominator = input->y4m.fps_d; 251 input->fmt = input->y4m.aom_fmt; 252 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth); 253 } else { 254 fatal("Unsupported Y4M stream."); 255 } 256 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { 257 fatal("IVF is not supported as input."); 258 } else { 259 input->file_type = FILE_TYPE_RAW; 260 } 261 } 262 263 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input, 264 int *value0, int *value1) { 265 if (type == SCALE_FACTOR) { 266 *value0 = (int)strtol(input, &input, 10); 267 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM; 268 *value1 = (int)strtol(input, &input, 10); 269 270 if (*value0 < option_min_values[SCALE_FACTOR] || 271 *value1 < option_min_values[SCALE_FACTOR] || 272 *value0 > option_max_values[SCALE_FACTOR] || 273 *value1 > option_max_values[SCALE_FACTOR] || 274 *value0 > *value1) // num shouldn't be greater than den 275 return AOM_CODEC_INVALID_PARAM; 276 } else { 277 *value0 = atoi(input); 278 if (*value0 < option_min_values[type] || *value0 > option_max_values[type]) 279 return AOM_CODEC_INVALID_PARAM; 280 } 281 return AOM_CODEC_OK; 282 } 283 284 static aom_codec_err_t parse_layer_options_from_string( 285 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input, 286 int *option0, int *option1) { 287 aom_codec_err_t res = AOM_CODEC_OK; 288 char *input_string; 289 char *token; 290 const char *delim = ","; 291 int num_layers = svc_params->number_spatial_layers; 292 int i = 0; 293 294 if (type == BITRATE) 295 num_layers = 296 svc_params->number_spatial_layers * svc_params->number_temporal_layers; 297 298 if (input == NULL || option0 == NULL || 299 (option1 == NULL && type == SCALE_FACTOR)) 300 return AOM_CODEC_INVALID_PARAM; 301 302 const size_t input_length = strlen(input); 303 input_string = reinterpret_cast<char *>(malloc(input_length + 1)); 304 if (input_string == NULL) return AOM_CODEC_MEM_ERROR; 305 memcpy(input_string, input, input_length + 1); 306 token = strtok(input_string, delim); // NOLINT 307 for (i = 0; i < num_layers; ++i) { 308 if (token != NULL) { 309 res = extract_option(type, token, option0 + i, option1 + i); 310 if (res != AOM_CODEC_OK) break; 311 token = strtok(NULL, delim); // NOLINT 312 } else { 313 res = AOM_CODEC_INVALID_PARAM; 314 break; 315 } 316 } 317 free(input_string); 318 return res; 319 } 320 321 static void parse_command_line(int argc, const char **argv_, 322 AppInput *app_input, 323 aom_svc_params_t *svc_params, 324 aom_codec_enc_cfg_t *enc_cfg) { 325 struct arg arg; 326 char **argv = NULL; 327 char **argi = NULL; 328 char **argj = NULL; 329 char string_options[1024] = { 0 }; 330 331 // Default settings 332 svc_params->number_spatial_layers = 1; 333 svc_params->number_temporal_layers = 1; 334 app_input->layering_mode = 0; 335 app_input->output_obu = 0; 336 app_input->decode = 1; 337 enc_cfg->g_threads = 1; 338 enc_cfg->rc_end_usage = AOM_CBR; 339 340 // process command line options 341 argv = argv_dup(argc - 1, argv_ + 1); 342 if (!argv) { 343 fprintf(stderr, "Error allocating argument list\n"); 344 exit(EXIT_FAILURE); 345 } 346 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { 347 arg.argv_step = 1; 348 349 if (arg_match(&arg, &outputfile, argi)) { 350 app_input->output_filename = arg.val; 351 } else if (arg_match(&arg, &width_arg, argi)) { 352 enc_cfg->g_w = arg_parse_uint(&arg); 353 } else if (arg_match(&arg, &height_arg, argi)) { 354 enc_cfg->g_h = arg_parse_uint(&arg); 355 } else if (arg_match(&arg, &timebase_arg, argi)) { 356 enc_cfg->g_timebase = arg_parse_rational(&arg); 357 } else if (arg_match(&arg, &bitrate_arg, argi)) { 358 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg); 359 } else if (arg_match(&arg, &spatial_layers_arg, argi)) { 360 svc_params->number_spatial_layers = arg_parse_uint(&arg); 361 } else if (arg_match(&arg, &temporal_layers_arg, argi)) { 362 svc_params->number_temporal_layers = arg_parse_uint(&arg); 363 } else if (arg_match(&arg, &speed_arg, argi)) { 364 app_input->speed = arg_parse_uint(&arg); 365 if (app_input->speed > 11) { 366 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed); 367 } 368 } else if (arg_match(&arg, &aqmode_arg, argi)) { 369 app_input->aq_mode = arg_parse_uint(&arg); 370 } else if (arg_match(&arg, &threads_arg, argi)) { 371 enc_cfg->g_threads = arg_parse_uint(&arg); 372 } else if (arg_match(&arg, &layering_mode_arg, argi)) { 373 app_input->layering_mode = arg_parse_int(&arg); 374 } else if (arg_match(&arg, &kf_dist_arg, argi)) { 375 enc_cfg->kf_min_dist = arg_parse_uint(&arg); 376 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; 377 } else if (arg_match(&arg, &scale_factors_arg, argi)) { 378 aom_codec_err_t res = parse_layer_options_from_string( 379 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num, 380 svc_params->scaling_factor_den); 381 app_input->scale_factors_explicitly_set = true; 382 if (res != AOM_CODEC_OK) { 383 die("Failed to parse scale factors: %s\n", 384 aom_codec_err_to_string(res)); 385 } 386 } else if (arg_match(&arg, &min_q_arg, argi)) { 387 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg); 388 } else if (arg_match(&arg, &max_q_arg, argi)) { 389 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg); 390 #if CONFIG_AV1_HIGHBITDEPTH 391 } else if (arg_match(&arg, &bitdepth_arg, argi)) { 392 enc_cfg->g_bit_depth = 393 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg)); 394 switch (enc_cfg->g_bit_depth) { 395 case AOM_BITS_8: 396 enc_cfg->g_input_bit_depth = 8; 397 enc_cfg->g_profile = 0; 398 break; 399 case AOM_BITS_10: 400 enc_cfg->g_input_bit_depth = 10; 401 enc_cfg->g_profile = 0; 402 break; 403 default: 404 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth); 405 } 406 #endif // CONFIG_VP9_HIGHBITDEPTH 407 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) { 408 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg); 409 } else if (arg_match(&arg, &error_resilient_arg, argi)) { 410 enc_cfg->g_error_resilient = arg_parse_uint(&arg); 411 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1) 412 die("Invalid value for error resilient (0, 1): %d.", 413 enc_cfg->g_error_resilient); 414 } else if (arg_match(&arg, &output_obu_arg, argi)) { 415 app_input->output_obu = arg_parse_uint(&arg); 416 if (app_input->output_obu != 0 && app_input->output_obu != 1) 417 die("Invalid value for obu output flag (0, 1): %d.", 418 app_input->output_obu); 419 } else if (arg_match(&arg, &test_decode_arg, argi)) { 420 app_input->decode = arg_parse_uint(&arg); 421 if (app_input->decode != 0 && app_input->decode != 1) 422 die("Invalid value for test decode flag (0, 1): %d.", 423 app_input->decode); 424 } else if (arg_match(&arg, &tune_content_arg, argi)) { 425 app_input->tune_content = arg_parse_enum_or_int(&arg); 426 printf("tune content %d\n", app_input->tune_content); 427 } else if (arg_match(&arg, &psnr_arg, argi)) { 428 app_input->show_psnr = 1; 429 } else if (arg_match(&arg, &ext_rc_arg, argi)) { 430 app_input->use_external_rc = true; 431 #if CONFIG_CWG_E050 432 } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) { 433 app_input->multilayer_metadata_file = arg.val; 434 #endif 435 } else { 436 ++argj; 437 } 438 } 439 440 // Total bitrate needs to be parsed after the number of layers. 441 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { 442 arg.argv_step = 1; 443 if (arg_match(&arg, &bitrates_arg, argi)) { 444 aom_codec_err_t res = parse_layer_options_from_string( 445 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL); 446 if (res != AOM_CODEC_OK) { 447 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res)); 448 } 449 } else { 450 ++argj; 451 } 452 } 453 454 // There will be a space in front of the string options 455 if (strlen(string_options) > 0) 456 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE); 457 458 // Check for unrecognized options 459 for (argi = argv; *argi; ++argi) 460 if (argi[0][0] == '-' && strlen(argi[0]) > 1) 461 die("Error: Unrecognized option %s\n", *argi); 462 463 if (argv[0] == NULL) { 464 usage_exit(); 465 } 466 467 int input_count = 0; 468 while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) { 469 app_input->input_ctx[input_count].filename = argv[input_count]; 470 ++input_count; 471 } 472 if (input_count > 1 && input_count != svc_params->number_spatial_layers) { 473 die("Error: Number of input files does not match number of spatial layers"); 474 } 475 if (argv[input_count] != NULL) { 476 die("Error: Too many input files specified, there should be at most %d", 477 MAX_NUM_SPATIAL_LAYERS); 478 } 479 480 free(argv); 481 482 for (int i = 0; i < input_count; ++i) { 483 open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN); 484 if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) { 485 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) { 486 // Override these settings with the info from Y4M file. 487 enc_cfg->g_w = app_input->input_ctx[i].width; 488 enc_cfg->g_h = app_input->input_ctx[i].height; 489 // g_timebase is the reciprocal of frame rate. 490 enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator; 491 enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator; 492 } else if (enc_cfg->g_w != app_input->input_ctx[i].width || 493 enc_cfg->g_h != app_input->input_ctx[i].height || 494 enc_cfg->g_timebase.num != 495 app_input->input_ctx[i].framerate.denominator || 496 enc_cfg->g_timebase.den != 497 app_input->input_ctx[i].framerate.numerator) { 498 die("Error: Input file dimensions and/or frame rate mismatch"); 499 } 500 } 501 } 502 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) { 503 die("Error: Input file dimensions not set, use -w and -h"); 504 } 505 506 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || 507 enc_cfg->g_h % 2) 508 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); 509 510 printf( 511 "Codec %s\n" 512 "layers: %d\n" 513 "width %u, height: %u\n" 514 "num: %d, den: %d, bitrate: %u\n" 515 "gop size: %u\n", 516 aom_codec_iface_name(aom_codec_av1_cx()), 517 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h, 518 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den, 519 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist); 520 } 521 522 static const int mode_to_num_temporal_layers[12] = { 523 1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3, 524 }; 525 static const int mode_to_num_spatial_layers[12] = { 526 1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3, 527 }; 528 529 // For rate control encoding stats. 530 struct RateControlMetrics { 531 // Number of input frames per layer. 532 int layer_input_frames[AOM_MAX_TS_LAYERS]; 533 // Number of encoded non-key frames per layer. 534 int layer_enc_frames[AOM_MAX_TS_LAYERS]; 535 // Framerate per layer layer (cumulative). 536 double layer_framerate[AOM_MAX_TS_LAYERS]; 537 // Target average frame size per layer (per-frame-bandwidth per layer). 538 double layer_pfb[AOM_MAX_LAYERS]; 539 // Actual average frame size per layer. 540 double layer_avg_frame_size[AOM_MAX_LAYERS]; 541 // Average rate mismatch per layer (|target - actual| / target). 542 double layer_avg_rate_mismatch[AOM_MAX_LAYERS]; 543 // Actual encoding bitrate per layer (cumulative across temporal layers). 544 double layer_encoding_bitrate[AOM_MAX_LAYERS]; 545 // Average of the short-time encoder actual bitrate. 546 // TODO(marpan): Should we add these short-time stats for each layer? 547 double avg_st_encoding_bitrate; 548 // Variance of the short-time encoder actual bitrate. 549 double variance_st_encoding_bitrate; 550 // Window (number of frames) for computing short-timee encoding bitrate. 551 int window_size; 552 // Number of window measurements. 553 int window_count; 554 int layer_target_bitrate[AOM_MAX_LAYERS]; 555 }; 556 557 static const int REF_FRAMES = 8; 558 559 static const int INTER_REFS_PER_FRAME = 7; 560 561 // Reference frames used in this example encoder. 562 enum { 563 SVC_LAST_FRAME = 0, 564 SVC_LAST2_FRAME, 565 SVC_LAST3_FRAME, 566 SVC_GOLDEN_FRAME, 567 SVC_BWDREF_FRAME, 568 SVC_ALTREF2_FRAME, 569 SVC_ALTREF_FRAME 570 }; 571 572 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) { 573 FILE *f = input_ctx->file; 574 y4m_input *y4m = &input_ctx->y4m; 575 int shortread = 0; 576 577 if (input_ctx->file_type == FILE_TYPE_Y4M) { 578 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; 579 } else { 580 shortread = read_yuv_frame(input_ctx, img); 581 } 582 583 return !shortread; 584 } 585 586 static void close_input_file(struct AvxInputContext *input) { 587 fclose(input->file); 588 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); 589 } 590 591 // Note: these rate control metrics assume only 1 key frame in the 592 // sequence (i.e., first frame only). So for temporal pattern# 7 593 // (which has key frame for every frame on base layer), the metrics 594 // computation will be off/wrong. 595 // TODO(marpan): Update these metrics to account for multiple key frames 596 // in the stream. 597 static void set_rate_control_metrics(struct RateControlMetrics *rc, 598 double framerate, int ss_number_layers, 599 int ts_number_layers) { 600 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 }; 601 ts_rate_decimator[0] = 1; 602 if (ts_number_layers == 2) { 603 ts_rate_decimator[0] = 2; 604 ts_rate_decimator[1] = 1; 605 } 606 if (ts_number_layers == 3) { 607 ts_rate_decimator[0] = 4; 608 ts_rate_decimator[1] = 2; 609 ts_rate_decimator[2] = 1; 610 } 611 // Set the layer (cumulative) framerate and the target layer (non-cumulative) 612 // per-frame-bandwidth, for the rate control encoding stats below. 613 for (int sl = 0; sl < ss_number_layers; ++sl) { 614 int i = sl * ts_number_layers; 615 rc->layer_framerate[0] = framerate / ts_rate_decimator[0]; 616 rc->layer_pfb[i] = 617 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0]; 618 for (int tl = 0; tl < ts_number_layers; ++tl) { 619 i = sl * ts_number_layers + tl; 620 if (tl > 0) { 621 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl]; 622 rc->layer_pfb[i] = 623 1000.0 * 624 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) / 625 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]); 626 } 627 rc->layer_input_frames[tl] = 0; 628 rc->layer_enc_frames[tl] = 0; 629 rc->layer_encoding_bitrate[i] = 0.0; 630 rc->layer_avg_frame_size[i] = 0.0; 631 rc->layer_avg_rate_mismatch[i] = 0.0; 632 } 633 } 634 rc->window_count = 0; 635 rc->window_size = 15; 636 rc->avg_st_encoding_bitrate = 0.0; 637 rc->variance_st_encoding_bitrate = 0.0; 638 } 639 640 static void printout_rate_control_summary(struct RateControlMetrics *rc, 641 int frame_cnt, int ss_number_layers, 642 int ts_number_layers) { 643 int tot_num_frames = 0; 644 double perc_fluctuation = 0.0; 645 printf("Total number of processed frames: %d\n\n", frame_cnt - 1); 646 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers); 647 for (int sl = 0; sl < ss_number_layers; ++sl) { 648 tot_num_frames = 0; 649 for (int tl = 0; tl < ts_number_layers; ++tl) { 650 int i = sl * ts_number_layers + tl; 651 const int num_dropped = 652 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] 653 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1; 654 tot_num_frames += rc->layer_input_frames[tl]; 655 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] * 656 rc->layer_encoding_bitrate[i] / 657 tot_num_frames; 658 rc->layer_avg_frame_size[i] = 659 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl]; 660 rc->layer_avg_rate_mismatch[i] = 661 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl]; 662 printf("For layer#: %d %d \n", sl, tl); 663 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i], 664 rc->layer_encoding_bitrate[i]); 665 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i], 666 rc->layer_avg_frame_size[i]); 667 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]); 668 printf( 669 "Number of input frames, encoded (non-key) frames, " 670 "and perc dropped frames: %d %d %f\n", 671 rc->layer_input_frames[tl], rc->layer_enc_frames[tl], 672 100.0 * num_dropped / rc->layer_input_frames[tl]); 673 printf("\n"); 674 } 675 } 676 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; 677 rc->variance_st_encoding_bitrate = 678 rc->variance_st_encoding_bitrate / rc->window_count - 679 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); 680 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / 681 rc->avg_st_encoding_bitrate; 682 printf("Short-time stats, for window of %d frames:\n", rc->window_size); 683 printf("Average, rms-variance, and percent-fluct: %f %f %f\n", 684 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate), 685 perc_fluctuation); 686 if (frame_cnt - 1 != tot_num_frames) 687 die("Error: Number of input frames not equal to output!\n"); 688 } 689 690 // Layer pattern configuration. 691 static void set_layer_pattern( 692 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id, 693 aom_svc_ref_frame_config_t *ref_frame_config, 694 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control, 695 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed, 696 int *reference_updated, int test_roi_map) { 697 // Setting this flag to 1 enables simplex example of 698 // RPS (Reference Picture Selection) for 1 layer. 699 int use_rps_example = 0; 700 int i; 701 int enable_longterm_temporal_ref = 1; 702 int shift = (layering_mode == 8) ? 2 : 0; 703 int simulcast_mode = (layering_mode == 11); 704 *use_svc_control = 1; 705 layer_id->spatial_layer_id = spatial_layer_id; 706 int lag_index = 0; 707 int base_count = superframe_cnt >> 2; 708 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST 709 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST 710 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST 711 // Set the reference map buffer idx for the 7 references: 712 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3), 713 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6). 714 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i; 715 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0; 716 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0; 717 718 if (ksvc_mode) { 719 // Same pattern as case 9, but the reference strucutre will be constrained 720 // below. 721 layering_mode = 9; 722 } 723 switch (layering_mode) { 724 case 0: 725 if (use_rps_example == 0) { 726 // 1-layer: update LAST on every frame, reference LAST. 727 layer_id->temporal_layer_id = 0; 728 layer_id->spatial_layer_id = 0; 729 ref_frame_config->refresh[0] = 1; 730 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 731 // Add additional reference (GOLDEN) if test_roi_map is set, 732 // to test reference frame feature on segment. 733 if (test_roi_map) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 734 } else { 735 // Pattern of 2 references (ALTREF and GOLDEN) trailing 736 // LAST by 4 and 8 frames, with some switching logic to 737 // sometimes only predict from the longer-term reference 738 //(golden here). This is simple example to test RPS 739 // (reference picture selection). 740 int last_idx = 0; 741 int last_idx_refresh = 0; 742 int gld_idx = 0; 743 int alt_ref_idx = 0; 744 int lag_alt = 4; 745 int lag_gld = 8; 746 layer_id->temporal_layer_id = 0; 747 layer_id->spatial_layer_id = 0; 748 int sh = 8; // slots 0 - 7. 749 // Moving index slot for last: 0 - (sh - 1) 750 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh; 751 // Moving index for refresh of last: one ahead for next frame. 752 last_idx_refresh = superframe_cnt % sh; 753 // Moving index for gld_ref, lag behind current by lag_gld 754 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh; 755 // Moving index for alt_ref, lag behind LAST by lag_alt frames. 756 if (superframe_cnt > lag_alt) 757 alt_ref_idx = (superframe_cnt - lag_alt) % sh; 758 // Set the ref_idx. 759 // Default all references to slot for last. 760 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 761 ref_frame_config->ref_idx[i] = last_idx; 762 // Set the ref_idx for the relevant references. 763 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx; 764 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh; 765 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx; 766 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx; 767 // Refresh this slot, which will become LAST on next frame. 768 ref_frame_config->refresh[last_idx_refresh] = 1; 769 // Reference LAST, ALTREF, and GOLDEN 770 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 771 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; 772 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 773 // Switch to only GOLDEN every 300 frames. 774 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) { 775 ref_frame_config->reference[SVC_LAST_FRAME] = 0; 776 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0; 777 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 778 // Test if the long-term is LAST instead, this is just a renaming 779 // but its tests if encoder behaves the same, whether its 780 // LAST or GOLDEN. 781 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) { 782 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx; 783 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 784 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0; 785 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0; 786 } 787 } 788 } 789 break; 790 case 1: 791 // 2-temporal layer. 792 // 1 3 5 793 // 0 2 4 794 // Keep golden fixed at slot 3. 795 base_count = superframe_cnt >> 1; 796 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 797 // Cyclically refresh slots 5, 6, 7, for lag alt ref. 798 lag_index = 5; 799 if (base_count > 0) { 800 lag_index = 5 + (base_count % 3); 801 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3); 802 } 803 // Set the altref slot to lag_index. 804 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index; 805 if (superframe_cnt % 2 == 0) { 806 layer_id->temporal_layer_id = 0; 807 // Update LAST on layer 0, reference LAST. 808 ref_frame_config->refresh[0] = 1; 809 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 810 // Refresh lag_index slot, needed for lagging golen. 811 ref_frame_config->refresh[lag_index] = 1; 812 // Refresh GOLDEN every x base layer frames. 813 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1; 814 } else { 815 layer_id->temporal_layer_id = 1; 816 // No updates on layer 1, reference LAST (TL0). 817 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 818 } 819 // Always reference golden and altref on TL0. 820 if (layer_id->temporal_layer_id == 0) { 821 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 822 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; 823 } 824 break; 825 case 2: 826 // 3-temporal layer: 827 // 1 3 5 7 828 // 2 6 829 // 0 4 8 830 if (superframe_cnt % 4 == 0) { 831 // Base layer. 832 layer_id->temporal_layer_id = 0; 833 // Update LAST on layer 0, reference LAST. 834 ref_frame_config->refresh[0] = 1; 835 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 836 } else if ((superframe_cnt - 1) % 4 == 0) { 837 layer_id->temporal_layer_id = 2; 838 // First top layer: no updates, only reference LAST (TL0). 839 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 840 } else if ((superframe_cnt - 2) % 4 == 0) { 841 layer_id->temporal_layer_id = 1; 842 // Middle layer (TL1): update LAST2, only reference LAST (TL0). 843 ref_frame_config->refresh[1] = 1; 844 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 845 } else if ((superframe_cnt - 3) % 4 == 0) { 846 layer_id->temporal_layer_id = 2; 847 // Second top layer: no updates, only reference LAST. 848 // Set buffer idx for LAST to slot 1, since that was the slot 849 // updated in previous frame. So LAST is TL1 frame. 850 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 851 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0; 852 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 853 } 854 break; 855 case 3: 856 // 3 TL, same as above, except allow for predicting 857 // off 2 more references (GOLDEN and ALTREF), with 858 // GOLDEN updated periodically, and ALTREF lagging from 859 // LAST from ~4 frames. Both GOLDEN and ALTREF 860 // can only be updated on base temporal layer. 861 862 // Keep golden fixed at slot 3. 863 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 864 // Cyclically refresh slots 5, 6, 7, for lag altref. 865 lag_index = 5; 866 if (base_count > 0) { 867 lag_index = 5 + (base_count % 3); 868 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3); 869 } 870 // Set the altref slot to lag_index. 871 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index; 872 if (superframe_cnt % 4 == 0) { 873 // Base layer. 874 layer_id->temporal_layer_id = 0; 875 // Update LAST on layer 0, reference LAST. 876 ref_frame_config->refresh[0] = 1; 877 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 878 // Refresh GOLDEN every x ~10 base layer frames. 879 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1; 880 // Refresh lag_index slot, needed for lagging altref. 881 ref_frame_config->refresh[lag_index] = 1; 882 } else if ((superframe_cnt - 1) % 4 == 0) { 883 layer_id->temporal_layer_id = 2; 884 // First top layer: no updates, only reference LAST (TL0). 885 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 886 } else if ((superframe_cnt - 2) % 4 == 0) { 887 layer_id->temporal_layer_id = 1; 888 // Middle layer (TL1): update LAST2, only reference LAST (TL0). 889 ref_frame_config->refresh[1] = 1; 890 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 891 } else if ((superframe_cnt - 3) % 4 == 0) { 892 layer_id->temporal_layer_id = 2; 893 // Second top layer: no updates, only reference LAST. 894 // Set buffer idx for LAST to slot 1, since that was the slot 895 // updated in previous frame. So LAST is TL1 frame. 896 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 897 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0; 898 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 899 } 900 // Every frame can reference GOLDEN AND ALTREF. 901 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 902 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; 903 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN. 904 if (speed >= 7) { 905 ref_frame_comp_pred->use_comp_pred[2] = 1; 906 ref_frame_comp_pred->use_comp_pred[0] = 1; 907 } 908 break; 909 case 4: 910 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will 911 // only reference GF (not LAST). Other frames only reference LAST. 912 // 1 3 5 7 913 // 2 6 914 // 0 4 8 915 if (superframe_cnt % 4 == 0) { 916 // Base layer. 917 layer_id->temporal_layer_id = 0; 918 // Update LAST on layer 0, only reference LAST. 919 ref_frame_config->refresh[0] = 1; 920 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 921 } else if ((superframe_cnt - 1) % 4 == 0) { 922 layer_id->temporal_layer_id = 2; 923 // First top layer: no updates, only reference LAST (TL0). 924 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 925 } else if ((superframe_cnt - 2) % 4 == 0) { 926 layer_id->temporal_layer_id = 1; 927 // Middle layer (TL1): update GF, only reference LAST (TL0). 928 ref_frame_config->refresh[3] = 1; 929 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 930 } else if ((superframe_cnt - 3) % 4 == 0) { 931 layer_id->temporal_layer_id = 2; 932 // Second top layer: no updates, only reference GF. 933 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 934 } 935 break; 936 937 case 5: 938 /* 939 // 2 spatial layers, 1 temporal, without temporal prediction on SL1. 940 layer_id->temporal_layer_id = 0; 941 if (layer_id->spatial_layer_id == 0) { 942 // Reference LAST, update LAST. 943 ref_frame_config->refresh[0] = 1; 944 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 945 } else if (layer_id->spatial_layer_id == 1) { 946 // Reference LAST, which is SL0, and no refresh. 947 ref_frame_config->refresh[0] = 0; 948 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 949 } 950 break; 951 */ 952 // 2 spatial layers, 1 temporal. 953 layer_id->temporal_layer_id = 0; 954 if (layer_id->spatial_layer_id == 0) { 955 // Reference LAST, update LAST. 956 ref_frame_config->refresh[0] = 1; 957 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; 958 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2; 959 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 960 } else if (layer_id->spatial_layer_id == 1) { 961 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1 962 // and GOLDEN to slot 0. Update slot 1 (LAST). 963 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 964 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0; 965 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2; 966 ref_frame_config->refresh[1] = 1; 967 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 968 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 969 } 970 break; 971 972 case 6: 973 // 3 spatial layers, 1 temporal. 974 // Note for this case, we set the buffer idx for all references to be 975 // either LAST or GOLDEN, which are always valid references, since decoder 976 // will check if any of the 7 references is valid scale in 977 // valid_ref_frame_size(). 978 layer_id->temporal_layer_id = 0; 979 if (layer_id->spatial_layer_id == 0) { 980 // Reference LAST, update LAST. Set all buffer_idx to 0. 981 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 982 ref_frame_config->ref_idx[i] = 0; 983 ref_frame_config->refresh[0] = 1; 984 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 985 } else if (layer_id->spatial_layer_id == 1) { 986 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1 987 // and GOLDEN (and all other refs) to slot 0. 988 // Update slot 1 (LAST). 989 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 990 ref_frame_config->ref_idx[i] = 0; 991 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 992 ref_frame_config->refresh[1] = 1; 993 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 994 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 995 } else if (layer_id->spatial_layer_id == 2) { 996 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2 997 // and GOLDEN (and all other refs) to slot 1. 998 // Update slot 2 (LAST). 999 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1000 ref_frame_config->ref_idx[i] = 1; 1001 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1002 ref_frame_config->refresh[2] = 1; 1003 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1004 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 1005 // For 3 spatial layer case: allow for top spatial layer to use 1006 // additional temporal reference. Update every 10 frames. 1007 if (enable_longterm_temporal_ref) { 1008 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1; 1009 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; 1010 if (base_count % 10 == 0) 1011 ref_frame_config->refresh[REF_FRAMES - 1] = 1; 1012 } 1013 } 1014 break; 1015 case 7: 1016 // 2 spatial and 3 temporal layer. 1017 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1018 if (superframe_cnt % 4 == 0) { 1019 // Base temporal layer 1020 layer_id->temporal_layer_id = 0; 1021 if (layer_id->spatial_layer_id == 0) { 1022 // Reference LAST, update LAST 1023 // Set all buffer_idx to 0 1024 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1025 ref_frame_config->ref_idx[i] = 0; 1026 ref_frame_config->refresh[0] = 1; 1027 } else if (layer_id->spatial_layer_id == 1) { 1028 // Reference LAST and GOLDEN. 1029 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1030 ref_frame_config->ref_idx[i] = 0; 1031 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 1032 ref_frame_config->refresh[1] = 1; 1033 } 1034 } else if ((superframe_cnt - 1) % 4 == 0) { 1035 // First top temporal enhancement layer. 1036 layer_id->temporal_layer_id = 2; 1037 if (layer_id->spatial_layer_id == 0) { 1038 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1039 ref_frame_config->ref_idx[i] = 0; 1040 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 1041 ref_frame_config->refresh[3] = 1; 1042 } else if (layer_id->spatial_layer_id == 1) { 1043 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, 1044 // GOLDEN (and all other refs) to slot 3. 1045 // No update. 1046 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1047 ref_frame_config->ref_idx[i] = 3; 1048 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 1049 } 1050 } else if ((superframe_cnt - 2) % 4 == 0) { 1051 // Middle temporal enhancement layer. 1052 layer_id->temporal_layer_id = 1; 1053 if (layer_id->spatial_layer_id == 0) { 1054 // Reference LAST. 1055 // Set all buffer_idx to 0. 1056 // Set GOLDEN to slot 5 and update slot 5. 1057 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1058 ref_frame_config->ref_idx[i] = 0; 1059 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift; 1060 ref_frame_config->refresh[5 - shift] = 1; 1061 } else if (layer_id->spatial_layer_id == 1) { 1062 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, 1063 // GOLDEN (and all other refs) to slot 5. 1064 // Set LAST3 to slot 6 and update slot 6. 1065 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1066 ref_frame_config->ref_idx[i] = 5 - shift; 1067 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 1068 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift; 1069 ref_frame_config->refresh[6 - shift] = 1; 1070 } 1071 } else if ((superframe_cnt - 3) % 4 == 0) { 1072 // Second top temporal enhancement layer. 1073 layer_id->temporal_layer_id = 2; 1074 if (layer_id->spatial_layer_id == 0) { 1075 // Set LAST to slot 5 and reference LAST. 1076 // Set GOLDEN to slot 3 and update slot 3. 1077 // Set all other buffer_idx to 0. 1078 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1079 ref_frame_config->ref_idx[i] = 0; 1080 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift; 1081 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 1082 ref_frame_config->refresh[3] = 1; 1083 } else if (layer_id->spatial_layer_id == 1) { 1084 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6, 1085 // GOLDEN to slot 3. No update. 1086 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1087 ref_frame_config->ref_idx[i] = 0; 1088 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift; 1089 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 1090 } 1091 } 1092 break; 1093 case 8: 1094 // 3 spatial and 3 temporal layer. 1095 // Same as case 9 but overalap in the buffer slot updates. 1096 // (shift = 2). The slots 3 and 4 updated by first TL2 are 1097 // reused for update in TL1 superframe. 1098 // Note for this case, frame order hint must be disabled for 1099 // lower resolutios (operating points > 0) to be decoedable. 1100 case 9: 1101 // 3 spatial and 3 temporal layer. 1102 // No overlap in buffer updates between TL2 and TL1. 1103 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7. 1104 // Set the references via the svc_ref_frame_config control. 1105 // Always reference LAST. 1106 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1107 if (superframe_cnt % 4 == 0) { 1108 // Base temporal layer. 1109 layer_id->temporal_layer_id = 0; 1110 if (layer_id->spatial_layer_id == 0) { 1111 // Reference LAST, update LAST. 1112 // Set all buffer_idx to 0. 1113 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1114 ref_frame_config->ref_idx[i] = 0; 1115 ref_frame_config->refresh[0] = 1; 1116 } else if (layer_id->spatial_layer_id == 1) { 1117 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, 1118 // GOLDEN (and all other refs) to slot 0. 1119 // Update slot 1 (LAST). 1120 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1121 ref_frame_config->ref_idx[i] = 0; 1122 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 1123 ref_frame_config->refresh[1] = 1; 1124 } else if (layer_id->spatial_layer_id == 2) { 1125 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, 1126 // GOLDEN (and all other refs) to slot 1. 1127 // Update slot 2 (LAST). 1128 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1129 ref_frame_config->ref_idx[i] = 1; 1130 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1131 ref_frame_config->refresh[2] = 1; 1132 } 1133 } else if ((superframe_cnt - 1) % 4 == 0) { 1134 // First top temporal enhancement layer. 1135 layer_id->temporal_layer_id = 2; 1136 if (layer_id->spatial_layer_id == 0) { 1137 // Reference LAST (slot 0). 1138 // Set GOLDEN to slot 3 and update slot 3. 1139 // Set all other buffer_idx to slot 0. 1140 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1141 ref_frame_config->ref_idx[i] = 0; 1142 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 1143 ref_frame_config->refresh[3] = 1; 1144 } else if (layer_id->spatial_layer_id == 1) { 1145 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, 1146 // GOLDEN (and all other refs) to slot 3. 1147 // Set LAST2 to slot 4 and Update slot 4. 1148 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1149 ref_frame_config->ref_idx[i] = 3; 1150 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 1151 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4; 1152 ref_frame_config->refresh[4] = 1; 1153 } else if (layer_id->spatial_layer_id == 2) { 1154 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, 1155 // GOLDEN (and all other refs) to slot 4. 1156 // No update. 1157 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1158 ref_frame_config->ref_idx[i] = 4; 1159 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1160 } 1161 } else if ((superframe_cnt - 2) % 4 == 0) { 1162 // Middle temporal enhancement layer. 1163 layer_id->temporal_layer_id = 1; 1164 if (layer_id->spatial_layer_id == 0) { 1165 // Reference LAST. 1166 // Set all buffer_idx to 0. 1167 // Set GOLDEN to slot 5 and update slot 5. 1168 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1169 ref_frame_config->ref_idx[i] = 0; 1170 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift; 1171 ref_frame_config->refresh[5 - shift] = 1; 1172 } else if (layer_id->spatial_layer_id == 1) { 1173 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, 1174 // GOLDEN (and all other refs) to slot 5. 1175 // Set LAST3 to slot 6 and update slot 6. 1176 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1177 ref_frame_config->ref_idx[i] = 5 - shift; 1178 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 1179 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift; 1180 ref_frame_config->refresh[6 - shift] = 1; 1181 } else if (layer_id->spatial_layer_id == 2) { 1182 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, 1183 // GOLDEN (and all other refs) to slot 6. 1184 // Set LAST3 to slot 7 and update slot 7. 1185 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1186 ref_frame_config->ref_idx[i] = 6 - shift; 1187 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1188 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift; 1189 ref_frame_config->refresh[7 - shift] = 1; 1190 } 1191 } else if ((superframe_cnt - 3) % 4 == 0) { 1192 // Second top temporal enhancement layer. 1193 layer_id->temporal_layer_id = 2; 1194 if (layer_id->spatial_layer_id == 0) { 1195 // Set LAST to slot 5 and reference LAST. 1196 // Set GOLDEN to slot 3 and update slot 3. 1197 // Set all other buffer_idx to 0. 1198 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1199 ref_frame_config->ref_idx[i] = 0; 1200 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift; 1201 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 1202 ref_frame_config->refresh[3] = 1; 1203 } else if (layer_id->spatial_layer_id == 1) { 1204 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6, 1205 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4. 1206 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1207 ref_frame_config->ref_idx[i] = 0; 1208 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift; 1209 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 1210 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4; 1211 ref_frame_config->refresh[4] = 1; 1212 } else if (layer_id->spatial_layer_id == 2) { 1213 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7, 1214 // GOLDEN to slot 4. No update. 1215 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1216 ref_frame_config->ref_idx[i] = 0; 1217 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift; 1218 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4; 1219 } 1220 } 1221 break; 1222 case 11: 1223 // Simulcast mode for 3 spatial and 3 temporal layers. 1224 // No inter-layer predicton, only prediction is temporal and single 1225 // reference (LAST). 1226 // No overlap in buffer slots between spatial layers. So for example, 1227 // SL0 only uses slots 0 and 1. 1228 // SL1 only uses slots 2 and 3. 1229 // SL2 only uses slots 4 and 5. 1230 // All 7 references for each inter-frame must only access buffer slots 1231 // for that spatial layer. 1232 // On key (super)frames: SL1 and SL2 must have no references set 1233 // and must refresh all the slots for that layer only (so 2 and 3 1234 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally 1235 // as a Key frame (refresh all slots). SL1/SL2 will be labelled 1236 // internally as Intra-only frames that allow that stream to be decoded. 1237 // These conditions will allow for each spatial stream to be 1238 // independently decodeable. 1239 1240 // Initialize all references to 0 (don't use reference). 1241 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1242 ref_frame_config->reference[i] = 0; 1243 // Initialize as no refresh/update for all slots. 1244 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0; 1245 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1246 ref_frame_config->ref_idx[i] = 0; 1247 1248 if (is_key_frame) { 1249 if (layer_id->spatial_layer_id == 0) { 1250 // Assign LAST/GOLDEN to slot 0/1. 1251 // Refesh slots 0 and 1 for SL0. 1252 // SL0: this will get set to KEY frame internally. 1253 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; 1254 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1; 1255 ref_frame_config->refresh[0] = 1; 1256 ref_frame_config->refresh[1] = 1; 1257 } else if (layer_id->spatial_layer_id == 1) { 1258 // Assign LAST/GOLDEN to slot 2/3. 1259 // Refesh slots 2 and 3 for SL1. 1260 // This will get set to Intra-only frame internally. 1261 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1262 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; 1263 ref_frame_config->refresh[2] = 1; 1264 ref_frame_config->refresh[3] = 1; 1265 } else if (layer_id->spatial_layer_id == 2) { 1266 // Assign LAST/GOLDEN to slot 4/5. 1267 // Refresh slots 4 and 5 for SL2. 1268 // This will get set to Intra-only frame internally. 1269 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; 1270 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5; 1271 ref_frame_config->refresh[4] = 1; 1272 ref_frame_config->refresh[5] = 1; 1273 } 1274 } else if (superframe_cnt % 4 == 0) { 1275 // Base temporal layer: TL0 1276 layer_id->temporal_layer_id = 0; 1277 if (layer_id->spatial_layer_id == 0) { // SL0 1278 // Reference LAST. Assign all references to either slot 1279 // 0 or 1. Here we assign LAST to slot 0, all others to 1. 1280 // Update slot 0 (LAST). 1281 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1282 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1283 ref_frame_config->ref_idx[i] = 1; 1284 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; 1285 ref_frame_config->refresh[0] = 1; 1286 } else if (layer_id->spatial_layer_id == 1) { // SL1 1287 // Reference LAST. Assign all references to either slot 1288 // 2 or 3. Here we assign LAST to slot 2, all others to 3. 1289 // Update slot 2 (LAST). 1290 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1291 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1292 ref_frame_config->ref_idx[i] = 3; 1293 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1294 ref_frame_config->refresh[2] = 1; 1295 } else if (layer_id->spatial_layer_id == 2) { // SL2 1296 // Reference LAST. Assign all references to either slot 1297 // 4 or 5. Here we assign LAST to slot 4, all others to 5. 1298 // Update slot 4 (LAST). 1299 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1300 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1301 ref_frame_config->ref_idx[i] = 5; 1302 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; 1303 ref_frame_config->refresh[4] = 1; 1304 } 1305 } else if ((superframe_cnt - 1) % 4 == 0) { 1306 // First top temporal enhancement layer: TL2 1307 layer_id->temporal_layer_id = 2; 1308 if (layer_id->spatial_layer_id == 0) { // SL0 1309 // Reference LAST (slot 0). Assign other references to slot 1. 1310 // No update/refresh on any slots. 1311 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1312 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1313 ref_frame_config->ref_idx[i] = 1; 1314 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; 1315 } else if (layer_id->spatial_layer_id == 1) { // SL1 1316 // Reference LAST (slot 2). Assign other references to slot 3. 1317 // No update/refresh on any slots. 1318 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1319 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1320 ref_frame_config->ref_idx[i] = 3; 1321 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1322 } else if (layer_id->spatial_layer_id == 2) { // SL2 1323 // Reference LAST (slot 4). Assign other references to slot 4. 1324 // No update/refresh on any slots. 1325 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1326 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1327 ref_frame_config->ref_idx[i] = 5; 1328 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; 1329 } 1330 } else if ((superframe_cnt - 2) % 4 == 0) { 1331 // Middle temporal enhancement layer: TL1 1332 layer_id->temporal_layer_id = 1; 1333 if (layer_id->spatial_layer_id == 0) { // SL0 1334 // Reference LAST (slot 0). 1335 // Set GOLDEN to slot 1 and update slot 1. 1336 // This will be used as reference for next TL2. 1337 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1338 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1339 ref_frame_config->ref_idx[i] = 1; 1340 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; 1341 ref_frame_config->refresh[1] = 1; 1342 } else if (layer_id->spatial_layer_id == 1) { // SL1 1343 // Reference LAST (slot 2). 1344 // Set GOLDEN to slot 3 and update slot 3. 1345 // This will be used as reference for next TL2. 1346 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1347 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1348 ref_frame_config->ref_idx[i] = 3; 1349 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; 1350 ref_frame_config->refresh[3] = 1; 1351 } else if (layer_id->spatial_layer_id == 2) { // SL2 1352 // Reference LAST (slot 4). 1353 // Set GOLDEN to slot 5 and update slot 5. 1354 // This will be used as reference for next TL2. 1355 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1356 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1357 ref_frame_config->ref_idx[i] = 5; 1358 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; 1359 ref_frame_config->refresh[5] = 1; 1360 } 1361 } else if ((superframe_cnt - 3) % 4 == 0) { 1362 // Second top temporal enhancement layer: TL2 1363 layer_id->temporal_layer_id = 2; 1364 if (layer_id->spatial_layer_id == 0) { // SL0 1365 // Reference LAST (slot 1). Assign other references to slot 0. 1366 // No update/refresh on any slots. 1367 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1368 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1369 ref_frame_config->ref_idx[i] = 0; 1370 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; 1371 } else if (layer_id->spatial_layer_id == 1) { // SL1 1372 // Reference LAST (slot 3). Assign other references to slot 2. 1373 // No update/refresh on any slots. 1374 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1375 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1376 ref_frame_config->ref_idx[i] = 2; 1377 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3; 1378 } else if (layer_id->spatial_layer_id == 2) { // SL2 1379 // Reference LAST (slot 5). Assign other references to slot 4. 1380 // No update/refresh on any slots. 1381 ref_frame_config->reference[SVC_LAST_FRAME] = 1; 1382 for (i = 0; i < INTER_REFS_PER_FRAME; i++) 1383 ref_frame_config->ref_idx[i] = 4; 1384 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5; 1385 } 1386 } 1387 if (!simulcast_mode && layer_id->spatial_layer_id > 0) { 1388 // Always reference GOLDEN (inter-layer prediction). 1389 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; 1390 if (ksvc_mode) { 1391 // KSVC: only keep the inter-layer reference (GOLDEN) for 1392 // superframes whose base is key. 1393 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0; 1394 } 1395 if (is_key_frame && layer_id->spatial_layer_id > 1) { 1396 // On superframes whose base is key: remove LAST to avoid prediction 1397 // off layer two levels below. 1398 ref_frame_config->reference[SVC_LAST_FRAME] = 0; 1399 } 1400 } 1401 // For 3 spatial layer case 8 (where there is free buffer slot): 1402 // allow for top spatial layer to use additional temporal reference. 1403 // Additional reference is only updated on base temporal layer, every 1404 // 10 TL0 frames here. 1405 if (!simulcast_mode && enable_longterm_temporal_ref && 1406 layer_id->spatial_layer_id == 2 && layering_mode == 8) { 1407 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1; 1408 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; 1409 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0) 1410 ref_frame_config->refresh[REF_FRAMES - 1] = 1; 1411 } 1412 break; 1413 default: assert(0); die("Error: Unsupported temporal layering mode!\n"); 1414 } 1415 for (i = 0; i < REF_FRAMES; i++) { 1416 if (ref_frame_config->refresh[i] == 1) { 1417 *reference_updated = 1; 1418 break; 1419 } 1420 } 1421 } 1422 1423 static void write_literal(struct aom_write_bit_buffer *wb, uint32_t data, 1424 uint8_t bits, uint32_t offset = 0) { 1425 if (bits > 32) { 1426 die("Invalid bits value %d > 32\n", bits); 1427 } 1428 const uint32_t max = static_cast<uint32_t>(((uint64_t)1 << bits) - 1); 1429 if (data < offset || (data - offset) > max) { 1430 die("Invalid data, value %u out of range [%u, %" PRIu64 "]\n", data, offset, 1431 (uint64_t)max + offset); 1432 } 1433 aom_wb_write_unsigned_literal(wb, data - offset, bits); 1434 } 1435 1436 static void write_depth_representation_element( 1437 struct aom_write_bit_buffer *buffer, 1438 const std::pair<libaom_examples::DepthRepresentationElement, bool> 1439 &element) { 1440 if (!element.second) { 1441 return; 1442 } 1443 write_literal(buffer, element.first.sign_flag, 1); 1444 write_literal(buffer, element.first.exponent, 7); 1445 if (element.first.mantissa_len == 0 || element.first.mantissa_len > 32) { 1446 die("Invalid mantissan_len %d\n", element.first.mantissa_len); 1447 } 1448 write_literal(buffer, element.first.mantissa_len - 1, 5); 1449 write_literal(buffer, element.first.mantissa, element.first.mantissa_len); 1450 } 1451 1452 static void write_color_properties( 1453 struct aom_write_bit_buffer *buffer, 1454 const std::pair<libaom_examples::ColorProperties, bool> &color_properties) { 1455 write_literal(buffer, color_properties.second, 1); 1456 if (color_properties.second) { 1457 write_literal(buffer, color_properties.first.color_range, 1); 1458 write_literal(buffer, color_properties.first.color_primaries, 8); 1459 write_literal(buffer, color_properties.first.transfer_characteristics, 8); 1460 write_literal(buffer, color_properties.first.matrix_coefficients, 8); 1461 } else { 1462 write_literal(buffer, 0, 1); // reserved_1bit 1463 } 1464 } 1465 1466 static void write_alpha_information( 1467 struct aom_write_bit_buffer *buffer, 1468 const libaom_examples::AlphaInformation &alpha_info) { 1469 write_literal(buffer, alpha_info.alpha_use_idc, 2); 1470 write_literal(buffer, alpha_info.alpha_simple_flag, 1); 1471 if (!alpha_info.alpha_simple_flag) { 1472 write_literal(buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8); 1473 write_literal(buffer, alpha_info.alpha_clip_idc, 2); 1474 write_literal(buffer, alpha_info.alpha_incr_flag, 1); 1475 write_literal(buffer, alpha_info.alpha_transparent_value, 1476 alpha_info.alpha_bit_depth + 1); 1477 write_literal(buffer, alpha_info.alpha_opaque_value, 1478 alpha_info.alpha_bit_depth + 1); 1479 if (buffer->bit_offset % 8 != 0) { 1480 // ai_byte_alignment_bits 1481 write_literal(buffer, 0, 8 - (buffer->bit_offset % 8)); 1482 } 1483 assert(buffer->bit_offset % 8 == 0); 1484 1485 write_literal(buffer, 0, 6); // ai_reserved_6bits 1486 write_color_properties(buffer, alpha_info.alpha_color_description); 1487 } else { 1488 write_literal(buffer, 0, 5); // ai_reserved_5bits 1489 } 1490 } 1491 1492 static void write_depth_information( 1493 struct aom_write_bit_buffer *buffer, 1494 const libaom_examples::DepthInformation &depth_info) { 1495 write_literal(buffer, depth_info.z_near.second, 1); 1496 write_literal(buffer, depth_info.z_far.second, 1); 1497 write_literal(buffer, depth_info.d_min.second, 1); 1498 write_literal(buffer, depth_info.d_max.second, 1); 1499 write_literal(buffer, depth_info.depth_representation_type, 4); 1500 if (depth_info.d_min.second || depth_info.d_max.second) { 1501 write_literal(buffer, depth_info.disparity_ref_view_id, 2); 1502 } 1503 write_depth_representation_element(buffer, depth_info.z_near); 1504 write_depth_representation_element(buffer, depth_info.z_far); 1505 write_depth_representation_element(buffer, depth_info.d_min); 1506 write_depth_representation_element(buffer, depth_info.d_max); 1507 if (buffer->bit_offset % 8 != 0) { 1508 write_literal(buffer, 0, 8 - (buffer->bit_offset % 8)); 1509 } 1510 } 1511 1512 static void add_multilayer_metadata( 1513 aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer, 1514 int frame_idx, int spatial_id) { 1515 // Large enough buffer for the multilayer metadata. 1516 // Each layer's metadata is less than 100 bytes and there are at most 4 1517 // layers. 1518 std::vector<uint8_t> data(1024); 1519 struct aom_write_bit_buffer buffer = { data.data(), 0 }; 1520 1521 write_literal(&buffer, multilayer.use_case, 6); 1522 if (multilayer.layers.empty()) { 1523 die("Invalid multilayer metadata, no layers found\n"); 1524 } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) { 1525 die("Invalid multilayer metadata, too many layers (max is %d)\n", 1526 MAX_NUM_SPATIAL_LAYERS); 1527 } 1528 write_literal(&buffer, (int)multilayer.layers.size() - 1, 2); 1529 assert(buffer.bit_offset % 8 == 0); 1530 for (size_t i = 0; i < multilayer.layers.size(); ++i) { 1531 const libaom_examples::LayerMetadata &layer = multilayer.layers[i]; 1532 // Alpha info with segmentation with labels can be up to about 66k bytes, 1533 // which requires 3 bytes to encode in leb128. 1534 const int bytes_reserved_for_size = 3; 1535 // Placeholder for layer_metadata_size which will be written later. 1536 write_literal(&buffer, 0, bytes_reserved_for_size * 8); 1537 const uint32_t metadata_start = buffer.bit_offset; 1538 write_literal(&buffer, (int)i, 2); // ml_spatial_id 1539 write_literal(&buffer, layer.layer_type, 5); 1540 write_literal(&buffer, layer.luma_plane_only_flag, 1); 1541 write_literal(&buffer, layer.layer_view_type, 3); 1542 write_literal(&buffer, layer.group_id, 2); 1543 write_literal(&buffer, layer.layer_dependency_idc, 3); 1544 write_literal(&buffer, layer.layer_metadata_scope, 2); 1545 write_literal(&buffer, 0, 4); // ml_reserved_4bits 1546 1547 if (i > 0) { 1548 write_color_properties(&buffer, layer.layer_color_description); 1549 } else { 1550 write_literal(&buffer, 0, 2); // ml_reserved_2bits 1551 } 1552 assert(buffer.bit_offset % 8 == 0); 1553 1554 if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA && 1555 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) { 1556 write_alpha_information(&buffer, layer.alpha); 1557 assert(buffer.bit_offset % 8 == 0); 1558 } else if (layer.layer_type == 1559 libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH && 1560 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) { 1561 write_depth_information(&buffer, layer.depth); 1562 assert(buffer.bit_offset % 8 == 0); 1563 } 1564 1565 assert(buffer.bit_offset % 8 == 0); 1566 1567 const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8; 1568 const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size; 1569 size_t coded_size; 1570 if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size, 1571 bytes_reserved_for_size, 1572 &buffer.bit_buffer[size_pos], &coded_size)) { 1573 // Need to increase bytes_reserved_for_size in the code above. 1574 die("Error: Failed to write metadata size\n"); 1575 } 1576 } 1577 assert(buffer.bit_offset % 8 == 0); 1578 if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/, 1579 buffer.bit_buffer, buffer.bit_offset / 8, 1580 AOM_MIF_KEY_FRAME)) { 1581 die("Error: Failed to add metadata\n"); 1582 } 1583 1584 if ((int)multilayer.layers.size() > spatial_id) { 1585 const libaom_examples::LayerMetadata &layer = multilayer.layers[spatial_id]; 1586 for (const libaom_examples::FrameLocalMetadata &local_metadata : 1587 layer.local_metadata) { 1588 if (local_metadata.frame_idx == frame_idx) { 1589 if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA) { 1590 buffer = { data.data(), 0 }; 1591 write_alpha_information(&buffer, local_metadata.alpha); 1592 if (aom_img_add_metadata(frame, 1593 34 /*METADATA_TYPE_ALPHA_INFORMATION*/, 1594 buffer.bit_buffer, buffer.bit_offset / 8, 1595 AOM_MIF_ANY_FRAME_LAYER_SPECIFIC)) { 1596 die("Error: Failed to add metadata\n"); 1597 } 1598 } else if (layer.layer_type == 1599 libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH) { 1600 buffer = { data.data(), 0 }; 1601 write_depth_information(&buffer, local_metadata.depth); 1602 if (aom_img_add_metadata(frame, 1603 35 /*METADATA_TYPE_DEPTH_INFORMATION*/, 1604 buffer.bit_buffer, buffer.bit_offset / 8, 1605 AOM_MIF_ANY_FRAME_LAYER_SPECIFIC)) { 1606 die("Error: Failed to add metadata\n"); 1607 } 1608 } 1609 break; 1610 } 1611 } 1612 } 1613 } 1614 1615 #if CONFIG_AV1_DECODER 1616 // Returns whether there is a mismatch between the encoder's new frame and the 1617 // decoder's new frame. 1618 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder, 1619 const int frames_out) { 1620 aom_image_t enc_img, dec_img; 1621 int mismatch = 0; 1622 1623 /* Get the internal new frame */ 1624 AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img); 1625 AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img); 1626 1627 #if CONFIG_AV1_HIGHBITDEPTH 1628 if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) != 1629 (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) { 1630 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { 1631 aom_image_t enc_hbd_img; 1632 aom_img_alloc( 1633 &enc_hbd_img, 1634 static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH), 1635 enc_img.d_w, enc_img.d_h, 16); 1636 aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img); 1637 enc_img = enc_hbd_img; 1638 } 1639 if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { 1640 aom_image_t dec_hbd_img; 1641 aom_img_alloc( 1642 &dec_hbd_img, 1643 static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH), 1644 dec_img.d_w, dec_img.d_h, 16); 1645 aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img); 1646 dec_img = dec_hbd_img; 1647 } 1648 } 1649 #endif 1650 1651 if (!aom_compare_img(&enc_img, &dec_img)) { 1652 int y[4], u[4], v[4]; 1653 #if CONFIG_AV1_HIGHBITDEPTH 1654 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { 1655 aom_find_mismatch_high(&enc_img, &dec_img, y, u, v); 1656 } else { 1657 aom_find_mismatch(&enc_img, &dec_img, y, u, v); 1658 } 1659 #else 1660 aom_find_mismatch(&enc_img, &dec_img, y, u, v); 1661 #endif 1662 fprintf(stderr, 1663 "Encode/decode mismatch on frame %d at" 1664 " Y[%d, %d] {%d/%d}," 1665 " U[%d, %d] {%d/%d}," 1666 " V[%d, %d] {%d/%d}\n", 1667 frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], 1668 v[1], v[2], v[3]); 1669 mismatch = 1; 1670 } 1671 1672 aom_img_free(&enc_img); 1673 aom_img_free(&dec_img); 1674 return mismatch; 1675 } 1676 #endif // CONFIG_AV1_DECODER 1677 1678 struct PsnrStats { 1679 // The second element of these arrays is reserved for high bitdepth. 1680 uint64_t psnr_sse_total[2]; 1681 uint64_t psnr_samples_total[2]; 1682 double psnr_totals[2][4]; 1683 int psnr_count[2]; 1684 }; 1685 1686 static void show_psnr(struct PsnrStats *psnr_stream, double peak, 1687 int num_layers) { 1688 for (int sl = 0; sl < num_layers; ++sl) { 1689 if (!psnr_stream[sl].psnr_count[0]) continue; 1690 1691 fprintf(stderr, "\nPSNR (Layer %d, Overall/Avg/Y/U/V)", sl); 1692 const double ovpsnr = 1693 sse_to_psnr((double)psnr_stream[sl].psnr_samples_total[0], peak, 1694 (double)psnr_stream[sl].psnr_sse_total[0]); 1695 fprintf(stderr, " %.3f", ovpsnr); 1696 1697 for (int i = 0; i < 4; i++) { 1698 fprintf( 1699 stderr, " %.3f", 1700 psnr_stream[sl].psnr_totals[0][i] / psnr_stream[sl].psnr_count[0]); 1701 } 1702 } 1703 fprintf(stderr, "\n"); 1704 } 1705 1706 static aom::AV1RateControlRtcConfig create_rtc_rc_config( 1707 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) { 1708 aom::AV1RateControlRtcConfig rc_cfg; 1709 rc_cfg.width = cfg.g_w; 1710 rc_cfg.height = cfg.g_h; 1711 rc_cfg.max_quantizer = cfg.rc_max_quantizer; 1712 rc_cfg.min_quantizer = cfg.rc_min_quantizer; 1713 rc_cfg.target_bandwidth = cfg.rc_target_bitrate; 1714 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz; 1715 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz; 1716 rc_cfg.buf_sz = cfg.rc_buf_sz; 1717 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct; 1718 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct; 1719 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT 1720 rc_cfg.max_intra_bitrate_pct = 300; 1721 rc_cfg.framerate = cfg.g_timebase.den; 1722 // TODO(jianj): Add suppor for SVC. 1723 rc_cfg.ss_number_layers = 1; 1724 rc_cfg.ts_number_layers = 1; 1725 rc_cfg.scaling_factor_num[0] = 1; 1726 rc_cfg.scaling_factor_den[0] = 1; 1727 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth); 1728 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer; 1729 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer; 1730 rc_cfg.aq_mode = app_input.aq_mode; 1731 1732 return rc_cfg; 1733 } 1734 1735 static int qindex_to_quantizer(int qindex) { 1736 // Table that converts 0-63 range Q values passed in outside to the 0-255 1737 // range Qindex used internally. 1738 static const int quantizer_to_qindex[] = { 1739 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 1740 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 1741 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 1742 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 1743 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255, 1744 }; 1745 for (int quantizer = 0; quantizer < 64; ++quantizer) 1746 if (quantizer_to_qindex[quantizer] >= qindex) return quantizer; 1747 1748 return 63; 1749 } 1750 1751 static void set_active_map(const aom_codec_enc_cfg_t *cfg, 1752 aom_codec_ctx_t *codec, int frame_cnt) { 1753 aom_active_map_t map = { 0, 0, 0 }; 1754 1755 map.rows = (cfg->g_h + 15) / 16; 1756 map.cols = (cfg->g_w + 15) / 16; 1757 1758 map.active_map = (uint8_t *)malloc(map.rows * map.cols); 1759 if (!map.active_map) die("Failed to allocate active map"); 1760 1761 // Example map for testing. 1762 for (unsigned int i = 0; i < map.rows; ++i) { 1763 for (unsigned int j = 0; j < map.cols; ++j) { 1764 int index = map.cols * i + j; 1765 map.active_map[index] = 1; 1766 if (frame_cnt < 300) { 1767 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0; 1768 } else if (frame_cnt >= 300) { 1769 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0; 1770 } 1771 } 1772 } 1773 1774 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map)) 1775 die_codec(codec, "Failed to set active map"); 1776 1777 free(map.active_map); 1778 } 1779 1780 static void set_roi_map(const aom_codec_enc_cfg_t *cfg, aom_codec_ctx_t *codec, 1781 int roi_feature) { 1782 aom_roi_map_t roi = aom_roi_map_t(); 1783 const int block_size = 4; 1784 roi.rows = (cfg->g_h + block_size - 1) / block_size; 1785 roi.cols = (cfg->g_w + block_size - 1) / block_size; 1786 memset(&roi.skip, 0, sizeof(roi.skip)); 1787 memset(&roi.delta_q, 0, sizeof(roi.delta_q)); 1788 memset(&roi.delta_lf, 0, sizeof(roi.delta_lf)); 1789 memset(roi.ref_frame, -1, sizeof(roi.ref_frame)); 1790 // Set ROI map to be 1 (segment #1) in middle square of image, 1791 // 0 elsewhere. 1792 roi.enabled = 1; 1793 roi.roi_map = (uint8_t *)calloc(roi.rows * roi.cols, sizeof(*roi.roi_map)); 1794 for (unsigned int i = 0; i < roi.rows; ++i) { 1795 for (unsigned int j = 0; j < roi.cols; ++j) { 1796 const int idx = i * roi.cols + j; 1797 if (i > roi.rows / 4 && i < (3 * roi.rows) / 4 && j > roi.cols / 4 && 1798 j < (3 * roi.cols) / 4) 1799 roi.roi_map[idx] = 1; 1800 else 1801 roi.roi_map[idx] = 0; 1802 } 1803 } 1804 // Set the ROI feature, on segment #1. 1805 if (roi_feature == kSkip) 1806 roi.skip[1] = 1; 1807 else if (roi_feature == kDeltaQ) 1808 roi.delta_q[1] = -40; 1809 else if (roi_feature == kDeltaLF) 1810 roi.delta_lf[1] = 40; 1811 else if (roi_feature == kReference) 1812 roi.ref_frame[1] = 4; // GOLDEN_FRAME 1813 1814 if (aom_codec_control(codec, AOME_SET_ROI_MAP, &roi)) 1815 die_codec(codec, "Failed to set roi map"); 1816 1817 free(roi.roi_map); 1818 } 1819 int main(int argc, const char **argv) { 1820 AppInput app_input; 1821 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL }; 1822 FILE *obu_files[AOM_MAX_LAYERS] = { NULL }; 1823 AvxVideoWriter *total_layer_file = NULL; 1824 FILE *total_layer_obu_file = NULL; 1825 aom_codec_enc_cfg_t cfg; 1826 int frame_cnt = 0; 1827 aom_image_t raw; 1828 int frame_avail; 1829 int got_data = 0; 1830 int flags = 0; 1831 int i; 1832 int pts = 0; // PTS starts at 0. 1833 int frame_duration = 1; // 1 timebase tick per frame. 1834 aom_svc_layer_id_t layer_id; 1835 aom_svc_params_t svc_params; 1836 aom_svc_ref_frame_config_t ref_frame_config; 1837 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred; 1838 1839 #if CONFIG_INTERNAL_STATS 1840 FILE *stats_file = fopen("opsnr.stt", "a"); 1841 if (stats_file == NULL) { 1842 die("Cannot open opsnr.stt\n"); 1843 } 1844 #endif 1845 #if CONFIG_AV1_DECODER 1846 aom_codec_ctx_t decoder; 1847 #endif 1848 1849 struct RateControlMetrics rc; 1850 int64_t cx_time = 0; 1851 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers. 1852 int frame_cnt_layer[AOM_MAX_LAYERS]; 1853 double sum_bitrate = 0.0; 1854 double sum_bitrate2 = 0.0; 1855 double framerate = 30.0; 1856 int use_svc_control = 1; 1857 int set_err_resil_frame = 0; 1858 int test_changing_bitrate = 0; 1859 zero(rc.layer_target_bitrate); 1860 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t)); 1861 memset(&app_input, 0, sizeof(AppInput)); 1862 memset(&svc_params, 0, sizeof(svc_params)); 1863 1864 // Flag to test dynamic scaling of source frames for single 1865 // spatial stream, using the scaling_mode control. 1866 const int test_dynamic_scaling_single_layer = 0; 1867 1868 // Flag to test setting speed per layer. 1869 const int test_speed_per_layer = 0; 1870 1871 // Flag for testing active maps. 1872 const int test_active_maps = 0; 1873 1874 // Flag for testing roi map. 1875 const int test_roi_map = 0; 1876 1877 /* Setup default input stream settings */ 1878 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) { 1879 app_input.input_ctx[i].framerate.numerator = 30; 1880 app_input.input_ctx[i].framerate.denominator = 1; 1881 app_input.input_ctx[i].only_i420 = 0; 1882 app_input.input_ctx[i].bit_depth = AOM_BITS_8; 1883 } 1884 app_input.speed = 7; 1885 exec_name = argv[0]; 1886 1887 // start with default encoder configuration 1888 #if GOOD_QUALITY 1889 aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, 1890 AOM_USAGE_GOOD_QUALITY); 1891 #else 1892 aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, 1893 AOM_USAGE_REALTIME); 1894 #endif 1895 if (res != AOM_CODEC_OK) { 1896 die("Failed to get config: %s\n", aom_codec_err_to_string(res)); 1897 } 1898 1899 #if GOOD_QUALITY 1900 cfg.g_usage = AOM_USAGE_GOOD_QUALITY; 1901 #else 1902 // Real time parameters. 1903 cfg.g_usage = AOM_USAGE_REALTIME; 1904 #endif 1905 1906 cfg.rc_end_usage = AOM_CBR; 1907 cfg.rc_min_quantizer = 2; 1908 cfg.rc_max_quantizer = 52; 1909 cfg.rc_undershoot_pct = 50; 1910 cfg.rc_overshoot_pct = 50; 1911 cfg.rc_buf_initial_sz = 600; 1912 cfg.rc_buf_optimal_sz = 600; 1913 cfg.rc_buf_sz = 1000; 1914 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize. 1915 cfg.g_lag_in_frames = 0; 1916 cfg.kf_mode = AOM_KF_AUTO; 1917 cfg.g_w = 0; // Force user to specify width and height for raw input. 1918 cfg.g_h = 0; 1919 1920 parse_command_line(argc, argv, &app_input, &svc_params, &cfg); 1921 1922 int ts_number_layers = svc_params.number_temporal_layers; 1923 int ss_number_layers = svc_params.number_spatial_layers; 1924 1925 unsigned int width = cfg.g_w; 1926 unsigned int height = cfg.g_h; 1927 1928 if (app_input.layering_mode >= 0) { 1929 if (ts_number_layers != 1930 mode_to_num_temporal_layers[app_input.layering_mode] || 1931 ss_number_layers != 1932 mode_to_num_spatial_layers[app_input.layering_mode]) { 1933 die("Number of layers doesn't match layering mode."); 1934 } 1935 } 1936 1937 bool has_non_y4m_input = false; 1938 for (i = 0; i < AOM_MAX_LAYERS; ++i) { 1939 if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) { 1940 has_non_y4m_input = true; 1941 break; 1942 } 1943 } 1944 // Y4M reader has its own allocation. 1945 if (has_non_y4m_input) { 1946 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) { 1947 die("Failed to allocate image (%dx%d)", width, height); 1948 } 1949 } 1950 1951 aom_codec_iface_t *encoder = aom_codec_av1_cx(); 1952 1953 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0], 1954 sizeof(svc_params.layer_target_bitrate)); 1955 1956 unsigned int total_rate = 0; 1957 for (i = 0; i < ss_number_layers; i++) { 1958 total_rate += 1959 svc_params 1960 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1]; 1961 } 1962 if (total_rate != cfg.rc_target_bitrate) { 1963 die("Incorrect total target bitrate, expected: %d", total_rate); 1964 } 1965 1966 svc_params.framerate_factor[0] = 1; 1967 if (ts_number_layers == 2) { 1968 svc_params.framerate_factor[0] = 2; 1969 svc_params.framerate_factor[1] = 1; 1970 } else if (ts_number_layers == 3) { 1971 svc_params.framerate_factor[0] = 4; 1972 svc_params.framerate_factor[1] = 2; 1973 svc_params.framerate_factor[2] = 1; 1974 } 1975 1976 libaom_examples::MultilayerMetadata multilayer_metadata; 1977 if (app_input.multilayer_metadata_file != NULL) { 1978 if (!libaom_examples::parse_multilayer_file( 1979 app_input.multilayer_metadata_file, &multilayer_metadata)) { 1980 die("Failed to parse multilayer metadata"); 1981 } 1982 libaom_examples::print_multilayer_metadata(multilayer_metadata); 1983 } 1984 1985 framerate = cfg.g_timebase.den / cfg.g_timebase.num; 1986 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers); 1987 1988 AvxVideoInfo info; 1989 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); 1990 info.frame_width = cfg.g_w; 1991 info.frame_height = cfg.g_h; 1992 info.time_base.numerator = cfg.g_timebase.num; 1993 info.time_base.denominator = cfg.g_timebase.den; 1994 // Open an output file for each stream. 1995 for (int sl = 0; sl < ss_number_layers; ++sl) { 1996 for (int tl = 0; tl < ts_number_layers; ++tl) { 1997 i = sl * ts_number_layers + tl; 1998 char file_name[PATH_MAX]; 1999 snprintf(file_name, sizeof(file_name), "%s_%d.av1", 2000 app_input.output_filename, i); 2001 if (app_input.output_obu) { 2002 obu_files[i] = fopen(file_name, "wb"); 2003 if (!obu_files[i]) die("Failed to open %s for writing", file_name); 2004 } else { 2005 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info); 2006 if (!outfile[i]) die("Failed to open %s for writing", file_name); 2007 } 2008 } 2009 } 2010 if (app_input.output_obu) { 2011 total_layer_obu_file = fopen(app_input.output_filename, "wb"); 2012 if (!total_layer_obu_file) 2013 die("Failed to open %s for writing", app_input.output_filename); 2014 } else { 2015 total_layer_file = 2016 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info); 2017 if (!total_layer_file) 2018 die("Failed to open %s for writing", app_input.output_filename); 2019 } 2020 2021 // Initialize codec. 2022 aom_codec_ctx_t codec; 2023 aom_codec_flags_t flag = 0; 2024 flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH; 2025 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0; 2026 if (aom_codec_enc_init(&codec, encoder, &cfg, flag)) 2027 die_codec(&codec, "Failed to initialize encoder"); 2028 2029 #if CONFIG_AV1_DECODER 2030 if (app_input.decode) { 2031 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0)) 2032 die_codec(&decoder, "Failed to initialize decoder"); 2033 } 2034 #endif 2035 2036 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed); 2037 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0); 2038 aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0); 2039 aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1); 2040 aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1); 2041 aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0); 2042 aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0); 2043 aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0); 2044 aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0); 2045 aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0); 2046 aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0); 2047 #if GOOD_QUALITY 2048 aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 0); 2049 aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 0); 2050 aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 0); 2051 aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 0); 2052 #else 2053 aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3); 2054 aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3); 2055 aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3); 2056 aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3); 2057 #endif 2058 aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1); 2059 2060 // Settings to reduce key frame encoding time. 2061 aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0); 2062 aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0); 2063 aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0); 2064 aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0); 2065 aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1); 2066 2067 aom_codec_control(&codec, AV1E_SET_AUTO_TILES, 1); 2068 2069 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content); 2070 if (app_input.tune_content == AOM_CONTENT_SCREEN) { 2071 aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1); 2072 // INTRABC is currently disabled for rt mode, as it's too slow. 2073 aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0); 2074 } 2075 2076 if (app_input.use_external_rc) { 2077 aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1); 2078 } 2079 2080 aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR, INT_MAX); 2081 2082 aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE, 2083 AOM_FULL_SUPERFRAME_DROP); 2084 2085 aom_codec_control(&codec, AV1E_SET_POSTENCODE_DROP_RTC, 1); 2086 2087 svc_params.number_spatial_layers = ss_number_layers; 2088 svc_params.number_temporal_layers = ts_number_layers; 2089 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) { 2090 svc_params.max_quantizers[i] = cfg.rc_max_quantizer; 2091 svc_params.min_quantizers[i] = cfg.rc_min_quantizer; 2092 } 2093 // SET QUANTIZER PER LAYER, E.G FOR 2 SPATIAL LAYERS: 2094 // svc_params.max_quantizers[0] = 40; 2095 // svc_params.min_quantizers[0] = 40; 2096 // svc_params.max_quantizers[1] = 50; 2097 // svc_params.min_quantizers[1] = 50; 2098 2099 if (!app_input.scale_factors_explicitly_set) { 2100 for (i = 0; i < ss_number_layers; ++i) { 2101 svc_params.scaling_factor_num[i] = 1; 2102 svc_params.scaling_factor_den[i] = 1; 2103 } 2104 if (ss_number_layers == 2) { 2105 svc_params.scaling_factor_num[0] = 1; 2106 svc_params.scaling_factor_den[0] = 2; 2107 } else if (ss_number_layers == 3) { 2108 svc_params.scaling_factor_num[0] = 1; 2109 svc_params.scaling_factor_den[0] = 4; 2110 svc_params.scaling_factor_num[1] = 1; 2111 svc_params.scaling_factor_den[1] = 2; 2112 } 2113 } 2114 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params); 2115 // TODO(aomedia:3032): Configure KSVC in fixed mode. 2116 2117 // This controls the maximum target size of the key frame. 2118 // For generating smaller key frames, use a smaller max_intra_size_pct 2119 // value, like 100 or 200. 2120 { 2121 const int max_intra_size_pct = 300; 2122 aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT, 2123 max_intra_size_pct); 2124 } 2125 2126 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) { 2127 cx_time_layer[lx] = 0; 2128 frame_cnt_layer[lx] = 0; 2129 } 2130 2131 std::unique_ptr<aom::AV1RateControlRTC> rc_api; 2132 if (app_input.use_external_rc) { 2133 const aom::AV1RateControlRtcConfig rc_cfg = 2134 create_rtc_rc_config(cfg, app_input); 2135 rc_api = aom::AV1RateControlRTC::Create(rc_cfg); 2136 } 2137 2138 frame_avail = 1; 2139 struct PsnrStats psnr_stream[MAX_NUM_SPATIAL_LAYERS]; 2140 memset(&psnr_stream, 0, sizeof(psnr_stream)); 2141 while (frame_avail || got_data) { 2142 struct aom_usec_timer timer; 2143 frame_avail = read_frame(&(app_input.input_ctx[0]), &raw); 2144 // Loop over spatial layers. 2145 for (int slx = 0; slx < ss_number_layers; slx++) { 2146 if (slx > 0 && app_input.input_ctx[slx].filename != NULL) { 2147 const int previous_layer_frame_avail = frame_avail; 2148 frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw); 2149 if (previous_layer_frame_avail != frame_avail) { 2150 die("Mismatch in number of frames between spatial layer input files"); 2151 } 2152 } 2153 2154 aom_codec_iter_t iter = NULL; 2155 const aom_codec_cx_pkt_t *pkt; 2156 int reference_updated = 0; 2157 int layer = 0; 2158 // Flag for superframe whose base is key. 2159 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0; 2160 // For flexible mode: 2161 if (app_input.layering_mode >= 0) { 2162 // Set the reference/update flags, layer_id, and reference_map 2163 // buffer index. 2164 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id, 2165 &ref_frame_config, &ref_frame_comp_pred, 2166 &use_svc_control, slx, is_key_frame, 2167 (app_input.layering_mode == 10), app_input.speed, 2168 &reference_updated, test_roi_map); 2169 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); 2170 if (use_svc_control) { 2171 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG, 2172 &ref_frame_config); 2173 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED, 2174 &ref_frame_comp_pred); 2175 } 2176 if (app_input.multilayer_metadata_file != NULL) { 2177 add_multilayer_metadata(&raw, multilayer_metadata, frame_cnt, slx); 2178 } 2179 // Set the speed per layer. 2180 if (test_speed_per_layer) { 2181 int speed_per_layer = 10; 2182 if (layer_id.spatial_layer_id == 0) { 2183 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6; 2184 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7; 2185 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8; 2186 } else if (layer_id.spatial_layer_id == 1) { 2187 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7; 2188 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8; 2189 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9; 2190 } else if (layer_id.spatial_layer_id == 2) { 2191 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8; 2192 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9; 2193 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10; 2194 } 2195 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer); 2196 } 2197 } else { 2198 // Only up to 3 temporal layers supported in fixed mode. 2199 // Only need to set spatial and temporal layer_id: reference 2200 // prediction, refresh, and buffer_idx are set internally. 2201 layer_id.spatial_layer_id = slx; 2202 layer_id.temporal_layer_id = 0; 2203 if (ts_number_layers == 2) { 2204 layer_id.temporal_layer_id = (frame_cnt % 2) != 0; 2205 } else if (ts_number_layers == 3) { 2206 if (frame_cnt % 2 != 0) 2207 layer_id.temporal_layer_id = 2; 2208 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) 2209 layer_id.temporal_layer_id = 1; 2210 } 2211 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); 2212 } 2213 2214 if (set_err_resil_frame && cfg.g_error_resilient == 0) { 2215 // Set error_resilient per frame: off/0 for base layer and 2216 // on/1 for enhancement layer frames. 2217 // Note that this is can only be done on the fly/per-frame/layer 2218 // if the config error_resilience is off/0. See the logic for updating 2219 // in set_encoder_config(): 2220 // tool_cfg->error_resilient_mode = 2221 // cfg->g_error_resilient | extra_cfg->error_resilient_mode; 2222 const int err_resil_mode = 2223 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0; 2224 aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE, 2225 err_resil_mode); 2226 } 2227 2228 layer = slx * ts_number_layers + layer_id.temporal_layer_id; 2229 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer]; 2230 2231 if (test_dynamic_scaling_single_layer) { 2232 // Example to scale source down by 2x2, then 4x4, and then back up to 2233 // 2x2, and then back to original. 2234 int frame_2x2 = 200; 2235 int frame_4x4 = 400; 2236 int frame_2x2up = 600; 2237 int frame_orig = 800; 2238 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) { 2239 // Scale source down by 2x2. 2240 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO }; 2241 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); 2242 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) { 2243 // Scale source down by 4x4. 2244 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR }; 2245 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); 2246 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) { 2247 // Source back up to 2x2. 2248 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO }; 2249 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); 2250 } else if (frame_cnt >= frame_orig) { 2251 // Source back up to original resolution (no scaling). 2252 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL }; 2253 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); 2254 } 2255 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 || 2256 frame_cnt == frame_2x2up || frame_cnt == frame_orig) { 2257 // For dynamic resize testing on single layer: refresh all references 2258 // on the resized frame: this is to avoid decode error: 2259 // if resize goes down by >= 4x4 then libaom decoder will throw an 2260 // error that some reference (even though not used) is beyond the 2261 // limit size (must be smaller than 4x4). 2262 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1; 2263 if (use_svc_control) { 2264 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG, 2265 &ref_frame_config); 2266 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED, 2267 &ref_frame_comp_pred); 2268 } 2269 } 2270 } 2271 2272 // Change target_bitrate every other frame. 2273 if (test_changing_bitrate && frame_cnt % 2 == 0) { 2274 if (frame_cnt < 500) 2275 cfg.rc_target_bitrate += 10; 2276 else 2277 cfg.rc_target_bitrate -= 10; 2278 // Do big increase and decrease. 2279 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1; 2280 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1; 2281 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100; 2282 // Call change_config, or bypass with new control. 2283 // res = aom_codec_enc_config_set(&codec, &cfg); 2284 if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR, 2285 cfg.rc_target_bitrate)) 2286 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR"); 2287 } 2288 2289 if (rc_api) { 2290 aom::AV1FrameParamsRTC frame_params; 2291 // TODO(jianj): Add support for SVC. 2292 frame_params.spatial_layer_id = 0; 2293 frame_params.temporal_layer_id = 0; 2294 frame_params.frame_type = 2295 is_key_frame ? aom::kKeyFrame : aom::kInterFrame; 2296 rc_api->ComputeQP(frame_params); 2297 const int current_qp = rc_api->GetQP(); 2298 if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS, 2299 qindex_to_quantizer(current_qp))) { 2300 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS"); 2301 } 2302 } 2303 2304 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt); 2305 2306 if (test_roi_map) set_roi_map(&cfg, &codec, kDeltaQ); 2307 2308 // Do the layer encode. 2309 aom_usec_timer_start(&timer); 2310 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags)) 2311 die_codec(&codec, "Failed to encode frame"); 2312 aom_usec_timer_mark(&timer); 2313 cx_time += aom_usec_timer_elapsed(&timer); 2314 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer); 2315 frame_cnt_layer[layer] += 1; 2316 2317 // Get the high motion content flag. 2318 int content_flag = 0; 2319 if (aom_codec_control(&codec, AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC, 2320 &content_flag)) { 2321 die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC"); 2322 } 2323 2324 got_data = 0; 2325 // For simulcast (mode 11): write out each spatial layer to the file. 2326 int ss_layers_write = (app_input.layering_mode == 11) 2327 ? layer_id.spatial_layer_id + 1 2328 : ss_number_layers; 2329 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) { 2330 switch (pkt->kind) { 2331 case AOM_CODEC_CX_FRAME_PKT: 2332 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write; 2333 ++sl) { 2334 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers; 2335 ++tl) { 2336 int j = sl * ts_number_layers + tl; 2337 if (app_input.output_obu) { 2338 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, 2339 obu_files[j]); 2340 } else { 2341 aom_video_writer_write_frame( 2342 outfile[j], 2343 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf), 2344 pkt->data.frame.sz, pts); 2345 } 2346 if (sl == layer_id.spatial_layer_id) 2347 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz; 2348 } 2349 } 2350 got_data = 1; 2351 // Write everything into the top layer. 2352 if (app_input.output_obu) { 2353 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, 2354 total_layer_obu_file); 2355 } else { 2356 aom_video_writer_write_frame( 2357 total_layer_file, 2358 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf), 2359 pkt->data.frame.sz, pts); 2360 } 2361 // Keep count of rate control stats per layer (for non-key). 2362 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) { 2363 int j = layer_id.spatial_layer_id * ts_number_layers + 2364 layer_id.temporal_layer_id; 2365 assert(j >= 0); 2366 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz; 2367 rc.layer_avg_rate_mismatch[j] += 2368 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) / 2369 rc.layer_pfb[j]; 2370 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id]; 2371 } 2372 2373 if (rc_api) { 2374 rc_api->PostEncodeUpdate(pkt->data.frame.sz); 2375 } 2376 // Update for short-time encoding bitrate states, for moving window 2377 // of size rc->window, shifted by rc->window / 2. 2378 // Ignore first window segment, due to key frame. 2379 // For spatial layers: only do this for top/highest SL. 2380 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) { 2381 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate; 2382 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size; 2383 if (frame_cnt % rc.window_size == 0) { 2384 rc.window_count += 1; 2385 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; 2386 rc.variance_st_encoding_bitrate += 2387 (sum_bitrate / rc.window_size) * 2388 (sum_bitrate / rc.window_size); 2389 sum_bitrate = 0.0; 2390 } 2391 } 2392 // Second shifted window. 2393 if (frame_cnt > rc.window_size + rc.window_size / 2 && 2394 slx == ss_number_layers - 1) { 2395 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate; 2396 if (frame_cnt > 2 * rc.window_size && 2397 frame_cnt % rc.window_size == 0) { 2398 rc.window_count += 1; 2399 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; 2400 rc.variance_st_encoding_bitrate += 2401 (sum_bitrate2 / rc.window_size) * 2402 (sum_bitrate2 / rc.window_size); 2403 sum_bitrate2 = 0.0; 2404 } 2405 } 2406 2407 #if CONFIG_AV1_DECODER 2408 if (app_input.decode) { 2409 if (aom_codec_decode( 2410 &decoder, 2411 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf), 2412 pkt->data.frame.sz, NULL)) 2413 die_codec(&decoder, "Failed to decode frame"); 2414 } 2415 #endif 2416 2417 break; 2418 case AOM_CODEC_PSNR_PKT: 2419 if (app_input.show_psnr) { 2420 const int sl = layer_id.spatial_layer_id; 2421 const int show_psnr_hbd = 2422 (cfg.g_input_bit_depth > 8 || cfg.g_bit_depth > AOM_BITS_8); 2423 const int hbd = show_psnr_hbd; 2424 psnr_stream[sl].psnr_sse_total[hbd] += pkt->data.psnr.sse[0]; 2425 psnr_stream[sl].psnr_samples_total[hbd] += 2426 pkt->data.psnr.samples[0]; 2427 for (i = 0; i < 4; i++) { 2428 psnr_stream[sl].psnr_totals[hbd][i] += pkt->data.psnr.psnr[i]; 2429 } 2430 psnr_stream[sl].psnr_count[hbd]++; 2431 } 2432 break; 2433 default: break; 2434 } 2435 } 2436 #if CONFIG_AV1_DECODER 2437 if (got_data && app_input.decode) { 2438 // Don't look for mismatch on non reference frames. 2439 if (reference_updated) { 2440 if (test_decode(&codec, &decoder, frame_cnt)) { 2441 #if CONFIG_INTERNAL_STATS 2442 fprintf(stats_file, "First mismatch occurred in frame %d\n", 2443 frame_cnt); 2444 fclose(stats_file); 2445 #endif 2446 fatal("Mismatch seen"); 2447 } 2448 } 2449 } 2450 #endif 2451 } // loop over spatial layers 2452 ++frame_cnt; 2453 pts += frame_duration; 2454 } 2455 2456 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) { 2457 if (app_input.input_ctx[i].filename == NULL) { 2458 break; 2459 } 2460 close_input_file(&(app_input.input_ctx[i])); 2461 } 2462 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers, 2463 ts_number_layers); 2464 2465 printf("\n"); 2466 for (int slx = 0; slx < ss_number_layers; slx++) 2467 for (int tlx = 0; tlx < ts_number_layers; tlx++) { 2468 int lx = slx * ts_number_layers + tlx; 2469 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n", 2470 slx, tlx, frame_cnt_layer[lx], 2471 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000), 2472 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]); 2473 } 2474 2475 printf("\n"); 2476 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n", 2477 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), 2478 1000000 * (double)frame_cnt / (double)cx_time); 2479 2480 if (app_input.show_psnr) { 2481 const int show_psnr_hbd = 2482 (cfg.g_input_bit_depth > 8 || cfg.g_bit_depth > AOM_BITS_8); 2483 show_psnr(psnr_stream, (double)((1 << (show_psnr_hbd ? 12 : 8)) - 1), 2484 ss_number_layers); 2485 } 2486 2487 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder"); 2488 2489 #if CONFIG_AV1_DECODER 2490 if (app_input.decode) { 2491 if (aom_codec_destroy(&decoder)) 2492 die_codec(&decoder, "Failed to destroy decoder"); 2493 } 2494 #endif 2495 2496 #if CONFIG_INTERNAL_STATS 2497 fprintf(stats_file, "No mismatch detected in recon buffers\n"); 2498 fclose(stats_file); 2499 #endif 2500 2501 // Try to rewrite the output file headers with the actual frame count. 2502 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) 2503 aom_video_writer_close(outfile[i]); 2504 aom_video_writer_close(total_layer_file); 2505 2506 if (has_non_y4m_input) { 2507 aom_img_free(&raw); 2508 } 2509 return EXIT_SUCCESS; 2510 }