tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

svc_encoder_rtc.cc (103928B)


      1 /*
      2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 //  This is an example demonstrating how to implement a multi-layer AOM
     13 //  encoding scheme for RTC video applications.
     14 
     15 #include <assert.h>
     16 #include <inttypes.h>
     17 #include <limits.h>
     18 #include <math.h>
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 
     23 #include <memory>
     24 
     25 #include "config/aom_config.h"
     26 
     27 #if CONFIG_AV1_DECODER
     28 #include "aom/aom_decoder.h"
     29 #endif
     30 #include "aom/aom_encoder.h"
     31 #include "aom/aom_image.h"
     32 #include "aom/aom_integer.h"
     33 #include "aom/aomcx.h"
     34 #include "aom_dsp/bitwriter_buffer.h"
     35 #include "aom_ports/aom_timer.h"
     36 #include "av1/ratectrl_rtc.h"
     37 #include "common/args.h"
     38 #include "common/tools_common.h"
     39 #include "common/video_writer.h"
     40 #include "examples/encoder_util.h"
     41 #include "examples/multilayer_metadata.h"
     42 
     43 #define OPTION_BUFFER_SIZE 1024
     44 #define MAX_NUM_SPATIAL_LAYERS 4
     45 
     46 #define GOOD_QUALITY 0
     47 
     48 typedef struct {
     49  const char *output_filename;
     50  char options[OPTION_BUFFER_SIZE];
     51  struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
     52  int speed;
     53  int aq_mode;
     54  int layering_mode;
     55  int output_obu;
     56  int decode;
     57  int tune_content;
     58  int show_psnr;
     59  bool use_external_rc;
     60  bool scale_factors_explicitly_set;
     61  const char *multilayer_metadata_file;
     62 } AppInput;
     63 
     64 typedef enum {
     65  QUANTIZER = 0,
     66  BITRATE,
     67  SCALE_FACTOR,
     68  AUTO_ALT_REF,
     69  ALL_OPTION_TYPES
     70 } LAYER_OPTION_TYPE;
     71 
     72 enum { kSkip = 0, kDeltaQ = 1, kDeltaLF = 2, kReference = 3 };
     73 
     74 static const arg_def_t outputfile =
     75    ARG_DEF("o", "output", 1, "Output filename");
     76 static const arg_def_t frames_arg =
     77    ARG_DEF("f", "frames", 1, "Number of frames to encode");
     78 static const arg_def_t threads_arg =
     79    ARG_DEF("th", "threads", 1, "Number of threads to use");
     80 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
     81 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
     82 static const arg_def_t timebase_arg =
     83    ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
     84 static const arg_def_t bitrate_arg = ARG_DEF(
     85    "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
     86 static const arg_def_t spatial_layers_arg =
     87    ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
     88 static const arg_def_t temporal_layers_arg =
     89    ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
     90 static const arg_def_t layering_mode_arg =
     91    ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
     92 static const arg_def_t kf_dist_arg =
     93    ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
     94 static const arg_def_t scale_factors_arg =
     95    ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
     96 static const arg_def_t min_q_arg =
     97    ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
     98 static const arg_def_t max_q_arg =
     99    ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
    100 static const arg_def_t speed_arg =
    101    ARG_DEF("sp", "speed", 1, "Speed configuration");
    102 static const arg_def_t aqmode_arg =
    103    ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
    104 static const arg_def_t bitrates_arg =
    105    ARG_DEF("bl", "bitrates", 1,
    106            "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
    107 static const arg_def_t dropframe_thresh_arg =
    108    ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
    109 static const arg_def_t error_resilient_arg =
    110    ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
    111 static const arg_def_t output_obu_arg =
    112    ARG_DEF(NULL, "output-obu", 1,
    113            "Write OBUs when set to 1. Otherwise write IVF files.");
    114 static const arg_def_t test_decode_arg =
    115    ARG_DEF(NULL, "test-decode", 1,
    116            "Attempt to test decoding the output when set to 1. Default is 1.");
    117 static const arg_def_t psnr_arg =
    118    ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
    119 static const arg_def_t ext_rc_arg =
    120    ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
    121 static const struct arg_enum_list tune_content_enum[] = {
    122  { "default", AOM_CONTENT_DEFAULT },
    123  { "screen", AOM_CONTENT_SCREEN },
    124  { "film", AOM_CONTENT_FILM },
    125  { NULL, 0 }
    126 };
    127 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
    128    NULL, "tune-content", 1, "Tune content type", tune_content_enum);
    129 #if CONFIG_CWG_E050
    130 static const arg_def_t multilayer_metadata_file_arg =
    131    ARG_DEF("ml", "multilayer_metadata_file", 1,
    132            "Experimental: path to multilayer metadata file");
    133 #endif
    134 
    135 #if CONFIG_AV1_HIGHBITDEPTH
    136 static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
    137                                                      { "10", AOM_BITS_10 },
    138                                                      { NULL, 0 } };
    139 
    140 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    141    "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
    142 #endif  // CONFIG_AV1_HIGHBITDEPTH
    143 
    144 static const arg_def_t *svc_args[] = {
    145  &frames_arg,
    146  &outputfile,
    147  &width_arg,
    148  &height_arg,
    149  &timebase_arg,
    150  &bitrate_arg,
    151  &spatial_layers_arg,
    152  &kf_dist_arg,
    153  &scale_factors_arg,
    154  &min_q_arg,
    155  &max_q_arg,
    156  &temporal_layers_arg,
    157  &layering_mode_arg,
    158  &threads_arg,
    159  &aqmode_arg,
    160 #if CONFIG_AV1_HIGHBITDEPTH
    161  &bitdepth_arg,
    162 #endif
    163  &speed_arg,
    164  &bitrates_arg,
    165  &dropframe_thresh_arg,
    166  &error_resilient_arg,
    167  &output_obu_arg,
    168  &test_decode_arg,
    169  &tune_content_arg,
    170  &psnr_arg,
    171 #if CONFIG_CWG_E050
    172  &multilayer_metadata_file_arg,
    173 #endif
    174  NULL,
    175 };
    176 
    177 #define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
    178 
    179 static const char *exec_name;
    180 
    181 void usage_exit(void) {
    182  fprintf(stderr,
    183          "Usage: %s <options> input_filename [input_filename ...] -o "
    184          "output_filename\n",
    185          exec_name);
    186  fprintf(stderr, "Options:\n");
    187  arg_show_usage(stderr, svc_args);
    188  fprintf(
    189      stderr,
    190      "Input files must be y4m or yuv.\n"
    191      "If multiple input files are specified, they correspond to spatial "
    192      "layers, and there should be as many as there are spatial layers.\n"
    193      "All input files must have the same width, height, frame rate and number "
    194      "of frames.\n"
    195      "If only one file is specified, it is used for all spatial layers.\n");
    196  exit(EXIT_FAILURE);
    197 }
    198 
    199 static int file_is_y4m(const char detect[4]) {
    200  return memcmp(detect, "YUV4", 4) == 0;
    201 }
    202 
    203 static int fourcc_is_ivf(const char detect[4]) {
    204  if (memcmp(detect, "DKIF", 4) == 0) {
    205    return 1;
    206  }
    207  return 0;
    208 }
    209 
    210 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
    211                                                         1 };
    212 
    213 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
    214 
    215 static void open_input_file(struct AvxInputContext *input,
    216                            aom_chroma_sample_position_t csp) {
    217  /* Parse certain options from the input file, if possible */
    218  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
    219                                             : set_binary_mode(stdin);
    220 
    221  if (!input->file) fatal("Failed to open input file");
    222 
    223  if (!fseeko(input->file, 0, SEEK_END)) {
    224    /* Input file is seekable. Figure out how long it is, so we can get
    225     * progress info.
    226     */
    227    input->length = ftello(input->file);
    228    rewind(input->file);
    229  }
    230 
    231  /* Default to 1:1 pixel aspect ratio. */
    232  input->pixel_aspect_ratio.numerator = 1;
    233  input->pixel_aspect_ratio.denominator = 1;
    234 
    235  /* For RAW input sources, these bytes will applied on the first frame
    236   *  in read_frame().
    237   */
    238  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
    239  input->detect.position = 0;
    240 
    241  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
    242    if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
    243                       input->only_i420) >= 0) {
    244      input->file_type = FILE_TYPE_Y4M;
    245      input->width = input->y4m.pic_w;
    246      input->height = input->y4m.pic_h;
    247      input->pixel_aspect_ratio.numerator = input->y4m.par_n;
    248      input->pixel_aspect_ratio.denominator = input->y4m.par_d;
    249      input->framerate.numerator = input->y4m.fps_n;
    250      input->framerate.denominator = input->y4m.fps_d;
    251      input->fmt = input->y4m.aom_fmt;
    252      input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
    253    } else {
    254      fatal("Unsupported Y4M stream.");
    255    }
    256  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
    257    fatal("IVF is not supported as input.");
    258  } else {
    259    input->file_type = FILE_TYPE_RAW;
    260  }
    261 }
    262 
    263 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
    264                                      int *value0, int *value1) {
    265  if (type == SCALE_FACTOR) {
    266    *value0 = (int)strtol(input, &input, 10);
    267    if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
    268    *value1 = (int)strtol(input, &input, 10);
    269 
    270    if (*value0 < option_min_values[SCALE_FACTOR] ||
    271        *value1 < option_min_values[SCALE_FACTOR] ||
    272        *value0 > option_max_values[SCALE_FACTOR] ||
    273        *value1 > option_max_values[SCALE_FACTOR] ||
    274        *value0 > *value1)  // num shouldn't be greater than den
    275      return AOM_CODEC_INVALID_PARAM;
    276  } else {
    277    *value0 = atoi(input);
    278    if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
    279      return AOM_CODEC_INVALID_PARAM;
    280  }
    281  return AOM_CODEC_OK;
    282 }
    283 
    284 static aom_codec_err_t parse_layer_options_from_string(
    285    aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
    286    int *option0, int *option1) {
    287  aom_codec_err_t res = AOM_CODEC_OK;
    288  char *input_string;
    289  char *token;
    290  const char *delim = ",";
    291  int num_layers = svc_params->number_spatial_layers;
    292  int i = 0;
    293 
    294  if (type == BITRATE)
    295    num_layers =
    296        svc_params->number_spatial_layers * svc_params->number_temporal_layers;
    297 
    298  if (input == NULL || option0 == NULL ||
    299      (option1 == NULL && type == SCALE_FACTOR))
    300    return AOM_CODEC_INVALID_PARAM;
    301 
    302  const size_t input_length = strlen(input);
    303  input_string = reinterpret_cast<char *>(malloc(input_length + 1));
    304  if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
    305  memcpy(input_string, input, input_length + 1);
    306  token = strtok(input_string, delim);  // NOLINT
    307  for (i = 0; i < num_layers; ++i) {
    308    if (token != NULL) {
    309      res = extract_option(type, token, option0 + i, option1 + i);
    310      if (res != AOM_CODEC_OK) break;
    311      token = strtok(NULL, delim);  // NOLINT
    312    } else {
    313      res = AOM_CODEC_INVALID_PARAM;
    314      break;
    315    }
    316  }
    317  free(input_string);
    318  return res;
    319 }
    320 
    321 static void parse_command_line(int argc, const char **argv_,
    322                               AppInput *app_input,
    323                               aom_svc_params_t *svc_params,
    324                               aom_codec_enc_cfg_t *enc_cfg) {
    325  struct arg arg;
    326  char **argv = NULL;
    327  char **argi = NULL;
    328  char **argj = NULL;
    329  char string_options[1024] = { 0 };
    330 
    331  // Default settings
    332  svc_params->number_spatial_layers = 1;
    333  svc_params->number_temporal_layers = 1;
    334  app_input->layering_mode = 0;
    335  app_input->output_obu = 0;
    336  app_input->decode = 1;
    337  enc_cfg->g_threads = 1;
    338  enc_cfg->rc_end_usage = AOM_CBR;
    339 
    340  // process command line options
    341  argv = argv_dup(argc - 1, argv_ + 1);
    342  if (!argv) {
    343    fprintf(stderr, "Error allocating argument list\n");
    344    exit(EXIT_FAILURE);
    345  }
    346  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
    347    arg.argv_step = 1;
    348 
    349    if (arg_match(&arg, &outputfile, argi)) {
    350      app_input->output_filename = arg.val;
    351    } else if (arg_match(&arg, &width_arg, argi)) {
    352      enc_cfg->g_w = arg_parse_uint(&arg);
    353    } else if (arg_match(&arg, &height_arg, argi)) {
    354      enc_cfg->g_h = arg_parse_uint(&arg);
    355    } else if (arg_match(&arg, &timebase_arg, argi)) {
    356      enc_cfg->g_timebase = arg_parse_rational(&arg);
    357    } else if (arg_match(&arg, &bitrate_arg, argi)) {
    358      enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
    359    } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
    360      svc_params->number_spatial_layers = arg_parse_uint(&arg);
    361    } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
    362      svc_params->number_temporal_layers = arg_parse_uint(&arg);
    363    } else if (arg_match(&arg, &speed_arg, argi)) {
    364      app_input->speed = arg_parse_uint(&arg);
    365      if (app_input->speed > 11) {
    366        aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
    367      }
    368    } else if (arg_match(&arg, &aqmode_arg, argi)) {
    369      app_input->aq_mode = arg_parse_uint(&arg);
    370    } else if (arg_match(&arg, &threads_arg, argi)) {
    371      enc_cfg->g_threads = arg_parse_uint(&arg);
    372    } else if (arg_match(&arg, &layering_mode_arg, argi)) {
    373      app_input->layering_mode = arg_parse_int(&arg);
    374    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
    375      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
    376      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
    377    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
    378      aom_codec_err_t res = parse_layer_options_from_string(
    379          svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
    380          svc_params->scaling_factor_den);
    381      app_input->scale_factors_explicitly_set = true;
    382      if (res != AOM_CODEC_OK) {
    383        die("Failed to parse scale factors: %s\n",
    384            aom_codec_err_to_string(res));
    385      }
    386    } else if (arg_match(&arg, &min_q_arg, argi)) {
    387      enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
    388    } else if (arg_match(&arg, &max_q_arg, argi)) {
    389      enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
    390 #if CONFIG_AV1_HIGHBITDEPTH
    391    } else if (arg_match(&arg, &bitdepth_arg, argi)) {
    392      enc_cfg->g_bit_depth =
    393          static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
    394      switch (enc_cfg->g_bit_depth) {
    395        case AOM_BITS_8:
    396          enc_cfg->g_input_bit_depth = 8;
    397          enc_cfg->g_profile = 0;
    398          break;
    399        case AOM_BITS_10:
    400          enc_cfg->g_input_bit_depth = 10;
    401          enc_cfg->g_profile = 0;
    402          break;
    403        default:
    404          die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
    405      }
    406 #endif  // CONFIG_VP9_HIGHBITDEPTH
    407    } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
    408      enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
    409    } else if (arg_match(&arg, &error_resilient_arg, argi)) {
    410      enc_cfg->g_error_resilient = arg_parse_uint(&arg);
    411      if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
    412        die("Invalid value for error resilient (0, 1): %d.",
    413            enc_cfg->g_error_resilient);
    414    } else if (arg_match(&arg, &output_obu_arg, argi)) {
    415      app_input->output_obu = arg_parse_uint(&arg);
    416      if (app_input->output_obu != 0 && app_input->output_obu != 1)
    417        die("Invalid value for obu output flag (0, 1): %d.",
    418            app_input->output_obu);
    419    } else if (arg_match(&arg, &test_decode_arg, argi)) {
    420      app_input->decode = arg_parse_uint(&arg);
    421      if (app_input->decode != 0 && app_input->decode != 1)
    422        die("Invalid value for test decode flag (0, 1): %d.",
    423            app_input->decode);
    424    } else if (arg_match(&arg, &tune_content_arg, argi)) {
    425      app_input->tune_content = arg_parse_enum_or_int(&arg);
    426      printf("tune content %d\n", app_input->tune_content);
    427    } else if (arg_match(&arg, &psnr_arg, argi)) {
    428      app_input->show_psnr = 1;
    429    } else if (arg_match(&arg, &ext_rc_arg, argi)) {
    430      app_input->use_external_rc = true;
    431 #if CONFIG_CWG_E050
    432    } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
    433      app_input->multilayer_metadata_file = arg.val;
    434 #endif
    435    } else {
    436      ++argj;
    437    }
    438  }
    439 
    440  // Total bitrate needs to be parsed after the number of layers.
    441  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
    442    arg.argv_step = 1;
    443    if (arg_match(&arg, &bitrates_arg, argi)) {
    444      aom_codec_err_t res = parse_layer_options_from_string(
    445          svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
    446      if (res != AOM_CODEC_OK) {
    447        die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
    448      }
    449    } else {
    450      ++argj;
    451    }
    452  }
    453 
    454  // There will be a space in front of the string options
    455  if (strlen(string_options) > 0)
    456    strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
    457 
    458  // Check for unrecognized options
    459  for (argi = argv; *argi; ++argi)
    460    if (argi[0][0] == '-' && strlen(argi[0]) > 1)
    461      die("Error: Unrecognized option %s\n", *argi);
    462 
    463  if (argv[0] == NULL) {
    464    usage_exit();
    465  }
    466 
    467  int input_count = 0;
    468  while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
    469    app_input->input_ctx[input_count].filename = argv[input_count];
    470    ++input_count;
    471  }
    472  if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
    473    die("Error: Number of input files does not match number of spatial layers");
    474  }
    475  if (argv[input_count] != NULL) {
    476    die("Error: Too many input files specified, there should be at most %d",
    477        MAX_NUM_SPATIAL_LAYERS);
    478  }
    479 
    480  free(argv);
    481 
    482  for (int i = 0; i < input_count; ++i) {
    483    open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
    484    if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
    485      if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
    486        // Override these settings with the info from Y4M file.
    487        enc_cfg->g_w = app_input->input_ctx[i].width;
    488        enc_cfg->g_h = app_input->input_ctx[i].height;
    489        // g_timebase is the reciprocal of frame rate.
    490        enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
    491        enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
    492      } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
    493                 enc_cfg->g_h != app_input->input_ctx[i].height ||
    494                 enc_cfg->g_timebase.num !=
    495                     app_input->input_ctx[i].framerate.denominator ||
    496                 enc_cfg->g_timebase.den !=
    497                     app_input->input_ctx[i].framerate.numerator) {
    498        die("Error: Input file dimensions and/or frame rate mismatch");
    499      }
    500    }
    501  }
    502  if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
    503    die("Error: Input file dimensions not set, use -w and -h");
    504  }
    505 
    506  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
    507      enc_cfg->g_h % 2)
    508    die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
    509 
    510  printf(
    511      "Codec %s\n"
    512      "layers: %d\n"
    513      "width %u, height: %u\n"
    514      "num: %d, den: %d, bitrate: %u\n"
    515      "gop size: %u\n",
    516      aom_codec_iface_name(aom_codec_av1_cx()),
    517      svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
    518      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
    519      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
    520 }
    521 
    522 static const int mode_to_num_temporal_layers[12] = {
    523  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
    524 };
    525 static const int mode_to_num_spatial_layers[12] = {
    526  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
    527 };
    528 
    529 // For rate control encoding stats.
    530 struct RateControlMetrics {
    531  // Number of input frames per layer.
    532  int layer_input_frames[AOM_MAX_TS_LAYERS];
    533  // Number of encoded non-key frames per layer.
    534  int layer_enc_frames[AOM_MAX_TS_LAYERS];
    535  // Framerate per layer layer (cumulative).
    536  double layer_framerate[AOM_MAX_TS_LAYERS];
    537  // Target average frame size per layer (per-frame-bandwidth per layer).
    538  double layer_pfb[AOM_MAX_LAYERS];
    539  // Actual average frame size per layer.
    540  double layer_avg_frame_size[AOM_MAX_LAYERS];
    541  // Average rate mismatch per layer (|target - actual| / target).
    542  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
    543  // Actual encoding bitrate per layer (cumulative across temporal layers).
    544  double layer_encoding_bitrate[AOM_MAX_LAYERS];
    545  // Average of the short-time encoder actual bitrate.
    546  // TODO(marpan): Should we add these short-time stats for each layer?
    547  double avg_st_encoding_bitrate;
    548  // Variance of the short-time encoder actual bitrate.
    549  double variance_st_encoding_bitrate;
    550  // Window (number of frames) for computing short-timee encoding bitrate.
    551  int window_size;
    552  // Number of window measurements.
    553  int window_count;
    554  int layer_target_bitrate[AOM_MAX_LAYERS];
    555 };
    556 
    557 static const int REF_FRAMES = 8;
    558 
    559 static const int INTER_REFS_PER_FRAME = 7;
    560 
    561 // Reference frames used in this example encoder.
    562 enum {
    563  SVC_LAST_FRAME = 0,
    564  SVC_LAST2_FRAME,
    565  SVC_LAST3_FRAME,
    566  SVC_GOLDEN_FRAME,
    567  SVC_BWDREF_FRAME,
    568  SVC_ALTREF2_FRAME,
    569  SVC_ALTREF_FRAME
    570 };
    571 
    572 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
    573  FILE *f = input_ctx->file;
    574  y4m_input *y4m = &input_ctx->y4m;
    575  int shortread = 0;
    576 
    577  if (input_ctx->file_type == FILE_TYPE_Y4M) {
    578    if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
    579  } else {
    580    shortread = read_yuv_frame(input_ctx, img);
    581  }
    582 
    583  return !shortread;
    584 }
    585 
    586 static void close_input_file(struct AvxInputContext *input) {
    587  fclose(input->file);
    588  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
    589 }
    590 
    591 // Note: these rate control metrics assume only 1 key frame in the
    592 // sequence (i.e., first frame only). So for temporal pattern# 7
    593 // (which has key frame for every frame on base layer), the metrics
    594 // computation will be off/wrong.
    595 // TODO(marpan): Update these metrics to account for multiple key frames
    596 // in the stream.
    597 static void set_rate_control_metrics(struct RateControlMetrics *rc,
    598                                     double framerate, int ss_number_layers,
    599                                     int ts_number_layers) {
    600  int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
    601  ts_rate_decimator[0] = 1;
    602  if (ts_number_layers == 2) {
    603    ts_rate_decimator[0] = 2;
    604    ts_rate_decimator[1] = 1;
    605  }
    606  if (ts_number_layers == 3) {
    607    ts_rate_decimator[0] = 4;
    608    ts_rate_decimator[1] = 2;
    609    ts_rate_decimator[2] = 1;
    610  }
    611  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
    612  // per-frame-bandwidth, for the rate control encoding stats below.
    613  for (int sl = 0; sl < ss_number_layers; ++sl) {
    614    int i = sl * ts_number_layers;
    615    rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
    616    rc->layer_pfb[i] =
    617        1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
    618    for (int tl = 0; tl < ts_number_layers; ++tl) {
    619      i = sl * ts_number_layers + tl;
    620      if (tl > 0) {
    621        rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
    622        rc->layer_pfb[i] =
    623            1000.0 *
    624            (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
    625            (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
    626      }
    627      rc->layer_input_frames[tl] = 0;
    628      rc->layer_enc_frames[tl] = 0;
    629      rc->layer_encoding_bitrate[i] = 0.0;
    630      rc->layer_avg_frame_size[i] = 0.0;
    631      rc->layer_avg_rate_mismatch[i] = 0.0;
    632    }
    633  }
    634  rc->window_count = 0;
    635  rc->window_size = 15;
    636  rc->avg_st_encoding_bitrate = 0.0;
    637  rc->variance_st_encoding_bitrate = 0.0;
    638 }
    639 
    640 static void printout_rate_control_summary(struct RateControlMetrics *rc,
    641                                          int frame_cnt, int ss_number_layers,
    642                                          int ts_number_layers) {
    643  int tot_num_frames = 0;
    644  double perc_fluctuation = 0.0;
    645  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
    646  printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
    647  for (int sl = 0; sl < ss_number_layers; ++sl) {
    648    tot_num_frames = 0;
    649    for (int tl = 0; tl < ts_number_layers; ++tl) {
    650      int i = sl * ts_number_layers + tl;
    651      const int num_dropped =
    652          tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
    653                 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
    654      tot_num_frames += rc->layer_input_frames[tl];
    655      rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
    656                                      rc->layer_encoding_bitrate[i] /
    657                                      tot_num_frames;
    658      rc->layer_avg_frame_size[i] =
    659          rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
    660      rc->layer_avg_rate_mismatch[i] =
    661          100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
    662      printf("For layer#: %d %d \n", sl, tl);
    663      printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
    664             rc->layer_encoding_bitrate[i]);
    665      printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
    666             rc->layer_avg_frame_size[i]);
    667      printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
    668      printf(
    669          "Number of input frames, encoded (non-key) frames, "
    670          "and perc dropped frames: %d %d %f\n",
    671          rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
    672          100.0 * num_dropped / rc->layer_input_frames[tl]);
    673      printf("\n");
    674    }
    675  }
    676  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
    677  rc->variance_st_encoding_bitrate =
    678      rc->variance_st_encoding_bitrate / rc->window_count -
    679      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
    680  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
    681                     rc->avg_st_encoding_bitrate;
    682  printf("Short-time stats, for window of %d frames:\n", rc->window_size);
    683  printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
    684         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
    685         perc_fluctuation);
    686  if (frame_cnt - 1 != tot_num_frames)
    687    die("Error: Number of input frames not equal to output!\n");
    688 }
    689 
    690 // Layer pattern configuration.
    691 static void set_layer_pattern(
    692    int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
    693    aom_svc_ref_frame_config_t *ref_frame_config,
    694    aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
    695    int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed,
    696    int *reference_updated, int test_roi_map) {
    697  // Setting this flag to 1 enables simplex example of
    698  // RPS (Reference Picture Selection) for 1 layer.
    699  int use_rps_example = 0;
    700  int i;
    701  int enable_longterm_temporal_ref = 1;
    702  int shift = (layering_mode == 8) ? 2 : 0;
    703  int simulcast_mode = (layering_mode == 11);
    704  *use_svc_control = 1;
    705  layer_id->spatial_layer_id = spatial_layer_id;
    706  int lag_index = 0;
    707  int base_count = superframe_cnt >> 2;
    708  ref_frame_comp_pred->use_comp_pred[0] = 0;  // GOLDEN_LAST
    709  ref_frame_comp_pred->use_comp_pred[1] = 0;  // LAST2_LAST
    710  ref_frame_comp_pred->use_comp_pred[2] = 0;  // ALTREF_LAST
    711  // Set the reference map buffer idx for the 7 references:
    712  // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
    713  // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
    714  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
    715  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
    716  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
    717 
    718  if (ksvc_mode) {
    719    // Same pattern as case 9, but the reference structure will be constrained
    720    // below.
    721    layering_mode = 9;
    722  }
    723  switch (layering_mode) {
    724    case 0:
    725      if (use_rps_example == 0) {
    726        // 1-layer: update LAST on every frame, reference LAST.
    727        layer_id->temporal_layer_id = 0;
    728        layer_id->spatial_layer_id = 0;
    729        ref_frame_config->refresh[0] = 1;
    730        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    731        // Add additional reference (GOLDEN) if test_roi_map is set,
    732        // to test reference frame feature on segment.
    733        if (test_roi_map) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    734      } else {
    735        // Pattern of 2 references (ALTREF and GOLDEN) trailing
    736        // LAST by 4 and 8 frames, with some switching logic to
    737        // sometimes only predict from the longer-term reference
    738        // (golden here). This is a simple example to test RPS
    739        // (reference picture selection).
    740        int last_idx = 0;
    741        int last_idx_refresh = 0;
    742        int gld_idx = 0;
    743        int alt_ref_idx = 0;
    744        int lag_alt = 4;
    745        int lag_gld = 8;
    746        layer_id->temporal_layer_id = 0;
    747        layer_id->spatial_layer_id = 0;
    748        int sh = 8;  // slots 0 - 7.
    749        // Moving index slot for last: 0 - (sh - 1)
    750        if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
    751        // Moving index for refresh of last: one ahead for next frame.
    752        last_idx_refresh = superframe_cnt % sh;
    753        // Moving index for gld_ref, lag behind current by lag_gld
    754        if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
    755        // Moving index for alt_ref, lag behind LAST by lag_alt frames.
    756        if (superframe_cnt > lag_alt)
    757          alt_ref_idx = (superframe_cnt - lag_alt) % sh;
    758        // Set the ref_idx.
    759        // Default all references to slot for last.
    760        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
    761          ref_frame_config->ref_idx[i] = last_idx;
    762        // Set the ref_idx for the relevant references.
    763        ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
    764        ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
    765        ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
    766        ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
    767        // Refresh this slot, which will become LAST on next frame.
    768        ref_frame_config->refresh[last_idx_refresh] = 1;
    769        // Reference LAST, ALTREF, and GOLDEN
    770        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    771        ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
    772        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    773        // Switch to only GOLDEN every 200 frames.
    774        if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
    775          ref_frame_config->reference[SVC_LAST_FRAME] = 0;
    776          ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
    777          ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    778          // Test if the long-term is LAST instead, this is just a renaming
    779          // but it tests if the encoder behaves the same, whether it's
    780          // LAST or GOLDEN.
    781          if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
    782            ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
    783            ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    784            ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
    785            ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
    786          }
    787        }
    788      }
    789      break;
    790    case 1:
    791      // 2-temporal layer.
    792      //    1    3    5
    793      //  0    2    4
    794      // Keep golden fixed at slot 3.
    795      base_count = superframe_cnt >> 1;
    796      ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
    797      // Cyclically refresh slots 5, 6, 7, for lag alt ref.
    798      lag_index = 5;
    799      if (base_count > 0) {
    800        lag_index = 5 + (base_count % 3);
    801        if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
    802      }
    803      // Set the altref slot to lag_index.
    804      ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
    805      if (superframe_cnt % 2 == 0) {
    806        layer_id->temporal_layer_id = 0;
    807        // Update LAST on layer 0, reference LAST.
    808        ref_frame_config->refresh[0] = 1;
    809        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    810        // Refresh lag_index slot, needed for lagging altref.
    811        ref_frame_config->refresh[lag_index] = 1;
    812        // Refresh GOLDEN every x base layer frames.
    813        if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
    814      } else {
    815        layer_id->temporal_layer_id = 1;
    816        // No updates on layer 1, reference LAST (TL0).
    817        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    818      }
    819      // Always reference golden and altref on TL0.
    820      if (layer_id->temporal_layer_id == 0) {
    821        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    822        ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
    823      }
    824      break;
    825    case 2:
    826      // 3-temporal layer:
    827      //   1    3   5    7
    828      //     2        6
    829      // 0        4        8
    830      if (superframe_cnt % 4 == 0) {
    831        // Base layer.
    832        layer_id->temporal_layer_id = 0;
    833        // Update LAST on layer 0, reference LAST.
    834        ref_frame_config->refresh[0] = 1;
    835        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    836      } else if ((superframe_cnt - 1) % 4 == 0) {
    837        layer_id->temporal_layer_id = 2;
    838        // First top layer: no updates, only reference LAST (TL0).
    839        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    840      } else if ((superframe_cnt - 2) % 4 == 0) {
    841        layer_id->temporal_layer_id = 1;
    842        // Middle layer (TL1): update LAST2, only reference LAST (TL0).
    843        ref_frame_config->refresh[1] = 1;
    844        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    845      } else if ((superframe_cnt - 3) % 4 == 0) {
    846        layer_id->temporal_layer_id = 2;
    847        // Second top layer: no updates, only reference LAST.
    848        // Set buffer idx for LAST to slot 1, since that was the slot
    849        // updated in previous frame. So LAST is TL1 frame.
    850        ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
    851        ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
    852        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    853      }
    854      break;
    855    case 3:
    856      // 3 TL, same as above, except allow for predicting
    857      // off 2 more references (GOLDEN and ALTREF), with
    858      // GOLDEN updated periodically, and ALTREF lagging from
    859      // LAST from ~4 frames. Both GOLDEN and ALTREF
    860      // can only be updated on base temporal layer.
    861 
    862      // Keep golden fixed at slot 3.
    863      ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
    864      // Cyclically refresh slots 5, 6, 7, for lag altref.
    865      lag_index = 5;
    866      if (base_count > 0) {
    867        lag_index = 5 + (base_count % 3);
    868        if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
    869      }
    870      // Set the altref slot to lag_index.
    871      ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
    872      if (superframe_cnt % 4 == 0) {
    873        // Base layer.
    874        layer_id->temporal_layer_id = 0;
    875        // Update LAST on layer 0, reference LAST.
    876        ref_frame_config->refresh[0] = 1;
    877        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    878        // Refresh GOLDEN every x ~10 base layer frames.
    879        if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
    880        // Refresh lag_index slot, needed for lagging altref.
    881        ref_frame_config->refresh[lag_index] = 1;
    882      } else if ((superframe_cnt - 1) % 4 == 0) {
    883        layer_id->temporal_layer_id = 2;
    884        // First top layer: no updates, only reference LAST (TL0).
    885        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    886      } else if ((superframe_cnt - 2) % 4 == 0) {
    887        layer_id->temporal_layer_id = 1;
    888        // Middle layer (TL1): update LAST2, only reference LAST (TL0).
    889        ref_frame_config->refresh[1] = 1;
    890        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    891      } else if ((superframe_cnt - 3) % 4 == 0) {
    892        layer_id->temporal_layer_id = 2;
    893        // Second top layer: no updates, only reference LAST.
    894        // Set buffer idx for LAST to slot 1, since that was the slot
    895        // updated in previous frame. So LAST is TL1 frame.
    896        ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
    897        ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
    898        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    899      }
    900      // Every frame can reference GOLDEN AND ALTREF.
    901      ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    902      ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
    903      // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
    904      if (speed >= 7) {
    905        ref_frame_comp_pred->use_comp_pred[2] = 1;
    906        ref_frame_comp_pred->use_comp_pred[0] = 1;
    907      }
    908      break;
    909    case 4:
    910      // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
    911      // only reference GF (not LAST). Other frames only reference LAST.
    912      //   1    3   5    7
    913      //     2        6
    914      // 0        4        8
    915      if (superframe_cnt % 4 == 0) {
    916        // Base layer.
    917        layer_id->temporal_layer_id = 0;
    918        // Update LAST on layer 0, only reference LAST.
    919        ref_frame_config->refresh[0] = 1;
    920        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    921      } else if ((superframe_cnt - 1) % 4 == 0) {
    922        layer_id->temporal_layer_id = 2;
    923        // First top layer: no updates, only reference LAST (TL0).
    924        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    925      } else if ((superframe_cnt - 2) % 4 == 0) {
    926        layer_id->temporal_layer_id = 1;
    927        // Middle layer (TL1): update GF, only reference LAST (TL0).
    928        ref_frame_config->refresh[3] = 1;
    929        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    930      } else if ((superframe_cnt - 3) % 4 == 0) {
    931        layer_id->temporal_layer_id = 2;
    932        // Second top layer: no updates, only reference GF.
    933        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    934      }
    935      break;
    936 
    937    case 5:
    938      /*
    939      // 2 spatial layers, 1 temporal, without temporal prediction on SL1.
    940      layer_id->temporal_layer_id = 0;
    941      if (layer_id->spatial_layer_id == 0) {
    942        // Reference LAST, update LAST.
    943        ref_frame_config->refresh[0] = 1;
    944        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    945      } else if (layer_id->spatial_layer_id == 1) {
    946        // Reference LAST, which is SL0, and no refresh.
    947        ref_frame_config->refresh[0] = 0;
    948        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    949      }
    950      break;
    951      */
    952      // 2 spatial layers, 1 temporal.
    953      layer_id->temporal_layer_id = 0;
    954      if (layer_id->spatial_layer_id == 0) {
    955        // Reference LAST, update LAST.
    956        ref_frame_config->refresh[0] = 1;
    957        ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
    958        ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2;
    959        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    960      } else if (layer_id->spatial_layer_id == 1) {
    961        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
    962        // and GOLDEN to slot 0. Update slot 1 (LAST).
    963        ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
    964        ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
    965        ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2;
    966        ref_frame_config->refresh[1] = 1;
    967        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    968        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    969      }
    970      break;
    971 
    972    case 6:
    973      // 3 spatial layers, 1 temporal.
    974      // Note for this case, we set the buffer idx for all references to be
    975      // either LAST or GOLDEN, which are always valid references, since decoder
    976      // will check if any of the 7 references is valid scale in
    977      // valid_ref_frame_size().
    978      layer_id->temporal_layer_id = 0;
    979      if (layer_id->spatial_layer_id == 0) {
    980        // Reference LAST, update LAST. Set all buffer_idx to 0.
    981        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
    982          ref_frame_config->ref_idx[i] = 0;
    983        ref_frame_config->refresh[0] = 1;
    984        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    985      } else if (layer_id->spatial_layer_id == 1) {
    986        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
    987        // and GOLDEN (and all other refs) to slot 0.
    988        // Update slot 1 (LAST).
    989        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
    990          ref_frame_config->ref_idx[i] = 0;
    991        ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
    992        ref_frame_config->refresh[1] = 1;
    993        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
    994        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
    995      } else if (layer_id->spatial_layer_id == 2) {
    996        // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
    997        // and GOLDEN (and all other refs) to slot 1.
    998        // Update slot 2 (LAST).
    999        for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1000          ref_frame_config->ref_idx[i] = 1;
   1001        ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1002        ref_frame_config->refresh[2] = 1;
   1003        ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1004        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
   1005        // For 3 spatial layer case: allow for top spatial layer to use
   1006        // additional temporal reference. Update every 10 frames.
   1007        if (enable_longterm_temporal_ref) {
   1008          ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
   1009          ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
   1010          if (base_count % 10 == 0)
   1011            ref_frame_config->refresh[REF_FRAMES - 1] = 1;
   1012        }
   1013      }
   1014      break;
   1015    case 7:
   1016      // 2 spatial and 3 temporal layer.
   1017      ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1018      if (superframe_cnt % 4 == 0) {
   1019        // Base temporal layer
   1020        layer_id->temporal_layer_id = 0;
   1021        if (layer_id->spatial_layer_id == 0) {
   1022          // Reference LAST, update LAST
   1023          // Set all buffer_idx to 0
   1024          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1025            ref_frame_config->ref_idx[i] = 0;
   1026          ref_frame_config->refresh[0] = 1;
   1027        } else if (layer_id->spatial_layer_id == 1) {
   1028          // Reference LAST and GOLDEN.
   1029          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1030            ref_frame_config->ref_idx[i] = 0;
   1031          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
   1032          ref_frame_config->refresh[1] = 1;
   1033        }
   1034      } else if ((superframe_cnt - 1) % 4 == 0) {
   1035        // First top temporal enhancement layer.
   1036        layer_id->temporal_layer_id = 2;
   1037        if (layer_id->spatial_layer_id == 0) {
   1038          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1039            ref_frame_config->ref_idx[i] = 0;
   1040          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
   1041          ref_frame_config->refresh[3] = 1;
   1042        } else if (layer_id->spatial_layer_id == 1) {
   1043          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
   1044          // GOLDEN (and all other refs) to slot 3.
   1045          // No update.
   1046          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1047            ref_frame_config->ref_idx[i] = 3;
   1048          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
   1049        }
   1050      } else if ((superframe_cnt - 2) % 4 == 0) {
   1051        // Middle temporal enhancement layer.
   1052        layer_id->temporal_layer_id = 1;
   1053        if (layer_id->spatial_layer_id == 0) {
   1054          // Reference LAST.
   1055          // Set all buffer_idx to 0.
   1056          // Set GOLDEN to slot 5 and update slot 5.
   1057          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1058            ref_frame_config->ref_idx[i] = 0;
   1059          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
   1060          ref_frame_config->refresh[5 - shift] = 1;
   1061        } else if (layer_id->spatial_layer_id == 1) {
   1062          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
   1063          // GOLDEN (and all other refs) to slot 5.
   1064          // Set LAST3 to slot 6 and update slot 6.
   1065          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1066            ref_frame_config->ref_idx[i] = 5 - shift;
   1067          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
   1068          ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
   1069          ref_frame_config->refresh[6 - shift] = 1;
   1070        }
   1071      } else if ((superframe_cnt - 3) % 4 == 0) {
   1072        // Second top temporal enhancement layer.
   1073        layer_id->temporal_layer_id = 2;
   1074        if (layer_id->spatial_layer_id == 0) {
   1075          // Set LAST to slot 5 and reference LAST.
   1076          // Set GOLDEN to slot 3 and update slot 3.
   1077          // Set all other buffer_idx to 0.
   1078          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1079            ref_frame_config->ref_idx[i] = 0;
   1080          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
   1081          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
   1082          ref_frame_config->refresh[3] = 1;
   1083        } else if (layer_id->spatial_layer_id == 1) {
   1084          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
   1085          // GOLDEN to slot 3. No update.
   1086          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1087            ref_frame_config->ref_idx[i] = 0;
   1088          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
   1089          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
   1090        }
   1091      }
   1092      break;
   1093    case 8:
   1094      // 3 spatial and 3 temporal layer.
   1095      // Same as case 9 but with overlap in the buffer slot updates
   1096      // (shift = 2). The slots 3 and 4 updated by first TL2 are
   1097      // reused for update in TL1 superframe.
   1098      // Note for this case, frame order hint must be disabled for
   1099      // lower resolutions (operating points > 0) to be decodable.
   1100    case 9:
   1101      // 3 spatial and 3 temporal layer.
   1102      // No overlap in buffer updates between TL2 and TL1.
   1103      // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
   1104      // Set the references via the svc_ref_frame_config control.
   1105      // Always reference LAST.
   1106      ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1107      if (superframe_cnt % 4 == 0) {
   1108        // Base temporal layer.
   1109        layer_id->temporal_layer_id = 0;
   1110        if (layer_id->spatial_layer_id == 0) {
   1111          // Reference LAST, update LAST.
   1112          // Set all buffer_idx to 0.
   1113          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1114            ref_frame_config->ref_idx[i] = 0;
   1115          ref_frame_config->refresh[0] = 1;
   1116        } else if (layer_id->spatial_layer_id == 1) {
   1117          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
   1118          // GOLDEN (and all other refs) to slot 0.
   1119          // Update slot 1 (LAST).
   1120          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1121            ref_frame_config->ref_idx[i] = 0;
   1122          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
   1123          ref_frame_config->refresh[1] = 1;
   1124        } else if (layer_id->spatial_layer_id == 2) {
   1125          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
   1126          // GOLDEN (and all other refs) to slot 1.
   1127          // Update slot 2 (LAST).
   1128          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1129            ref_frame_config->ref_idx[i] = 1;
   1130          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1131          ref_frame_config->refresh[2] = 1;
   1132        }
   1133      } else if ((superframe_cnt - 1) % 4 == 0) {
   1134        // First top temporal enhancement layer.
   1135        layer_id->temporal_layer_id = 2;
   1136        if (layer_id->spatial_layer_id == 0) {
   1137          // Reference LAST (slot 0).
   1138          // Set GOLDEN to slot 3 and update slot 3.
   1139          // Set all other buffer_idx to slot 0.
   1140          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1141            ref_frame_config->ref_idx[i] = 0;
   1142          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
   1143          ref_frame_config->refresh[3] = 1;
   1144        } else if (layer_id->spatial_layer_id == 1) {
   1145          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
   1146          // GOLDEN (and all other refs) to slot 3.
   1147          // Set LAST2 to slot 4 and Update slot 4.
   1148          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1149            ref_frame_config->ref_idx[i] = 3;
   1150          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
   1151          ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
   1152          ref_frame_config->refresh[4] = 1;
   1153        } else if (layer_id->spatial_layer_id == 2) {
   1154          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
   1155          // GOLDEN (and all other refs) to slot 4.
   1156          // No update.
   1157          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1158            ref_frame_config->ref_idx[i] = 4;
   1159          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1160        }
   1161      } else if ((superframe_cnt - 2) % 4 == 0) {
   1162        // Middle temporal enhancement layer.
   1163        layer_id->temporal_layer_id = 1;
   1164        if (layer_id->spatial_layer_id == 0) {
   1165          // Reference LAST.
   1166          // Set all buffer_idx to 0.
   1167          // Set GOLDEN to slot 5 and update slot 5.
   1168          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1169            ref_frame_config->ref_idx[i] = 0;
   1170          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
   1171          ref_frame_config->refresh[5 - shift] = 1;
   1172        } else if (layer_id->spatial_layer_id == 1) {
   1173          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
   1174          // GOLDEN (and all other refs) to slot 5.
   1175          // Set LAST3 to slot 6 and update slot 6.
   1176          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1177            ref_frame_config->ref_idx[i] = 5 - shift;
   1178          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
   1179          ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
   1180          ref_frame_config->refresh[6 - shift] = 1;
   1181        } else if (layer_id->spatial_layer_id == 2) {
   1182          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
   1183          // GOLDEN (and all other refs) to slot 6.
   1184          // Set LAST3 to slot 7 and update slot 7.
   1185          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1186            ref_frame_config->ref_idx[i] = 6 - shift;
   1187          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1188          ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
   1189          ref_frame_config->refresh[7 - shift] = 1;
   1190        }
   1191      } else if ((superframe_cnt - 3) % 4 == 0) {
   1192        // Second top temporal enhancement layer.
   1193        layer_id->temporal_layer_id = 2;
   1194        if (layer_id->spatial_layer_id == 0) {
   1195          // Set LAST to slot 5 and reference LAST.
   1196          // Set GOLDEN to slot 3 and update slot 3.
   1197          // Set all other buffer_idx to 0.
   1198          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1199            ref_frame_config->ref_idx[i] = 0;
   1200          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
   1201          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
   1202          ref_frame_config->refresh[3] = 1;
   1203        } else if (layer_id->spatial_layer_id == 1) {
   1204          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
   1205          // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
   1206          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1207            ref_frame_config->ref_idx[i] = 0;
   1208          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
   1209          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
   1210          ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
   1211          ref_frame_config->refresh[4] = 1;
   1212        } else if (layer_id->spatial_layer_id == 2) {
   1213          // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
   1214          // GOLDEN to slot 4. No update.
   1215          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1216            ref_frame_config->ref_idx[i] = 0;
   1217          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
   1218          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
   1219        }
   1220      }
   1221      break;
   1222    case 11:
   1223      // Simulcast mode for 3 spatial and 3 temporal layers.
   1224      // No inter-layer prediction, only prediction is temporal and single
   1225      // reference (LAST).
   1226      // No overlap in buffer slots between spatial layers. So for example,
   1227      // SL0 only uses slots 0 and 1.
   1228      // SL1 only uses slots 2 and 3.
   1229      // SL2 only uses slots 4 and 5.
   1230      // All 7 references for each inter-frame must only access buffer slots
   1231      // for that spatial layer.
   1232      // On key (super)frames: SL1 and SL2 must have no references set
   1233      // and must refresh all the slots for that layer only (so 2 and 3
   1234      // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
   1235      // as a Key frame (refresh all slots). SL1/SL2 will be labelled
   1236      // internally as Intra-only frames that allow that stream to be decoded.
   1237      // These conditions will allow for each spatial stream to be
   1238      // independently decodable.
   1239 
   1240      // Initialize all references to 0 (don't use reference).
   1241      for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1242        ref_frame_config->reference[i] = 0;
   1243      // Initialize as no refresh/update for all slots.
   1244      for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
   1245      for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1246        ref_frame_config->ref_idx[i] = 0;
   1247 
   1248      if (is_key_frame) {
   1249        if (layer_id->spatial_layer_id == 0) {
   1250          // Assign LAST/GOLDEN to slot 0/1.
   1251          // Refresh slots 0 and 1 for SL0.
   1252          // SL0: this will get set to KEY frame internally.
   1253          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
   1254          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
   1255          ref_frame_config->refresh[0] = 1;
   1256          ref_frame_config->refresh[1] = 1;
   1257        } else if (layer_id->spatial_layer_id == 1) {
   1258          // Assign LAST/GOLDEN to slot 2/3.
   1259          // Refresh slots 2 and 3 for SL1.
   1260          // This will get set to Intra-only frame internally.
   1261          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1262          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
   1263          ref_frame_config->refresh[2] = 1;
   1264          ref_frame_config->refresh[3] = 1;
   1265        } else if (layer_id->spatial_layer_id == 2) {
   1266          // Assign LAST/GOLDEN to slot 4/5.
   1267          // Refresh slots 4 and 5 for SL2.
   1268          // This will get set to Intra-only frame internally.
   1269          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
   1270          ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
   1271          ref_frame_config->refresh[4] = 1;
   1272          ref_frame_config->refresh[5] = 1;
   1273        }
   1274      } else if (superframe_cnt % 4 == 0) {
   1275        // Base temporal layer: TL0
   1276        layer_id->temporal_layer_id = 0;
   1277        if (layer_id->spatial_layer_id == 0) {  // SL0
   1278          // Reference LAST. Assign all references to either slot
   1279          // 0 or 1. Here we assign LAST to slot 0, all others to 1.
   1280          // Update slot 0 (LAST).
   1281          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1282          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1283            ref_frame_config->ref_idx[i] = 1;
   1284          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
   1285          ref_frame_config->refresh[0] = 1;
   1286        } else if (layer_id->spatial_layer_id == 1) {  // SL1
   1287          // Reference LAST. Assign all references to either slot
   1288          // 2 or 3. Here we assign LAST to slot 2, all others to 3.
   1289          // Update slot 2 (LAST).
   1290          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1291          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1292            ref_frame_config->ref_idx[i] = 3;
   1293          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1294          ref_frame_config->refresh[2] = 1;
   1295        } else if (layer_id->spatial_layer_id == 2) {  // SL2
   1296          // Reference LAST. Assign all references to either slot
   1297          // 4 or 5. Here we assign LAST to slot 4, all others to 5.
   1298          // Update slot 4 (LAST).
   1299          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1300          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1301            ref_frame_config->ref_idx[i] = 5;
   1302          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
   1303          ref_frame_config->refresh[4] = 1;
   1304        }
   1305      } else if ((superframe_cnt - 1) % 4 == 0) {
   1306        // First top temporal enhancement layer: TL2
   1307        layer_id->temporal_layer_id = 2;
   1308        if (layer_id->spatial_layer_id == 0) {  // SL0
   1309          // Reference LAST (slot 0). Assign other references to slot 1.
   1310          // No update/refresh on any slots.
   1311          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1312          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1313            ref_frame_config->ref_idx[i] = 1;
   1314          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
   1315        } else if (layer_id->spatial_layer_id == 1) {  // SL1
   1316          // Reference LAST (slot 2). Assign other references to slot 3.
   1317          // No update/refresh on any slots.
   1318          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1319          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1320            ref_frame_config->ref_idx[i] = 3;
   1321          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1322        } else if (layer_id->spatial_layer_id == 2) {  // SL2
   1323          // Reference LAST (slot 4). Assign other references to slot 5.
   1324          // No update/refresh on any slots.
   1325          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1326          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1327            ref_frame_config->ref_idx[i] = 5;
   1328          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
   1329        }
   1330      } else if ((superframe_cnt - 2) % 4 == 0) {
   1331        // Middle temporal enhancement layer: TL1
   1332        layer_id->temporal_layer_id = 1;
   1333        if (layer_id->spatial_layer_id == 0) {  // SL0
   1334          // Reference LAST (slot 0).
   1335          // Set GOLDEN to slot 1 and update slot 1.
   1336          // This will be used as reference for next TL2.
   1337          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1338          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1339            ref_frame_config->ref_idx[i] = 1;
   1340          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
   1341          ref_frame_config->refresh[1] = 1;
   1342        } else if (layer_id->spatial_layer_id == 1) {  // SL1
   1343          // Reference LAST (slot 2).
   1344          // Set GOLDEN to slot 3 and update slot 3.
   1345          // This will be used as reference for next TL2.
   1346          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1347          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1348            ref_frame_config->ref_idx[i] = 3;
   1349          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
   1350          ref_frame_config->refresh[3] = 1;
   1351        } else if (layer_id->spatial_layer_id == 2) {  // SL2
   1352          // Reference LAST (slot 4).
   1353          // Set GOLDEN to slot 5 and update slot 5.
   1354          // This will be used as reference for next TL2.
   1355          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1356          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1357            ref_frame_config->ref_idx[i] = 5;
   1358          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
   1359          ref_frame_config->refresh[5] = 1;
   1360        }
   1361      } else if ((superframe_cnt - 3) % 4 == 0) {
   1362        // Second top temporal enhancement layer: TL2
   1363        layer_id->temporal_layer_id = 2;
   1364        if (layer_id->spatial_layer_id == 0) {  // SL0
   1365          // Reference LAST (slot 1). Assign other references to slot 0.
   1366          // No update/refresh on any slots.
   1367          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1368          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1369            ref_frame_config->ref_idx[i] = 0;
   1370          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
   1371        } else if (layer_id->spatial_layer_id == 1) {  // SL1
   1372          // Reference LAST (slot 3). Assign other references to slot 2.
   1373          // No update/refresh on any slots.
   1374          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1375          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1376            ref_frame_config->ref_idx[i] = 2;
   1377          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
   1378        } else if (layer_id->spatial_layer_id == 2) {  // SL2
   1379          // Reference LAST (slot 5). Assign other references to slot 4.
   1380          // No update/refresh on any slots.
   1381          ref_frame_config->reference[SVC_LAST_FRAME] = 1;
   1382          for (i = 0; i < INTER_REFS_PER_FRAME; i++)
   1383            ref_frame_config->ref_idx[i] = 4;
   1384          ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
   1385        }
   1386      }
   1387      if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
   1388        // Always reference GOLDEN (inter-layer prediction).
   1389        ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
   1390        if (ksvc_mode) {
   1391          // KSVC: only keep the inter-layer reference (GOLDEN) for
   1392          // superframes whose base is key.
   1393          if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
   1394        }
   1395        if (is_key_frame && layer_id->spatial_layer_id > 1) {
   1396          // On superframes whose base is key: remove LAST to avoid prediction
   1397          // off layer two levels below.
   1398          ref_frame_config->reference[SVC_LAST_FRAME] = 0;
   1399        }
   1400      }
   1401      // For 3 spatial layer case 8 (where there is free buffer slot):
   1402      // allow for top spatial layer to use additional temporal reference.
   1403      // Additional reference is only updated on base temporal layer, every
   1404      // 10 TL0 frames here.
   1405      if (!simulcast_mode && enable_longterm_temporal_ref &&
   1406          layer_id->spatial_layer_id == 2 && layering_mode == 8) {
   1407        ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
   1408        if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
   1409        if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
   1410          ref_frame_config->refresh[REF_FRAMES - 1] = 1;
   1411      }
   1412      break;
   1413    default: assert(0); die("Error: Unsupported temporal layering mode!\n");
   1414  }
   1415  for (i = 0; i < REF_FRAMES; i++) {
   1416    if (ref_frame_config->refresh[i] == 1) {
   1417      *reference_updated = 1;
   1418      break;
   1419    }
   1420  }
   1421 }
   1422 
   1423 static void write_literal(struct aom_write_bit_buffer *wb, uint32_t data,
   1424                          uint8_t bits, uint32_t offset = 0) {
   1425  if (bits > 32) {
   1426    die("Invalid bits value %d > 32\n", bits);
   1427  }
   1428  const uint32_t max = static_cast<uint32_t>(((uint64_t)1 << bits) - 1);
   1429  if (data < offset || (data - offset) > max) {
   1430    die("Invalid data, value %u out of range [%u, %" PRIu64 "]\n", data, offset,
   1431        (uint64_t)max + offset);
   1432  }
   1433  aom_wb_write_unsigned_literal(wb, data - offset, bits);
   1434 }
   1435 
   1436 static void write_depth_representation_element(
   1437    struct aom_write_bit_buffer *buffer,
   1438    const std::pair<libaom_examples::DepthRepresentationElement, bool>
   1439        &element) {
   1440  if (!element.second) {
   1441    return;
   1442  }
   1443  write_literal(buffer, element.first.sign_flag, 1);
   1444  write_literal(buffer, element.first.exponent, 7);
   1445  if (element.first.mantissa_len == 0 || element.first.mantissa_len > 32) {
   1446    die("Invalid mantissan_len %d\n", element.first.mantissa_len);
   1447  }
   1448  write_literal(buffer, element.first.mantissa_len - 1, 5);
   1449  write_literal(buffer, element.first.mantissa, element.first.mantissa_len);
   1450 }
   1451 
   1452 static void write_color_properties(
   1453    struct aom_write_bit_buffer *buffer,
   1454    const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
   1455  write_literal(buffer, color_properties.second, 1);
   1456  if (color_properties.second) {
   1457    write_literal(buffer, color_properties.first.color_range, 1);
   1458    write_literal(buffer, color_properties.first.color_primaries, 8);
   1459    write_literal(buffer, color_properties.first.transfer_characteristics, 8);
   1460    write_literal(buffer, color_properties.first.matrix_coefficients, 8);
   1461  } else {
   1462    write_literal(buffer, 0, 1);  // reserved_1bit
   1463  }
   1464 }
   1465 
   1466 static void write_alpha_information(
   1467    struct aom_write_bit_buffer *buffer,
   1468    const libaom_examples::AlphaInformation &alpha_info) {
   1469  write_literal(buffer, alpha_info.alpha_use_idc, 2);
   1470  write_literal(buffer, alpha_info.alpha_simple_flag, 1);
   1471  if (!alpha_info.alpha_simple_flag) {
   1472    write_literal(buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
   1473    write_literal(buffer, alpha_info.alpha_clip_idc, 2);
   1474    write_literal(buffer, alpha_info.alpha_incr_flag, 1);
   1475    write_literal(buffer, alpha_info.alpha_transparent_value,
   1476                  alpha_info.alpha_bit_depth + 1);
   1477    write_literal(buffer, alpha_info.alpha_opaque_value,
   1478                  alpha_info.alpha_bit_depth + 1);
   1479    if (buffer->bit_offset % 8 != 0) {
   1480      // ai_byte_alignment_bits
   1481      write_literal(buffer, 0, 8 - (buffer->bit_offset % 8));
   1482    }
   1483    assert(buffer->bit_offset % 8 == 0);
   1484 
   1485    write_literal(buffer, 0, 6);  // ai_reserved_6bits
   1486    write_color_properties(buffer, alpha_info.alpha_color_description);
   1487  } else {
   1488    write_literal(buffer, 0, 5);  // ai_reserved_5bits
   1489  }
   1490 }
   1491 
   1492 static void write_depth_information(
   1493    struct aom_write_bit_buffer *buffer,
   1494    const libaom_examples::DepthInformation &depth_info) {
   1495  write_literal(buffer, depth_info.z_near.second, 1);
   1496  write_literal(buffer, depth_info.z_far.second, 1);
   1497  write_literal(buffer, depth_info.d_min.second, 1);
   1498  write_literal(buffer, depth_info.d_max.second, 1);
   1499  write_literal(buffer, depth_info.depth_representation_type, 4);
   1500  if (depth_info.d_min.second || depth_info.d_max.second) {
   1501    write_literal(buffer, depth_info.disparity_ref_view_id, 2);
   1502  }
   1503  write_depth_representation_element(buffer, depth_info.z_near);
   1504  write_depth_representation_element(buffer, depth_info.z_far);
   1505  write_depth_representation_element(buffer, depth_info.d_min);
   1506  write_depth_representation_element(buffer, depth_info.d_max);
   1507  if (buffer->bit_offset % 8 != 0) {
   1508    write_literal(buffer, 0, 8 - (buffer->bit_offset % 8));
   1509  }
   1510 }
   1511 
   1512 static void add_multilayer_metadata(
   1513    aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer,
   1514    int frame_idx, int spatial_id) {
   1515  // Large enough buffer for the multilayer metadata.
   1516  // Each layer's metadata is less than 100 bytes and there are at most 4
   1517  // layers.
   1518  std::vector<uint8_t> data(1024);
   1519  struct aom_write_bit_buffer buffer = { data.data(), 0 };
   1520 
   1521  write_literal(&buffer, multilayer.use_case, 6);
   1522  if (multilayer.layers.empty()) {
   1523    die("Invalid multilayer metadata, no layers found\n");
   1524  } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
   1525    die("Invalid multilayer metadata, too many layers (max is %d)\n",
   1526        MAX_NUM_SPATIAL_LAYERS);
   1527  }
   1528  write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
   1529  assert(buffer.bit_offset % 8 == 0);
   1530  for (size_t i = 0; i < multilayer.layers.size(); ++i) {
   1531    const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
   1532    // Alpha info with segmentation with labels can be up to about 66k bytes,
   1533    // which requires 3 bytes to encode in leb128.
   1534    const int bytes_reserved_for_size = 3;
   1535    // Placeholder for layer_metadata_size which will be written later.
   1536    write_literal(&buffer, 0, bytes_reserved_for_size * 8);
   1537    const uint32_t metadata_start = buffer.bit_offset;
   1538    write_literal(&buffer, (int)i, 2);  // ml_spatial_id
   1539    write_literal(&buffer, layer.layer_type, 5);
   1540    write_literal(&buffer, layer.luma_plane_only_flag, 1);
   1541    write_literal(&buffer, layer.layer_view_type, 3);
   1542    write_literal(&buffer, layer.group_id, 2);
   1543    write_literal(&buffer, layer.layer_dependency_idc, 3);
   1544    write_literal(&buffer, layer.layer_metadata_scope, 2);
   1545    write_literal(&buffer, 0, 4);  // ml_reserved_4bits
   1546 
   1547    if (i > 0) {
   1548      write_color_properties(&buffer, layer.layer_color_description);
   1549    } else {
   1550      write_literal(&buffer, 0, 2);  // ml_reserved_2bits
   1551    }
   1552    assert(buffer.bit_offset % 8 == 0);
   1553 
   1554    if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA &&
   1555        layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
   1556      write_alpha_information(&buffer, layer.alpha);
   1557      assert(buffer.bit_offset % 8 == 0);
   1558    } else if (layer.layer_type ==
   1559                   libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH &&
   1560               layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
   1561      write_depth_information(&buffer, layer.depth);
   1562      assert(buffer.bit_offset % 8 == 0);
   1563    }
   1564 
   1565    assert(buffer.bit_offset % 8 == 0);
   1566 
   1567    const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
   1568    const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
   1569    size_t coded_size;
   1570    if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
   1571                                   bytes_reserved_for_size,
   1572                                   &buffer.bit_buffer[size_pos], &coded_size)) {
   1573      // Need to increase bytes_reserved_for_size in the code above.
   1574      die("Error: Failed to write metadata size\n");
   1575    }
   1576  }
   1577  assert(buffer.bit_offset % 8 == 0);
   1578  if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
   1579                           buffer.bit_buffer, buffer.bit_offset / 8,
   1580                           AOM_MIF_KEY_FRAME)) {
   1581    die("Error: Failed to add metadata\n");
   1582  }
   1583 
   1584  if ((int)multilayer.layers.size() > spatial_id) {
   1585    const libaom_examples::LayerMetadata &layer = multilayer.layers[spatial_id];
   1586    for (const libaom_examples::FrameLocalMetadata &local_metadata :
   1587         layer.local_metadata) {
   1588      if (local_metadata.frame_idx == frame_idx) {
   1589        if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA) {
   1590          buffer = { data.data(), 0 };
   1591          write_alpha_information(&buffer, local_metadata.alpha);
   1592          if (aom_img_add_metadata(frame,
   1593                                   34 /*METADATA_TYPE_ALPHA_INFORMATION*/,
   1594                                   buffer.bit_buffer, buffer.bit_offset / 8,
   1595                                   AOM_MIF_ANY_FRAME_LAYER_SPECIFIC)) {
   1596            die("Error: Failed to add metadata\n");
   1597          }
   1598        } else if (layer.layer_type ==
   1599                   libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH) {
   1600          buffer = { data.data(), 0 };
   1601          write_depth_information(&buffer, local_metadata.depth);
   1602          if (aom_img_add_metadata(frame,
   1603                                   35 /*METADATA_TYPE_DEPTH_INFORMATION*/,
   1604                                   buffer.bit_buffer, buffer.bit_offset / 8,
   1605                                   AOM_MIF_ANY_FRAME_LAYER_SPECIFIC)) {
   1606            die("Error: Failed to add metadata\n");
   1607          }
   1608        }
   1609        break;
   1610      }
   1611    }
   1612  }
   1613 }
   1614 
   1615 #if CONFIG_AV1_DECODER
   1616 // Returns whether there is a mismatch between the encoder's new frame and the
   1617 // decoder's new frame.
   1618 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
   1619                       const int frames_out) {
   1620  aom_image_t enc_img, dec_img;
   1621  int mismatch = 0;
   1622 
   1623  /* Get the internal new frame */
   1624  AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
   1625  AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
   1626 
   1627 #if CONFIG_AV1_HIGHBITDEPTH
   1628  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
   1629      (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
   1630    if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
   1631      aom_image_t enc_hbd_img;
   1632      aom_img_alloc(
   1633          &enc_hbd_img,
   1634          static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
   1635          enc_img.d_w, enc_img.d_h, 16);
   1636      aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
   1637      enc_img = enc_hbd_img;
   1638    }
   1639    if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
   1640      aom_image_t dec_hbd_img;
   1641      aom_img_alloc(
   1642          &dec_hbd_img,
   1643          static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
   1644          dec_img.d_w, dec_img.d_h, 16);
   1645      aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
   1646      dec_img = dec_hbd_img;
   1647    }
   1648  }
   1649 #endif
   1650 
   1651  if (!aom_compare_img(&enc_img, &dec_img)) {
   1652    int y[4], u[4], v[4];
   1653 #if CONFIG_AV1_HIGHBITDEPTH
   1654    if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
   1655      aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
   1656    } else {
   1657      aom_find_mismatch(&enc_img, &dec_img, y, u, v);
   1658    }
   1659 #else
   1660    aom_find_mismatch(&enc_img, &dec_img, y, u, v);
   1661 #endif
   1662    fprintf(stderr,
   1663            "Encode/decode mismatch on frame %d at"
   1664            " Y[%d, %d] {%d/%d},"
   1665            " U[%d, %d] {%d/%d},"
   1666            " V[%d, %d] {%d/%d}\n",
   1667            frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
   1668            v[1], v[2], v[3]);
   1669    mismatch = 1;
   1670  }
   1671 
   1672  aom_img_free(&enc_img);
   1673  aom_img_free(&dec_img);
   1674  return mismatch;
   1675 }
   1676 #endif  // CONFIG_AV1_DECODER
   1677 
// Running PSNR totals for one stream. show_psnr() receives an array of these
// and reads only index 0 of each member.
struct PsnrStats {
  // The second element of these arrays is reserved for high bitdepth.
  // Total SSE; passed to sse_to_psnr() for the overall figure.
  uint64_t psnr_sse_total[2];
  // Total sample count; passed to sse_to_psnr() alongside the SSE total.
  uint64_t psnr_samples_total[2];
  // Four summed PSNR values, each reported divided by psnr_count.
  // Presumably Avg/Y/U/V, going by the label show_psnr() prints; confirm at
  // the site that fills them in.
  double psnr_totals[2][4];
  // Divisor for psnr_totals; a zero count makes show_psnr() skip the entry.
  int psnr_count[2];
};
   1685 
   1686 static void show_psnr(struct PsnrStats *psnr_stream, double peak,
   1687                      int num_layers) {
   1688  for (int sl = 0; sl < num_layers; ++sl) {
   1689    if (!psnr_stream[sl].psnr_count[0]) continue;
   1690 
   1691    fprintf(stderr, "\nPSNR (Layer %d, Overall/Avg/Y/U/V)", sl);
   1692    const double ovpsnr =
   1693        sse_to_psnr((double)psnr_stream[sl].psnr_samples_total[0], peak,
   1694                    (double)psnr_stream[sl].psnr_sse_total[0]);
   1695    fprintf(stderr, " %.3f", ovpsnr);
   1696 
   1697    for (int i = 0; i < 4; i++) {
   1698      fprintf(
   1699          stderr, " %.3f",
   1700          psnr_stream[sl].psnr_totals[0][i] / psnr_stream[sl].psnr_count[0]);
   1701    }
   1702  }
   1703  fprintf(stderr, "\n");
   1704 }
   1705 
   1706 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
   1707    const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
   1708  aom::AV1RateControlRtcConfig rc_cfg;
   1709  rc_cfg.width = cfg.g_w;
   1710  rc_cfg.height = cfg.g_h;
   1711  rc_cfg.max_quantizer = cfg.rc_max_quantizer;
   1712  rc_cfg.min_quantizer = cfg.rc_min_quantizer;
   1713  rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
   1714  rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
   1715  rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
   1716  rc_cfg.buf_sz = cfg.rc_buf_sz;
   1717  rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
   1718  rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
   1719  // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
   1720  rc_cfg.max_intra_bitrate_pct = 300;
   1721  rc_cfg.framerate = cfg.g_timebase.den;
   1722  // TODO(jianj): Add suppor for SVC.
   1723  rc_cfg.ss_number_layers = 1;
   1724  rc_cfg.ts_number_layers = 1;
   1725  rc_cfg.scaling_factor_num[0] = 1;
   1726  rc_cfg.scaling_factor_den[0] = 1;
   1727  rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
   1728  rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
   1729  rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
   1730  rc_cfg.aq_mode = app_input.aq_mode;
   1731 
   1732  return rc_cfg;
   1733 }
   1734 
   1735 static int qindex_to_quantizer(int qindex) {
   1736  // Table that converts 0-63 range Q values passed in outside to the 0-255
   1737  // range Qindex used internally.
   1738  static const int quantizer_to_qindex[] = {
   1739    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
   1740    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
   1741    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
   1742    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
   1743    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
   1744  };
   1745  for (int quantizer = 0; quantizer < 64; ++quantizer)
   1746    if (quantizer_to_qindex[quantizer] >= qindex) return quantizer;
   1747 
   1748  return 63;
   1749 }
   1750 
   1751 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
   1752                           aom_codec_ctx_t *codec, int frame_cnt) {
   1753  aom_active_map_t map = { 0, 0, 0 };
   1754 
   1755  map.rows = (cfg->g_h + 15) / 16;
   1756  map.cols = (cfg->g_w + 15) / 16;
   1757 
   1758  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
   1759  if (!map.active_map) die("Failed to allocate active map");
   1760 
   1761  // Example map for testing.
   1762  for (unsigned int i = 0; i < map.rows; ++i) {
   1763    for (unsigned int j = 0; j < map.cols; ++j) {
   1764      int index = map.cols * i + j;
   1765      map.active_map[index] = 1;
   1766      if (frame_cnt < 300) {
   1767        if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
   1768      } else if (frame_cnt >= 300) {
   1769        if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
   1770      }
   1771    }
   1772  }
   1773 
   1774  if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
   1775    die_codec(codec, "Failed to set active map");
   1776 
   1777  free(map.active_map);
   1778 }
   1779 
   1780 static void set_roi_map(const aom_codec_enc_cfg_t *cfg, aom_codec_ctx_t *codec,
   1781                        int roi_feature) {
   1782  aom_roi_map_t roi = aom_roi_map_t();
   1783  const int block_size = 4;
   1784  roi.rows = (cfg->g_h + block_size - 1) / block_size;
   1785  roi.cols = (cfg->g_w + block_size - 1) / block_size;
   1786  memset(&roi.skip, 0, sizeof(roi.skip));
   1787  memset(&roi.delta_q, 0, sizeof(roi.delta_q));
   1788  memset(&roi.delta_lf, 0, sizeof(roi.delta_lf));
   1789  memset(roi.ref_frame, -1, sizeof(roi.ref_frame));
   1790  // Set ROI map to be 1 (segment #1) in middle square of image,
   1791  // 0 elsewhere.
   1792  roi.enabled = 1;
   1793  roi.roi_map = (uint8_t *)calloc(roi.rows * roi.cols, sizeof(*roi.roi_map));
   1794  for (unsigned int i = 0; i < roi.rows; ++i) {
   1795    for (unsigned int j = 0; j < roi.cols; ++j) {
   1796      const int idx = i * roi.cols + j;
   1797      if (i > roi.rows / 4 && i < (3 * roi.rows) / 4 && j > roi.cols / 4 &&
   1798          j < (3 * roi.cols) / 4)
   1799        roi.roi_map[idx] = 1;
   1800      else
   1801        roi.roi_map[idx] = 0;
   1802    }
   1803  }
   1804  // Set the ROI feature, on segment #1.
   1805  if (roi_feature == kSkip)
   1806    roi.skip[1] = 1;
   1807  else if (roi_feature == kDeltaQ)
   1808    roi.delta_q[1] = -40;
   1809  else if (roi_feature == kDeltaLF)
   1810    roi.delta_lf[1] = 40;
   1811  else if (roi_feature == kReference)
   1812    roi.ref_frame[1] = 4;  // GOLDEN_FRAME
   1813 
   1814  if (aom_codec_control(codec, AOME_SET_ROI_MAP, &roi))
   1815    die_codec(codec, "Failed to set roi map");
   1816 
   1817  free(roi.roi_map);
   1818 }
   1819 int main(int argc, const char **argv) {
   1820  AppInput app_input;
   1821  AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
   1822  FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
   1823  AvxVideoWriter *total_layer_file = NULL;
   1824  FILE *total_layer_obu_file = NULL;
   1825  aom_codec_enc_cfg_t cfg;
   1826  int frame_cnt = 0;
   1827  aom_image_t raw;
   1828  int frame_avail;
   1829  int got_data = 0;
   1830  int flags = 0;
   1831  int i;
   1832  int pts = 0;             // PTS starts at 0.
   1833  int frame_duration = 1;  // 1 timebase tick per frame.
   1834  aom_svc_layer_id_t layer_id;
   1835  aom_svc_params_t svc_params;
   1836  aom_svc_ref_frame_config_t ref_frame_config;
   1837  aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
   1838 
   1839 #if CONFIG_INTERNAL_STATS
   1840  FILE *stats_file = fopen("opsnr.stt", "a");
   1841  if (stats_file == NULL) {
   1842    die("Cannot open opsnr.stt\n");
   1843  }
   1844 #endif
   1845 #if CONFIG_AV1_DECODER
   1846  aom_codec_ctx_t decoder;
   1847 #endif
   1848 
   1849  struct RateControlMetrics rc;
   1850  int64_t cx_time = 0;
   1851  int64_t cx_time_layer[AOM_MAX_LAYERS];  // max number of layers.
   1852  int frame_cnt_layer[AOM_MAX_LAYERS];
   1853  double sum_bitrate = 0.0;
   1854  double sum_bitrate2 = 0.0;
   1855  double framerate = 30.0;
   1856  int use_svc_control = 1;
   1857  int set_err_resil_frame = 0;
   1858  int test_changing_bitrate = 0;
   1859  zero(rc.layer_target_bitrate);
   1860  memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
   1861  memset(&app_input, 0, sizeof(AppInput));
   1862  memset(&svc_params, 0, sizeof(svc_params));
   1863 
   1864  // Flag to test dynamic scaling of source frames for single
   1865  // spatial stream, using the scaling_mode control.
   1866  const int test_dynamic_scaling_single_layer = 0;
   1867 
   1868  // Flag to test setting speed per layer.
   1869  const int test_speed_per_layer = 0;
   1870 
   1871  // Flag for testing active maps.
   1872  const int test_active_maps = 0;
   1873 
   1874  // Flag for testing roi map.
   1875  const int test_roi_map = 0;
   1876 
   1877  /* Setup default input stream settings */
   1878  for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
   1879    app_input.input_ctx[i].framerate.numerator = 30;
   1880    app_input.input_ctx[i].framerate.denominator = 1;
   1881    app_input.input_ctx[i].only_i420 = 0;
   1882    app_input.input_ctx[i].bit_depth = AOM_BITS_8;
   1883  }
   1884  app_input.speed = 7;
   1885  exec_name = argv[0];
   1886 
   1887  // start with default encoder configuration
   1888 #if GOOD_QUALITY
   1889  aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
   1890                                                     AOM_USAGE_GOOD_QUALITY);
   1891 #else
   1892  aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
   1893                                                     AOM_USAGE_REALTIME);
   1894 #endif
   1895  if (res != AOM_CODEC_OK) {
   1896    die("Failed to get config: %s\n", aom_codec_err_to_string(res));
   1897  }
   1898 
   1899 #if GOOD_QUALITY
   1900  cfg.g_usage = AOM_USAGE_GOOD_QUALITY;
   1901 #else
   1902  // Real time parameters.
   1903  cfg.g_usage = AOM_USAGE_REALTIME;
   1904 #endif
   1905 
   1906  cfg.rc_end_usage = AOM_CBR;
   1907  cfg.rc_min_quantizer = 2;
   1908  cfg.rc_max_quantizer = 52;
   1909  cfg.rc_undershoot_pct = 50;
   1910  cfg.rc_overshoot_pct = 50;
   1911  cfg.rc_buf_initial_sz = 600;
   1912  cfg.rc_buf_optimal_sz = 600;
   1913  cfg.rc_buf_sz = 1000;
   1914  cfg.rc_resize_mode = 0;  // Set to RESIZE_DYNAMIC for dynamic resize.
   1915  cfg.g_lag_in_frames = 0;
   1916  cfg.kf_mode = AOM_KF_AUTO;
   1917  cfg.g_w = 0;  // Force user to specify width and height for raw input.
   1918  cfg.g_h = 0;
   1919 
   1920  parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
   1921 
   1922  int ts_number_layers = svc_params.number_temporal_layers;
   1923  int ss_number_layers = svc_params.number_spatial_layers;
   1924 
   1925  unsigned int width = cfg.g_w;
   1926  unsigned int height = cfg.g_h;
   1927 
   1928  if (app_input.layering_mode >= 0) {
   1929    if (ts_number_layers !=
   1930            mode_to_num_temporal_layers[app_input.layering_mode] ||
   1931        ss_number_layers !=
   1932            mode_to_num_spatial_layers[app_input.layering_mode]) {
   1933      die("Number of layers doesn't match layering mode.");
   1934    }
   1935  }
   1936 
   1937  bool has_non_y4m_input = false;
   1938  for (i = 0; i < AOM_MAX_LAYERS; ++i) {
   1939    if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
   1940      has_non_y4m_input = true;
   1941      break;
   1942    }
   1943  }
   1944  // Y4M reader has its own allocation.
   1945  if (has_non_y4m_input) {
   1946    if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
   1947      die("Failed to allocate image (%dx%d)", width, height);
   1948    }
   1949  }
   1950 
   1951  aom_codec_iface_t *encoder = aom_codec_av1_cx();
   1952 
   1953  memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
   1954         sizeof(svc_params.layer_target_bitrate));
   1955 
   1956  unsigned int total_rate = 0;
   1957  for (i = 0; i < ss_number_layers; i++) {
   1958    total_rate +=
   1959        svc_params
   1960            .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
   1961  }
   1962  if (total_rate != cfg.rc_target_bitrate) {
   1963    die("Incorrect total target bitrate, expected: %d", total_rate);
   1964  }
   1965 
   1966  svc_params.framerate_factor[0] = 1;
   1967  if (ts_number_layers == 2) {
   1968    svc_params.framerate_factor[0] = 2;
   1969    svc_params.framerate_factor[1] = 1;
   1970  } else if (ts_number_layers == 3) {
   1971    svc_params.framerate_factor[0] = 4;
   1972    svc_params.framerate_factor[1] = 2;
   1973    svc_params.framerate_factor[2] = 1;
   1974  }
   1975 
   1976  libaom_examples::MultilayerMetadata multilayer_metadata;
   1977  if (app_input.multilayer_metadata_file != NULL) {
   1978    if (!libaom_examples::parse_multilayer_file(
   1979            app_input.multilayer_metadata_file, &multilayer_metadata)) {
   1980      die("Failed to parse multilayer metadata");
   1981    }
   1982    libaom_examples::print_multilayer_metadata(multilayer_metadata);
   1983  }
   1984 
   1985  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
   1986  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
   1987 
   1988  AvxVideoInfo info;
   1989  info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
   1990  info.frame_width = cfg.g_w;
   1991  info.frame_height = cfg.g_h;
   1992  info.time_base.numerator = cfg.g_timebase.num;
   1993  info.time_base.denominator = cfg.g_timebase.den;
   1994  // Open an output file for each stream.
   1995  for (int sl = 0; sl < ss_number_layers; ++sl) {
   1996    for (int tl = 0; tl < ts_number_layers; ++tl) {
   1997      i = sl * ts_number_layers + tl;
   1998      char file_name[PATH_MAX];
   1999      snprintf(file_name, sizeof(file_name), "%s_%d.av1",
   2000               app_input.output_filename, i);
   2001      if (app_input.output_obu) {
   2002        obu_files[i] = fopen(file_name, "wb");
   2003        if (!obu_files[i]) die("Failed to open %s for writing", file_name);
   2004      } else {
   2005        outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
   2006        if (!outfile[i]) die("Failed to open %s for writing", file_name);
   2007      }
   2008    }
   2009  }
   2010  if (app_input.output_obu) {
   2011    total_layer_obu_file = fopen(app_input.output_filename, "wb");
   2012    if (!total_layer_obu_file)
   2013      die("Failed to open %s for writing", app_input.output_filename);
   2014  } else {
   2015    total_layer_file =
   2016        aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
   2017    if (!total_layer_file)
   2018      die("Failed to open %s for writing", app_input.output_filename);
   2019  }
   2020 
   2021  // Initialize codec.
   2022  aom_codec_ctx_t codec;
   2023  aom_codec_flags_t flag = 0;
   2024  flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH;
   2025  flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
   2026  if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
   2027    die_codec(&codec, "Failed to initialize encoder");
   2028 
   2029 #if CONFIG_AV1_DECODER
   2030  if (app_input.decode) {
   2031    if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
   2032      die_codec(&decoder, "Failed to initialize decoder");
   2033  }
   2034 #endif
   2035 
   2036  aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
   2037  aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
   2038  aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
   2039  aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1);
   2040  aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1);
   2041  aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0);
   2042  aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0);
   2043  aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
   2044  aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0);
   2045  aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0);
   2046  aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0);
   2047 #if GOOD_QUALITY
   2048  aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 0);
   2049  aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 0);
   2050  aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 0);
   2051  aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 0);
   2052 #else
   2053  aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3);
   2054  aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3);
   2055  aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3);
   2056  aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3);
   2057 #endif
   2058  aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1);
   2059 
   2060  // Settings to reduce key frame encoding time.
   2061  aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0);
   2062  aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
   2063  aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0);
   2064  aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0);
   2065  aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
   2066 
   2067  aom_codec_control(&codec, AV1E_SET_AUTO_TILES, 1);
   2068 
   2069  aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
   2070  if (app_input.tune_content == AOM_CONTENT_SCREEN) {
   2071    aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
   2072    // INTRABC is currently disabled for rt mode, as it's too slow.
   2073    aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
   2074  }
   2075 
   2076  if (app_input.use_external_rc) {
   2077    aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1);
   2078  }
   2079 
   2080  aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR, INT_MAX);
   2081 
   2082  aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE,
   2083                    AOM_FULL_SUPERFRAME_DROP);
   2084 
   2085  aom_codec_control(&codec, AV1E_SET_POSTENCODE_DROP_RTC, 1);
   2086 
   2087  svc_params.number_spatial_layers = ss_number_layers;
   2088  svc_params.number_temporal_layers = ts_number_layers;
   2089  for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
   2090    svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
   2091    svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
   2092  }
   2093  // SET QUANTIZER PER LAYER, E.G FOR 2 SPATIAL LAYERS:
   2094  // svc_params.max_quantizers[0] = 40;
   2095  // svc_params.min_quantizers[0] = 40;
   2096  // svc_params.max_quantizers[1] = 50;
   2097  // svc_params.min_quantizers[1] = 50;
   2098 
   2099  if (!app_input.scale_factors_explicitly_set) {
   2100    for (i = 0; i < ss_number_layers; ++i) {
   2101      svc_params.scaling_factor_num[i] = 1;
   2102      svc_params.scaling_factor_den[i] = 1;
   2103    }
   2104    if (ss_number_layers == 2) {
   2105      svc_params.scaling_factor_num[0] = 1;
   2106      svc_params.scaling_factor_den[0] = 2;
   2107    } else if (ss_number_layers == 3) {
   2108      svc_params.scaling_factor_num[0] = 1;
   2109      svc_params.scaling_factor_den[0] = 4;
   2110      svc_params.scaling_factor_num[1] = 1;
   2111      svc_params.scaling_factor_den[1] = 2;
   2112    }
   2113  }
   2114  aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
   2115  // TODO(aomedia:3032): Configure KSVC in fixed mode.
   2116 
   2117  // This controls the maximum target size of the key frame.
   2118  // For generating smaller key frames, use a smaller max_intra_size_pct
   2119  // value, like 100 or 200.
   2120  {
   2121    const int max_intra_size_pct = 300;
   2122    aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT,
   2123                      max_intra_size_pct);
   2124  }
   2125 
   2126  for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
   2127    cx_time_layer[lx] = 0;
   2128    frame_cnt_layer[lx] = 0;
   2129  }
   2130 
   2131  std::unique_ptr<aom::AV1RateControlRTC> rc_api;
   2132  if (app_input.use_external_rc) {
   2133    const aom::AV1RateControlRtcConfig rc_cfg =
   2134        create_rtc_rc_config(cfg, app_input);
   2135    rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
   2136  }
   2137 
   2138  frame_avail = 1;
   2139  struct PsnrStats psnr_stream[MAX_NUM_SPATIAL_LAYERS];
   2140  memset(&psnr_stream, 0, sizeof(psnr_stream));
   2141  while (frame_avail || got_data) {
   2142    struct aom_usec_timer timer;
   2143    frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
   2144    // Loop over spatial layers.
   2145    for (int slx = 0; slx < ss_number_layers; slx++) {
   2146      if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
   2147        const int previous_layer_frame_avail = frame_avail;
   2148        frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
   2149        if (previous_layer_frame_avail != frame_avail) {
   2150          die("Mismatch in number of frames between spatial layer input files");
   2151        }
   2152      }
   2153 
   2154      aom_codec_iter_t iter = NULL;
   2155      const aom_codec_cx_pkt_t *pkt;
   2156      int reference_updated = 0;
   2157      int layer = 0;
   2158      // Flag for superframe whose base is key.
   2159      int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
   2160      // For flexible mode:
   2161      if (app_input.layering_mode >= 0) {
   2162        // Set the reference/update flags, layer_id, and reference_map
   2163        // buffer index.
   2164        set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
   2165                          &ref_frame_config, &ref_frame_comp_pred,
   2166                          &use_svc_control, slx, is_key_frame,
   2167                          (app_input.layering_mode == 10), app_input.speed,
   2168                          &reference_updated, test_roi_map);
   2169        aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
   2170        if (use_svc_control) {
   2171          aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
   2172                            &ref_frame_config);
   2173          aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
   2174                            &ref_frame_comp_pred);
   2175        }
   2176        if (app_input.multilayer_metadata_file != NULL) {
   2177          add_multilayer_metadata(&raw, multilayer_metadata, frame_cnt, slx);
   2178        }
   2179        // Set the speed per layer.
   2180        if (test_speed_per_layer) {
   2181          int speed_per_layer = 10;
   2182          if (layer_id.spatial_layer_id == 0) {
   2183            if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
   2184            if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
   2185            if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
   2186          } else if (layer_id.spatial_layer_id == 1) {
   2187            if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
   2188            if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
   2189            if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
   2190          } else if (layer_id.spatial_layer_id == 2) {
   2191            if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
   2192            if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
   2193            if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
   2194          }
   2195          aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
   2196        }
   2197      } else {
   2198        // Only up to 3 temporal layers supported in fixed mode.
   2199        // Only need to set spatial and temporal layer_id: reference
   2200        // prediction, refresh, and buffer_idx are set internally.
   2201        layer_id.spatial_layer_id = slx;
   2202        layer_id.temporal_layer_id = 0;
   2203        if (ts_number_layers == 2) {
   2204          layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
   2205        } else if (ts_number_layers == 3) {
   2206          if (frame_cnt % 2 != 0)
   2207            layer_id.temporal_layer_id = 2;
   2208          else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
   2209            layer_id.temporal_layer_id = 1;
   2210        }
   2211        aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
   2212      }
   2213 
   2214      if (set_err_resil_frame && cfg.g_error_resilient == 0) {
   2215        // Set error_resilient per frame: off/0 for base layer and
   2216        // on/1 for enhancement layer frames.
   2217        // Note that this can only be done on the fly/per-frame/layer
   2218        // if the config error_resilience is off/0. See the logic for updating
   2219        // in set_encoder_config():
   2220        // tool_cfg->error_resilient_mode =
   2221        //     cfg->g_error_resilient | extra_cfg->error_resilient_mode;
   2222        const int err_resil_mode =
   2223            layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
   2224        aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE,
   2225                          err_resil_mode);
   2226      }
   2227 
   2228      layer = slx * ts_number_layers + layer_id.temporal_layer_id;
   2229      if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
   2230 
   2231      if (test_dynamic_scaling_single_layer) {
   2232        // Example to scale source down by 2x2, then 4x4, and then back up to
   2233        // 2x2, and then back to original.
   2234        int frame_2x2 = 200;
   2235        int frame_4x4 = 400;
   2236        int frame_2x2up = 600;
   2237        int frame_orig = 800;
   2238        if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
   2239          // Scale source down by 2x2.
   2240          struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
   2241          aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
   2242        } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
   2243          // Scale source down by 4x4.
   2244          struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
   2245          aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
   2246        } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
   2247          // Source back up to 2x2.
   2248          struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
   2249          aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
   2250        } else if (frame_cnt >= frame_orig) {
   2251          // Source back up to original resolution (no scaling).
   2252          struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
   2253          aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
   2254        }
   2255        if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
   2256            frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
   2257          // For dynamic resize testing on single layer: refresh all references
   2258          // on the resized frame: this is to avoid decode error:
   2259          // if resize goes down by >= 4x4 then libaom decoder will throw an
   2260          // error that some reference (even though not used) is beyond the
   2261          // limit size (must be smaller than 4x4).
   2262          for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
   2263          if (use_svc_control) {
   2264            aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
   2265                              &ref_frame_config);
   2266            aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
   2267                              &ref_frame_comp_pred);
   2268          }
   2269        }
   2270      }
   2271 
   2272      // Change target_bitrate every other frame.
   2273      if (test_changing_bitrate && frame_cnt % 2 == 0) {
   2274        if (frame_cnt < 500)
   2275          cfg.rc_target_bitrate += 10;
   2276        else
   2277          cfg.rc_target_bitrate -= 10;
   2278        // Do big increase and decrease.
   2279        if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
   2280        if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
   2281        if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
   2282        // Call change_config, or bypass with new control.
   2283        // res = aom_codec_enc_config_set(&codec, &cfg);
   2284        if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR,
   2285                              cfg.rc_target_bitrate))
   2286          die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
   2287      }
   2288 
   2289      if (rc_api) {
   2290        aom::AV1FrameParamsRTC frame_params;
   2291        // TODO(jianj): Add support for SVC.
   2292        frame_params.spatial_layer_id = 0;
   2293        frame_params.temporal_layer_id = 0;
   2294        frame_params.frame_type =
   2295            is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
   2296        rc_api->ComputeQP(frame_params);
   2297        const int current_qp = rc_api->GetQP();
   2298        if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS,
   2299                              qindex_to_quantizer(current_qp))) {
   2300          die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
   2301        }
   2302      }
   2303 
   2304      if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
   2305 
   2306      if (test_roi_map) set_roi_map(&cfg, &codec, kDeltaQ);
   2307 
   2308      // Do the layer encode.
   2309      aom_usec_timer_start(&timer);
   2310      if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
   2311        die_codec(&codec, "Failed to encode frame");
   2312      aom_usec_timer_mark(&timer);
   2313      cx_time += aom_usec_timer_elapsed(&timer);
   2314      cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
   2315      frame_cnt_layer[layer] += 1;
   2316 
   2317      // Get the high motion content flag.
   2318      int content_flag = 0;
   2319      if (aom_codec_control(&codec, AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC,
   2320                            &content_flag)) {
   2321        die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
   2322      }
   2323 
   2324      got_data = 0;
   2325      // For simulcast (mode 11): write out each spatial layer to the file.
   2326      int ss_layers_write = (app_input.layering_mode == 11)
   2327                                ? layer_id.spatial_layer_id + 1
   2328                                : ss_number_layers;
   2329      while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
   2330        switch (pkt->kind) {
   2331          case AOM_CODEC_CX_FRAME_PKT:
   2332            for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
   2333                 ++sl) {
   2334              for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
   2335                   ++tl) {
   2336                int j = sl * ts_number_layers + tl;
   2337                if (app_input.output_obu) {
   2338                  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
   2339                         obu_files[j]);
   2340                } else {
   2341                  aom_video_writer_write_frame(
   2342                      outfile[j],
   2343                      reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
   2344                      pkt->data.frame.sz, pts);
   2345                }
   2346                if (sl == layer_id.spatial_layer_id)
   2347                  rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
   2348              }
   2349            }
   2350            got_data = 1;
   2351            // Write everything into the top layer.
   2352            if (app_input.output_obu) {
   2353              fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
   2354                     total_layer_obu_file);
   2355            } else {
   2356              aom_video_writer_write_frame(
   2357                  total_layer_file,
   2358                  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
   2359                  pkt->data.frame.sz, pts);
   2360            }
   2361            // Keep count of rate control stats per layer (for non-key).
   2362            if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
   2363              int j = layer_id.spatial_layer_id * ts_number_layers +
   2364                      layer_id.temporal_layer_id;
   2365              assert(j >= 0);
   2366              rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
   2367              rc.layer_avg_rate_mismatch[j] +=
   2368                  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
   2369                  rc.layer_pfb[j];
   2370              if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
   2371            }
   2372 
   2373            if (rc_api) {
   2374              rc_api->PostEncodeUpdate(pkt->data.frame.sz);
   2375            }
   2376            // Update for short-time encoding bitrate states, for moving window
   2377            // of size rc->window, shifted by rc->window / 2.
   2378            // Ignore first window segment, due to key frame.
   2379            // For spatial layers: only do this for top/highest SL.
   2380            if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
   2381              sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
   2382              rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
   2383              if (frame_cnt % rc.window_size == 0) {
   2384                rc.window_count += 1;
   2385                rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
   2386                rc.variance_st_encoding_bitrate +=
   2387                    (sum_bitrate / rc.window_size) *
   2388                    (sum_bitrate / rc.window_size);
   2389                sum_bitrate = 0.0;
   2390              }
   2391            }
   2392            // Second shifted window.
   2393            if (frame_cnt > rc.window_size + rc.window_size / 2 &&
   2394                slx == ss_number_layers - 1) {
   2395              sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
   2396              if (frame_cnt > 2 * rc.window_size &&
   2397                  frame_cnt % rc.window_size == 0) {
   2398                rc.window_count += 1;
   2399                rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
   2400                rc.variance_st_encoding_bitrate +=
   2401                    (sum_bitrate2 / rc.window_size) *
   2402                    (sum_bitrate2 / rc.window_size);
   2403                sum_bitrate2 = 0.0;
   2404              }
   2405            }
   2406 
   2407 #if CONFIG_AV1_DECODER
   2408            if (app_input.decode) {
   2409              if (aom_codec_decode(
   2410                      &decoder,
   2411                      reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
   2412                      pkt->data.frame.sz, NULL))
   2413                die_codec(&decoder, "Failed to decode frame");
   2414            }
   2415 #endif
   2416 
   2417            break;
   2418          case AOM_CODEC_PSNR_PKT:
   2419            if (app_input.show_psnr) {
   2420              const int sl = layer_id.spatial_layer_id;
   2421              const int show_psnr_hbd =
   2422                  (cfg.g_input_bit_depth > 8 || cfg.g_bit_depth > AOM_BITS_8);
   2423              const int hbd = show_psnr_hbd;
   2424              psnr_stream[sl].psnr_sse_total[hbd] += pkt->data.psnr.sse[0];
   2425              psnr_stream[sl].psnr_samples_total[hbd] +=
   2426                  pkt->data.psnr.samples[0];
   2427              for (i = 0; i < 4; i++) {
   2428                psnr_stream[sl].psnr_totals[hbd][i] += pkt->data.psnr.psnr[i];
   2429              }
   2430              psnr_stream[sl].psnr_count[hbd]++;
   2431            }
   2432            break;
   2433          default: break;
   2434        }
   2435      }
   2436 #if CONFIG_AV1_DECODER
   2437      if (got_data && app_input.decode) {
   2438        // Don't look for mismatch on non reference frames.
   2439        if (reference_updated) {
   2440          if (test_decode(&codec, &decoder, frame_cnt)) {
   2441 #if CONFIG_INTERNAL_STATS
   2442            fprintf(stats_file, "First mismatch occurred in frame %d\n",
   2443                    frame_cnt);
   2444            fclose(stats_file);
   2445 #endif
   2446            fatal("Mismatch seen");
   2447          }
   2448        }
   2449      }
   2450 #endif
   2451    }  // loop over spatial layers
   2452    ++frame_cnt;
   2453    pts += frame_duration;
   2454  }
   2455 
   2456  for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
   2457    if (app_input.input_ctx[i].filename == NULL) {
   2458      break;
   2459    }
   2460    close_input_file(&(app_input.input_ctx[i]));
   2461  }
   2462  printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
   2463                                ts_number_layers);
   2464 
   2465  printf("\n");
   2466  for (int slx = 0; slx < ss_number_layers; slx++)
   2467    for (int tlx = 0; tlx < ts_number_layers; tlx++) {
   2468      int lx = slx * ts_number_layers + tlx;
   2469      printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
   2470             slx, tlx, frame_cnt_layer[lx],
   2471             (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
   2472             1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
   2473    }
   2474 
   2475  printf("\n");
   2476  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
   2477         frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
   2478         1000000 * (double)frame_cnt / (double)cx_time);
   2479 
   2480  if (app_input.show_psnr) {
   2481    const int show_psnr_hbd =
   2482        (cfg.g_input_bit_depth > 8 || cfg.g_bit_depth > AOM_BITS_8);
   2483    show_psnr(psnr_stream, (double)((1 << (show_psnr_hbd ? 12 : 8)) - 1),
   2484              ss_number_layers);
   2485  }
   2486 
   2487  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
   2488 
   2489 #if CONFIG_AV1_DECODER
   2490  if (app_input.decode) {
   2491    if (aom_codec_destroy(&decoder))
   2492      die_codec(&decoder, "Failed to destroy decoder");
   2493  }
   2494 #endif
   2495 
   2496 #if CONFIG_INTERNAL_STATS
   2497  fprintf(stats_file, "No mismatch detected in recon buffers\n");
   2498  fclose(stats_file);
   2499 #endif
   2500 
   2501  // Try to rewrite the output file headers with the actual frame count.
   2502  for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
   2503    aom_video_writer_close(outfile[i]);
   2504  aom_video_writer_close(total_layer_file);
   2505 
   2506  if (has_non_y4m_input) {
   2507    aom_img_free(&raw);
   2508  }
   2509  return EXIT_SUCCESS;
   2510 }