tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

libaom_av1_encoder_factory.cc (34467B)


      1 /*
      2 *  Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "api/video_codecs/libaom_av1_encoder_factory.h"
     12 
     13 #include <array>
     14 #include <cstddef>
     15 #include <cstdint>
     16 #include <cstring>
     17 #include <map>
     18 #include <memory>
     19 #include <optional>
     20 #include <string>
     21 #include <type_traits>
     22 #include <variant>
     23 #include <vector>
     24 
     25 #include "absl/algorithm/container.h"
     26 #include "absl/cleanup/cleanup.h"
     27 #include "api/array_view.h"
     28 #include "api/scoped_refptr.h"
     29 #include "api/units/data_rate.h"
     30 #include "api/units/data_size.h"
     31 #include "api/units/time_delta.h"
     32 #include "api/video/resolution.h"
     33 #include "api/video/video_frame_buffer.h"
     34 #include "api/video_codecs/video_codec.h"
     35 #include "api/video_codecs/video_encoder_factory_interface.h"
     36 #include "api/video_codecs/video_encoder_interface.h"
     37 #include "api/video_codecs/video_encoding_general.h"
     38 #include "rtc_base/checks.h"
     39 #include "rtc_base/logging.h"
     40 #include "rtc_base/numerics/rational.h"
     41 #include "rtc_base/strings/string_builder.h"
     42 #include "third_party/libaom/source/libaom/aom/aom_codec.h"
     43 #include "third_party/libaom/source/libaom/aom/aom_encoder.h"
     44 #include "third_party/libaom/source/libaom/aom/aom_image.h"
     45 #include "third_party/libaom/source/libaom/aom/aomcx.h"
     46 
// Applies the libaom control `param_id` with `param_value` to the member
// `ctx_` through `SetEncoderControlParameters`. If that call reports failure,
// the enclosing function returns immediately; it must therefore return `void`.
#define SET_OR_RETURN(param_id, param_value)                          \
  do {                                                                \
    if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \
      return;                                                         \
    }                                                                 \
  } while (0)

// Variant of the macro above for enclosing functions that return `bool`: on
// failure the enclosing function returns `false`.
#define SET_OR_RETURN_FALSE(param_id, param_value)                    \
  do {                                                                \
    if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \
      return false;                                                   \
    }                                                                 \
  } while (0)
     60 
     61 namespace webrtc {
     62 
     63 using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings;
     64 using Cbr = FrameEncodeSettings::Cbr;
     65 using Cqp = FrameEncodeSettings::Cqp;
     66 using aom_img_ptr = std::unique_ptr<aom_image_t, decltype(&aom_img_free)>;
     67 
     68 namespace {
// MaxQp defined here:
// http://google3/third_party/libaom/git_root/av1/av1_cx_iface.c;l=3510;rcl=527067478
constexpr int kMaxQp = 63;
// Number of buffer ids; update/reference buffer ids are validated against
// the range [0, kNumBuffers).
constexpr int kNumBuffers = 8;
// Maximum number of buffers a single frame may reference.
constexpr int kMaxReferences = 3;
// Inclusive range of accepted `effort_level` values.
constexpr int kMinEffortLevel = -2;
constexpr int kMaxEffortLevel = 2;
// Exclusive upper bounds for `spatial_id` and `temporal_id`.
constexpr int kMaxSpatialLayersWtf = 4;
constexpr int kMaxTemporalLayers = 4;
// Used as the denominator of the encoder timebase (`g_timebase.den`).
constexpr int kRtpTicksPerSecond = 90000;
// Input buffer types the encoder can consume.
constexpr std::array<VideoFrameBuffer::Type, 2> kSupportedInputFormats = {
    VideoFrameBuffer::Type::kI420, VideoFrameBuffer::Type::kNV12};

// Scaling factors that `GetScalingFactor` searches, in this order.
constexpr std::array<Rational, 7> kSupportedScalingFactors = {
    {{.numerator = 8, .denominator = 1},
     {.numerator = 4, .denominator = 1},
     {.numerator = 2, .denominator = 1},
     {.numerator = 1, .denominator = 1},
     {.numerator = 1, .denominator = 2},
     {.numerator = 1, .denominator = 4},
     {.numerator = 1, .denominator = 8}}};
     90 
     91 std::optional<Rational> GetScalingFactor(const Resolution& from,
     92                                         const Resolution& to) {
     93  auto it = absl::c_find_if(kSupportedScalingFactors, [&](const Rational& r) {
     94    return (from.width * r.numerator / r.denominator) == to.width &&
     95           (from.height * r.numerator / r.denominator) == to.height;
     96  });
     97 
     98  if (it != kSupportedScalingFactors.end()) {
     99    return *it;
    100  }
    101 
    102  return {};
    103 }
    104 
    105 class LibaomAv1Encoder : public VideoEncoderInterface {
    106 public:
    107  LibaomAv1Encoder() = default;
    108  ~LibaomAv1Encoder() override;
    109 
    110  bool InitEncode(
    111      const VideoEncoderFactoryInterface::StaticEncoderSettings& settings,
    112      const std::map<std::string, std::string>& encoder_specific_settings);
    113 
    114  void Encode(scoped_refptr<VideoFrameBuffer> frame_buffer,
    115              const TemporalUnitSettings& tu_settings,
    116              std::vector<FrameEncodeSettings> frame_settings) override;
    117 
    118 private:
    119  aom_img_ptr image_to_encode_ = aom_img_ptr(nullptr, aom_img_free);
    120  aom_codec_ctx_t ctx_;
    121  aom_codec_enc_cfg_t cfg_;
    122 
    123  std::optional<VideoCodecMode> current_content_type_;
    124  std::array<std::optional<int>, kMaxSpatialLayersWtf> current_effort_level_;
    125  int max_number_of_threads_;
    126  std::array<std::optional<Resolution>, 8> last_resolution_in_buffer_;
    127 };
    128 
    129 template <typename T>
    130 bool SetEncoderControlParameters(aom_codec_ctx_t* ctx, int id, T value) {
    131  aom_codec_err_t error_code = aom_codec_control(ctx, id, value);
    132  if (error_code != AOM_CODEC_OK) {
    133    RTC_LOG(LS_WARNING) << "aom_codec_control returned " << error_code
    134                        << " with id:  " << id << ".";
    135  }
    136  return error_code == AOM_CODEC_OK;
    137 }
    138 
// Releases the libaom encoder context held in `ctx_`.
// NOTE(review): `ctx_` is only set up by `aom_codec_enc_init` in `InitEncode`;
// confirm that destroying a context that was never successfully initialized
// is safe.
LibaomAv1Encoder::~LibaomAv1Encoder() {
  aom_codec_destroy(&ctx_);
}
    142 
    143 bool LibaomAv1Encoder::InitEncode(
    144    const VideoEncoderFactoryInterface::StaticEncoderSettings& settings,
    145    const std::map<std::string, std::string>& encoder_specific_settings) {
    146  if (!encoder_specific_settings.empty()) {
    147    RTC_LOG(LS_ERROR)
    148        << "libaom av1 encoder accepts no encoder specific settings";
    149    return false;
    150  }
    151 
    152  if (aom_codec_err_t ret = aom_codec_enc_config_default(
    153          aom_codec_av1_cx(), &cfg_, AOM_USAGE_REALTIME);
    154      ret != AOM_CODEC_OK) {
    155    RTC_LOG(LS_ERROR) << "aom_codec_enc_config_default returned " << ret;
    156    return false;
    157  }
    158 
    159  max_number_of_threads_ = settings.max_number_of_threads;
    160 
    161  // The encode resolution is set dynamically for each call to `Encode`, but for
    162  // `aom_codec_enc_init` to not fail we set it here as well.
    163  cfg_.g_w = settings.max_encode_dimensions.width;
    164  cfg_.g_h = settings.max_encode_dimensions.height;
    165  cfg_.g_timebase.num = 1;
    166  // TD: does 90khz timebase make sense, use microseconds instead maybe?
    167  cfg_.g_timebase.den = kRtpTicksPerSecond;
    168  cfg_.g_input_bit_depth = settings.encoding_format.bit_depth;
    169  cfg_.kf_mode = AOM_KF_DISABLED;
    170  // TD: rc_undershoot_pct and rc_overshoot_pct should probably be removed.
    171  cfg_.rc_undershoot_pct = 50;
    172  cfg_.rc_overshoot_pct = 50;
    173  auto* cbr =
    174      std::get_if<VideoEncoderFactoryInterface::StaticEncoderSettings::Cbr>(
    175          &settings.rc_mode);
    176  cfg_.rc_buf_initial_sz = cbr ? cbr->target_buffer_size.ms() : 600;
    177  cfg_.rc_buf_optimal_sz = cbr ? cbr->target_buffer_size.ms() : 600;
    178  cfg_.rc_buf_sz = cbr ? cbr->max_buffer_size.ms() : 1000;
    179  cfg_.g_usage = AOM_USAGE_REALTIME;
    180  cfg_.g_pass = AOM_RC_ONE_PASS;
    181  cfg_.g_lag_in_frames = 0;
    182  cfg_.g_error_resilient = 0;
    183  cfg_.rc_end_usage = cbr ? AOM_CBR : AOM_Q;
    184 
    185  if (aom_codec_err_t ret =
    186          aom_codec_enc_init(&ctx_, aom_codec_av1_cx(), &cfg_, /*flags=*/0);
    187      ret != AOM_CODEC_OK) {
    188    RTC_LOG(LS_ERROR) << "aom_codec_enc_init returned " << ret;
    189    return false;
    190  }
    191 
    192  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CDEF, 1);
    193  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TPL_MODEL, 0);
    194  SET_OR_RETURN_FALSE(AV1E_SET_DELTAQ_MODE, 0);
    195  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ORDER_HINT, 0);
    196  SET_OR_RETURN_FALSE(AV1E_SET_AQ_MODE, 3);
    197  SET_OR_RETURN_FALSE(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
    198  SET_OR_RETURN_FALSE(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
    199  SET_OR_RETURN_FALSE(AV1E_SET_MODE_COST_UPD_FREQ, 3);
    200  SET_OR_RETURN_FALSE(AV1E_SET_MV_COST_UPD_FREQ, 3);
    201  SET_OR_RETURN_FALSE(AV1E_SET_ROW_MT, 1);
    202  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_OBMC, 0);
    203  SET_OR_RETURN_FALSE(AV1E_SET_NOISE_SENSITIVITY, 0);
    204  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_WARPED_MOTION, 0);
    205  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
    206  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
    207  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CFL_INTRA, 0);
    208  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
    209  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
    210  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_FILTER_INTRA, 0);
    211  SET_OR_RETURN_FALSE(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
    212  SET_OR_RETURN_FALSE(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
    213  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
    214  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
    215  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DUAL_FILTER, 0);
    216  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
    217  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
    218  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
    219  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRABC, 0);
    220  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_MASKED_COMP, 0);
    221  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_PAETH_INTRA, 0);
    222  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_QM, 0);
    223  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
    224  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RESTORATION, 0);
    225  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
    226  SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TX64, 0);
    227  SET_OR_RETURN_FALSE(AV1E_SET_MAX_REFERENCE_FRAMES, 3);
    228 
    229  return true;
    230 }
    231 
// Threading, tiling and superblock configuration selected by
// `GetThreadingTilesAndSuperblockSize` for a given frame size.
struct ThreadTilesAndSuperblockSizeInfo {
  // Assigned to `cfg_.g_threads` by the encoder.
  int num_threads;
  // Value passed to the `AV1E_SET_TILE_ROWS` control.
  int exp_tile_rows;
  // Value passed to the `AV1E_SET_TILE_COLUMNS` control.
  int exp_tile_colums;
  // Value passed to the `AV1E_SET_SUPERBLOCK_SIZE` control.
  aom_superblock_size_t superblock_size;
};
    238 
    239 ThreadTilesAndSuperblockSizeInfo GetThreadingTilesAndSuperblockSize(
    240    int width,
    241    int height,
    242    int max_number_of_threads) {
    243  ThreadTilesAndSuperblockSizeInfo res;
    244  const int num_pixels = width * height;
    245  if (num_pixels >= 1920 * 1080 && max_number_of_threads > 8) {
    246    res.num_threads = 8;
    247    res.exp_tile_rows = 2;
    248    res.exp_tile_colums = 1;
    249  } else if (num_pixels >= 640 * 360 && max_number_of_threads > 4) {
    250    res.num_threads = 4;
    251    res.exp_tile_rows = 1;
    252    res.exp_tile_colums = 1;
    253  } else if (num_pixels >= 320 * 180 && max_number_of_threads > 2) {
    254    res.num_threads = 2;
    255    res.exp_tile_rows = 1;
    256    res.exp_tile_colums = 0;
    257  } else {
    258    res.num_threads = 1;
    259    res.exp_tile_rows = 0;
    260    res.exp_tile_colums = 0;
    261  }
    262 
    263  if (res.num_threads > 4 && num_pixels >= 960 * 540) {
    264    res.superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
    265  } else {
    266    res.superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC;
    267  }
    268 
    269  RTC_LOG(LS_WARNING) << __FUNCTION__ << " res.num_threads=" << res.num_threads
    270                      << " res.exp_tile_rows=" << res.exp_tile_rows
    271                      << " res.exp_tile_colums=" << res.exp_tile_colums
    272                      << " res.superblock_size=" << res.superblock_size;
    273 
    274  return res;
    275 }
    276 
    277 bool ValidateEncodeParams(
    278    const VideoFrameBuffer& /* frame_buffer */,
    279    const VideoEncoderInterface::TemporalUnitSettings& /* tu_settings */,
    280    const std::vector<VideoEncoderInterface::FrameEncodeSettings>&
    281        frame_settings,
    282    const std::array<std::optional<Resolution>, 8>& last_resolution_in_buffer,
    283    aom_rc_mode rc_mode) {
    284  if (frame_settings.empty()) {
    285    RTC_LOG(LS_ERROR) << "No frame settings provided.";
    286    return false;
    287  }
    288 
    289  auto in_range = [](int low, int high, int val) {
    290    return low <= val && val < high;
    291  };
    292 
    293  for (size_t i = 0; i < frame_settings.size(); ++i) {
    294    const VideoEncoderInterface::FrameEncodeSettings& settings =
    295        frame_settings[i];
    296 
    297    if (!settings.frame_output) {
    298      RTC_LOG(LS_ERROR) << "No frame output provided.";
    299      return false;
    300    }
    301 
    302    if (!in_range(kMinEffortLevel, kMaxEffortLevel + 1,
    303                  settings.effort_level)) {
    304      RTC_LOG(LS_ERROR) << "Unsupported effort level " << settings.effort_level;
    305      return false;
    306    }
    307 
    308    if (!in_range(0, kMaxSpatialLayersWtf, settings.spatial_id)) {
    309      RTC_LOG(LS_ERROR) << "invalid spatial id " << settings.spatial_id;
    310      return false;
    311    }
    312 
    313    if (!in_range(0, kMaxTemporalLayers, settings.temporal_id)) {
    314      RTC_LOG(LS_ERROR) << "invalid temporal id " << settings.temporal_id;
    315      return false;
    316    }
    317 
    318    if ((settings.frame_type == FrameType::kKeyframe ||
    319         settings.frame_type == FrameType::kStartFrame) &&
    320        !settings.reference_buffers.empty()) {
    321      RTC_LOG(LS_ERROR) << "Reference buffers can not be used for keyframes.";
    322      return false;
    323    }
    324 
    325    if ((settings.frame_type == FrameType::kKeyframe ||
    326         settings.frame_type == FrameType::kStartFrame) &&
    327        !settings.update_buffer) {
    328      RTC_LOG(LS_ERROR)
    329          << "Buffer to update must be specified for keyframe/startframe";
    330      return false;
    331    }
    332 
    333    if (settings.update_buffer &&
    334        !in_range(0, kNumBuffers, *settings.update_buffer)) {
    335      RTC_LOG(LS_ERROR) << "Invalid update buffer id.";
    336      return false;
    337    }
    338 
    339    if (settings.reference_buffers.size() > kMaxReferences) {
    340      RTC_LOG(LS_ERROR) << "Too many referenced buffers.";
    341      return false;
    342    }
    343 
    344    for (size_t j = 0; j < settings.reference_buffers.size(); ++j) {
    345      if (!in_range(0, kNumBuffers, settings.reference_buffers[j])) {
    346        RTC_LOG(LS_ERROR) << "Invalid reference buffer id.";
    347        return false;
    348      }
    349 
    350      // Figure out which frame resolution a certain buffer will hold when the
    351      // frame described by `settings` is encoded.
    352      std::optional<Resolution> referenced_resolution;
    353      bool keyframe_on_previous_layer = false;
    354 
    355      // Will some other frame in this temporal unit update the buffer?
    356      for (size_t k = 0; k < i; ++k) {
    357        if (frame_settings[k].frame_type == FrameType::kKeyframe) {
    358          keyframe_on_previous_layer = true;
    359          referenced_resolution.reset();
    360        }
    361        if (frame_settings[k].update_buffer == settings.reference_buffers[j]) {
    362          referenced_resolution = frame_settings[k].resolution;
    363        }
    364      }
    365 
    366      // Not updated by another frame in the temporal unit, what is the
    367      // resolution of the last frame stored into that buffer?
    368      if (!referenced_resolution && !keyframe_on_previous_layer) {
    369        referenced_resolution =
    370            last_resolution_in_buffer[settings.reference_buffers[j]];
    371      }
    372 
    373      if (!referenced_resolution) {
    374        RTC_LOG(LS_ERROR) << "Referenced buffer holds no frame.";
    375        return false;
    376      }
    377 
    378      if (!GetScalingFactor(*referenced_resolution, settings.resolution)) {
    379        RTC_LOG(LS_ERROR)
    380            << "Required resolution scaling factor not supported.";
    381        return false;
    382      }
    383 
    384      for (size_t l = i + 1; l < settings.reference_buffers.size(); ++l) {
    385        if (settings.reference_buffers[i] == settings.reference_buffers[l]) {
    386          RTC_LOG(LS_ERROR) << "Duplicate reference buffer specified.";
    387          return false;
    388        }
    389      }
    390    }
    391 
    392    if ((rc_mode == AOM_CBR &&
    393         std::holds_alternative<Cqp>(settings.rate_options)) ||
    394        (rc_mode == AOM_Q &&
    395         std::holds_alternative<Cbr>(settings.rate_options))) {
    396      RTC_LOG(LS_ERROR) << "Invalid rate options, encoder configured with "
    397                        << (rc_mode == AOM_CBR ? "AOM_CBR" : "AOM_Q");
    398      return false;
    399    }
    400 
    401    for (size_t j = i + 1; j < frame_settings.size(); ++j) {
    402      if (settings.spatial_id >= frame_settings[j].spatial_id) {
    403        RTC_LOG(LS_ERROR) << "Frame spatial id specified out of order.";
    404        return false;
    405      }
    406    }
    407  }
    408 
    409  return true;
    410 }
    411 
    412 void PrepareInputImage(const VideoFrameBuffer& input_buffer,
    413                       aom_img_ptr& out_aom_image) {
    414  aom_img_fmt_t input_format;
    415  switch (input_buffer.type()) {
    416    case VideoFrameBuffer::Type::kI420:
    417      input_format = AOM_IMG_FMT_I420;
    418      break;
    419    case VideoFrameBuffer::Type::kNV12:
    420      input_format = AOM_IMG_FMT_NV12;
    421      break;
    422    default:
    423      RTC_CHECK_NOTREACHED();
    424      return;
    425  }
    426 
    427  if (!out_aom_image || out_aom_image->fmt != input_format ||
    428      static_cast<int>(out_aom_image->w) != input_buffer.width() ||
    429      static_cast<int>(out_aom_image->h) != input_buffer.height()) {
    430    out_aom_image.reset(
    431        aom_img_wrap(/*img=*/nullptr, input_format, input_buffer.width(),
    432                     input_buffer.height(), /*align=*/1, /*img_data=*/nullptr));
    433 
    434    RTC_LOG(LS_WARNING) << __FUNCTION__ << " input_format=" << input_format
    435                        << " input_buffer.width()=" << input_buffer.width()
    436                        << " input_buffer.height()=" << input_buffer.height()
    437                        << " w=" << out_aom_image->w
    438                        << " h=" << out_aom_image->h
    439                        << " d_w=" << out_aom_image->d_w
    440                        << " d_h=" << out_aom_image->d_h
    441                        << " r_w=" << out_aom_image->r_w
    442                        << " r_h=" << out_aom_image->r_h;
    443  }
    444 
    445  if (input_format == AOM_IMG_FMT_I420) {
    446    const I420BufferInterface* i420_buffer = input_buffer.GetI420();
    447    RTC_DCHECK(i420_buffer);
    448    out_aom_image->planes[AOM_PLANE_Y] =
    449        const_cast<unsigned char*>(i420_buffer->DataY());
    450    out_aom_image->planes[AOM_PLANE_U] =
    451        const_cast<unsigned char*>(i420_buffer->DataU());
    452    out_aom_image->planes[AOM_PLANE_V] =
    453        const_cast<unsigned char*>(i420_buffer->DataV());
    454    out_aom_image->stride[AOM_PLANE_Y] = i420_buffer->StrideY();
    455    out_aom_image->stride[AOM_PLANE_U] = i420_buffer->StrideU();
    456    out_aom_image->stride[AOM_PLANE_V] = i420_buffer->StrideV();
    457  } else {
    458    const NV12BufferInterface* nv12_buffer = input_buffer.GetNV12();
    459    RTC_DCHECK(nv12_buffer);
    460    out_aom_image->planes[AOM_PLANE_Y] =
    461        const_cast<unsigned char*>(nv12_buffer->DataY());
    462    out_aom_image->planes[AOM_PLANE_U] =
    463        const_cast<unsigned char*>(nv12_buffer->DataUV());
    464    out_aom_image->planes[AOM_PLANE_V] = nullptr;
    465    out_aom_image->stride[AOM_PLANE_Y] = nv12_buffer->StrideY();
    466    out_aom_image->stride[AOM_PLANE_U] = nv12_buffer->StrideUV();
    467    out_aom_image->stride[AOM_PLANE_V] = 0;
    468  }
    469 }
    470 
    471 aom_svc_ref_frame_config_t GetSvcRefFrameConfig(
    472    const VideoEncoderInterface::FrameEncodeSettings& settings) {
    473  // Buffer alias to use for each position. In particular when there are two
    474  // buffers being used, prefer to alias them as LAST and GOLDEN, since the AV1
    475  // bitstream format has dedicated fields for them. See last_frame_idx and
    476  // golden_frame_idx in the av1 spec
    477  // https://aomediacodec.github.io/av1-spec/av1-spec.pdf.
    478 
    479  // Libaom is also compiled for RTC, which limits the number of references to
    480  // at most three, and they must be aliased as LAST, GOLDEN and ALTREF. Also
    481  // note that libaom favors LAST the most, and GOLDEN second most, so buffers
    482  // should be specified in order of how useful they are for prediction. Libaom
    483  // could be updated to make LAST, GOLDEN and ALTREF equivalent, but that is
    484  // not a priority for now. All aliases can be used to update buffers.
    485  // TD: Automatically select LAST, GOLDEN and ALTREF depending on previous
    486  //       buffer usage.
    487  static constexpr int kPreferedAlias[] = {0,  // LAST
    488                                           3,  // GOLDEN
    489                                           6,  // ALTREF
    490                                           1, 2, 4, 5};
    491 
    492  aom_svc_ref_frame_config_t ref_frame_config = {};
    493 
    494  int alias_index = 0;
    495  if (!settings.reference_buffers.empty()) {
    496    for (size_t i = 0; i < settings.reference_buffers.size(); ++i) {
    497      ref_frame_config.ref_idx[kPreferedAlias[alias_index]] =
    498          settings.reference_buffers[i];
    499      ref_frame_config.reference[kPreferedAlias[alias_index]] = 1;
    500      alias_index++;
    501    }
    502 
    503    // Delta frames must not alias unused buffers, and since start frames only
    504    // update some buffers it is not safe to leave unused aliases to simply
    505    // point to buffer 0.
    506    for (size_t i = settings.reference_buffers.size();
    507         i < std::size(ref_frame_config.ref_idx); ++i) {
    508      ref_frame_config.ref_idx[kPreferedAlias[i]] =
    509          settings.reference_buffers.back();
    510    }
    511  }
    512 
    513  if (settings.update_buffer) {
    514    if (!absl::c_linear_search(settings.reference_buffers,
    515                               *settings.update_buffer)) {
    516      ref_frame_config.ref_idx[kPreferedAlias[alias_index]] =
    517          *settings.update_buffer;
    518      alias_index++;
    519    }
    520    ref_frame_config.refresh[*settings.update_buffer] = 1;
    521  }
    522 
    523  char buf[256];
    524  SimpleStringBuilder sb(buf);
    525  sb << " spatial_id=" << settings.spatial_id;
    526  sb << "  ref_idx=[ ";
    527  for (auto r : ref_frame_config.ref_idx) {
    528    sb << r << " ";
    529  }
    530  sb << "]  reference=[ ";
    531  for (auto r : ref_frame_config.reference) {
    532    sb << r << " ";
    533  }
    534  sb << "]  refresh=[ ";
    535  for (auto r : ref_frame_config.refresh) {
    536    sb << r << " ";
    537  }
    538  sb << "]";
    539 
    540  RTC_LOG(LS_WARNING) << __FUNCTION__ << sb.str();
    541 
    542  return ref_frame_config;
    543 }
    544 
    545 aom_svc_params_t GetSvcParams(
    546    const VideoFrameBuffer& frame_buffer,
    547    const std::vector<VideoEncoderInterface::FrameEncodeSettings>&
    548        frame_settings) {
    549  aom_svc_params_t svc_params = {};
    550  svc_params.number_spatial_layers = frame_settings.back().spatial_id + 1;
    551  svc_params.number_temporal_layers = kMaxTemporalLayers;
    552 
    553  // TD: What about svc_params.framerate_factor?
    554  // If `framerate_factors` are left at 0 then configured bitrate values will
    555  // not be picked up by libaom.
    556  for (int tid = 0; tid < svc_params.number_temporal_layers; ++tid) {
    557    svc_params.framerate_factor[tid] = 1;
    558  }
    559 
    560  // If the scaling factor is left at zero for unused layers a division by zero
    561  // will happen inside libaom, default all layers to one.
    562  for (int sid = 0; sid < svc_params.number_spatial_layers; ++sid) {
    563    svc_params.scaling_factor_num[sid] = 1;
    564    svc_params.scaling_factor_den[sid] = 1;
    565  }
    566 
    567  for (const VideoEncoderInterface::FrameEncodeSettings& settings :
    568       frame_settings) {
    569    std::optional<Rational> scaling_factor = GetScalingFactor(
    570        {.width = frame_buffer.width(), .height = frame_buffer.height()},
    571        settings.resolution);
    572    RTC_CHECK(scaling_factor);
    573    svc_params.scaling_factor_num[settings.spatial_id] =
    574        scaling_factor->numerator;
    575    svc_params.scaling_factor_den[settings.spatial_id] =
    576        scaling_factor->denominator;
    577 
    578    const int flat_layer_id =
    579        settings.spatial_id * svc_params.number_temporal_layers +
    580        settings.temporal_id;
    581 
    582    RTC_LOG(LS_WARNING) << __FUNCTION__ << " flat_layer_id=" << flat_layer_id
    583                        << " num="
    584                        << svc_params.scaling_factor_num[settings.spatial_id]
    585                        << " den="
    586                        << svc_params.scaling_factor_den[settings.spatial_id];
    587 
    588    std::visit(
    589        [&](auto&& arg) {
    590          using T = std::decay_t<decltype(arg)>;
    591          if constexpr (std::is_same_v<T, Cbr>) {
    592            // Libaom calculates the total bitrate across all spatial layers by
    593            // summing the bitrate of the last temporal layer in each spatial
    594            // layer. This means the bitrate for the top temporal layer always
    595            // has to be set even if that temporal layer is not being encoded.
    596            const int last_temporal_layer_in_spatial_layer_id =
    597                settings.spatial_id * svc_params.number_temporal_layers +
    598                (kMaxTemporalLayers - 1);
    599            svc_params
    600                .layer_target_bitrate[last_temporal_layer_in_spatial_layer_id] =
    601                arg.target_bitrate.kbps();
    602 
    603            svc_params.layer_target_bitrate[flat_layer_id] =
    604                arg.target_bitrate.kbps();
    605            // When libaom is configured with `AOM_CBR` it will still limit QP
    606            // to stay between `min_quantizers` and `max_quantizers'. Set
    607            // `max_quantizers` to max QP to avoid the encoder overshooting.
    608            svc_params.max_quantizers[flat_layer_id] = kMaxQp;
    609            svc_params.min_quantizers[flat_layer_id] = 0;
    610          } else if constexpr (std::is_same_v<T, Cqp>) {
    611            // When libaom is configured with `AOM_Q` it will still look at the
    612            // `layer_target_bitrate` to determine whether the layer is disabled
    613            // or not. Set `layer_target_bitrate` to 1 so that libaom knows the
    614            // layer is active.
    615            svc_params.layer_target_bitrate[flat_layer_id] = 1;
    616            svc_params.max_quantizers[flat_layer_id] = arg.target_qp;
    617            svc_params.min_quantizers[flat_layer_id] = arg.target_qp;
    618            RTC_LOG(LS_WARNING) << __FUNCTION__ << " svc_params.qp["
    619                                << flat_layer_id << "]=" << arg.target_qp;
    620            // TD: Does libaom look at both max and min? Shouldn't it just be
    621            //       one of them
    622          }
    623        },
    624        settings.rate_options);
    625  }
    626 
    627  char buf[512];
    628  SimpleStringBuilder sb(buf);
    629  sb << "GetSvcParams" << " layer bitrates kbps";
    630  for (int s = 0; s < svc_params.number_spatial_layers; ++s) {
    631    sb << " S" << s << "=[ ";
    632    for (int t = 0; t < svc_params.number_temporal_layers; ++t) {
    633      int id = s * svc_params.number_temporal_layers + t;
    634      sb << "T" << t << "=" << svc_params.layer_target_bitrate[id] << " ";
    635    }
    636    sb << "]";
    637  }
    638 
    639  RTC_LOG(LS_WARNING) << sb.str();
    640 
    641  return svc_params;
    642 }
    643 
    644 void LibaomAv1Encoder::Encode(scoped_refptr<VideoFrameBuffer> frame_buffer,
    645                              const TemporalUnitSettings& tu_settings,
    646                              std::vector<FrameEncodeSettings> frame_settings) {
    647  absl::Cleanup on_return = [&] {
    648    // On return call `EncodeComplete` with EncodingError result unless they
    649    // were already called with an EncodedData result.
    650    for (FrameEncodeSettings& settings : frame_settings) {
    651      if (settings.frame_output) {
    652        settings.frame_output->EncodeComplete(EncodingError());
    653      }
    654    }
    655  };
    656 
    657  if (!ValidateEncodeParams(*frame_buffer, tu_settings, frame_settings,
    658                            last_resolution_in_buffer_, cfg_.rc_end_usage)) {
    659    return;
    660  }
    661 
    662  if (current_content_type_ != tu_settings.content_hint) {
    663    if (tu_settings.content_hint == VideoCodecMode::kScreensharing) {
    664      // TD: Set speed 11?
    665      SET_OR_RETURN(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN);
    666      SET_OR_RETURN(AV1E_SET_ENABLE_PALETTE, 1);
    667    } else {
    668      SET_OR_RETURN(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT);
    669      SET_OR_RETURN(AV1E_SET_ENABLE_PALETTE, 0);
    670    }
    671    current_content_type_ = tu_settings.content_hint;
    672  }
    673 
    674  if (cfg_.rc_end_usage == AOM_CBR) {
    675    DataRate accum_rate = DataRate::Zero();
    676    for (const FrameEncodeSettings& settings : frame_settings) {
    677      accum_rate += std::get<Cbr>(settings.rate_options).target_bitrate;
    678    }
    679    cfg_.rc_target_bitrate = accum_rate.kbps();
    680    RTC_LOG(LS_WARNING) << __FUNCTION__
    681                        << " cfg_.rc_target_bitrate=" << cfg_.rc_target_bitrate;
    682  }
    683 
    684  if (static_cast<int>(cfg_.g_w) != frame_buffer->width() ||
    685      static_cast<int>(cfg_.g_h) != frame_buffer->height()) {
    686    RTC_LOG(LS_WARNING) << __FUNCTION__ << " resolution changed from "
    687                        << cfg_.g_w << "x" << cfg_.g_h << " to "
    688                        << frame_buffer->width() << "x"
    689                        << frame_buffer->height();
    690    ThreadTilesAndSuperblockSizeInfo ttsbi = GetThreadingTilesAndSuperblockSize(
    691        frame_buffer->width(), frame_buffer->height(), max_number_of_threads_);
    692    SET_OR_RETURN(AV1E_SET_SUPERBLOCK_SIZE, ttsbi.superblock_size);
    693    SET_OR_RETURN(AV1E_SET_TILE_ROWS, ttsbi.exp_tile_rows);
    694    SET_OR_RETURN(AV1E_SET_TILE_COLUMNS, ttsbi.exp_tile_colums);
    695    cfg_.g_threads = ttsbi.num_threads;
    696    cfg_.g_w = frame_buffer->width();
    697    cfg_.g_h = frame_buffer->height();
    698  }
    699 
    700  PrepareInputImage(*frame_buffer, image_to_encode_);
    701 
    702  // The bitrates caluclated internally in libaom when `AV1E_SET_SVC_PARAMS` is
    703  // called depends on the currently configured `cfg_.rc_target_bitrate`. If the
    704  // total target bitrate is not updated first a division by zero could happen.
    705  if (aom_codec_err_t ret = aom_codec_enc_config_set(&ctx_, &cfg_);
    706      ret != AOM_CODEC_OK) {
    707    RTC_LOG(LS_ERROR) << "aom_codec_enc_config_set returned " << ret;
    708    return;
    709  }
    710  aom_svc_params_t svc_params = GetSvcParams(*frame_buffer, frame_settings);
    711  SET_OR_RETURN(AV1E_SET_SVC_PARAMS, &svc_params);
    712 
    713  // The libaom AV1 encoder requires that `aom_codec_encode` is called for
    714  // every spatial layer, even if no frame should be encoded for that layer.
    715  std::array<FrameEncodeSettings*, kMaxSpatialLayersWtf>
    716      settings_for_spatial_id;
    717  settings_for_spatial_id.fill(nullptr);
    718  FrameEncodeSettings settings_for_unused_layer;
    719  for (FrameEncodeSettings& settings : frame_settings) {
    720    settings_for_spatial_id[settings.spatial_id] = &settings;
    721  }
    722 
    723  for (int sid = frame_settings[0].spatial_id;
    724       sid < svc_params.number_spatial_layers; ++sid) {
    725    const bool layer_enabled = settings_for_spatial_id[sid] != nullptr;
    726    FrameEncodeSettings& settings = layer_enabled
    727                                        ? *settings_for_spatial_id[sid]
    728                                        : settings_for_unused_layer;
    729 
    730    aom_svc_layer_id_t layer_id = {
    731        .spatial_layer_id = sid,
    732        .temporal_layer_id = settings.temporal_id,
    733    };
    734    SET_OR_RETURN(AV1E_SET_SVC_LAYER_ID, &layer_id);
    735    aom_svc_ref_frame_config_t ref_config = GetSvcRefFrameConfig(settings);
    736    SET_OR_RETURN(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_config);
    737 
    738    // TD: Duration can't be zero, what does it matter when the layer is
    739    // not being encoded?
    740    TimeDelta duration = TimeDelta::Millis(1);
    741    if (layer_enabled) {
    742      if (const Cbr* cbr = std::get_if<Cbr>(&settings.rate_options)) {
    743        duration = cbr->duration;
    744      } else {
    745        // TD: What should duration be when Cqp is used?
    746        duration = TimeDelta::Millis(1);
    747      }
    748 
    749      if (settings.effort_level != current_effort_level_[settings.spatial_id]) {
    750        // For RTC we use speed level 6 to 10, with 8 being the default. Note
    751        // that low effort means higher speed.
    752        SET_OR_RETURN(AOME_SET_CPUUSED, 8 - settings.effort_level);
    753        current_effort_level_[settings.spatial_id] = settings.effort_level;
    754      }
    755    }
    756 
    757    RTC_LOG(LS_WARNING)
    758        << __FUNCTION__ << " timestamp="
    759        << (tu_settings.presentation_timestamp.ms() * kRtpTicksPerSecond / 1000)
    760        << "  duration=" << (duration.ms() * kRtpTicksPerSecond / 1000)
    761        << "  type="
    762        << (settings.frame_type == FrameType::kKeyframe ? "key" : "delta");
    763    aom_codec_err_t ret = aom_codec_encode(
    764        &ctx_, &*image_to_encode_, tu_settings.presentation_timestamp.ms() * 90,
    765        duration.ms() * 90,
    766        settings.frame_type == FrameType::kKeyframe ? AOM_EFLAG_FORCE_KF : 0);
    767    if (ret != AOM_CODEC_OK) {
    768      RTC_LOG(LS_WARNING) << "aom_codec_encode returned " << ret;
    769      return;
    770    }
    771 
    772    if (!layer_enabled) {
    773      continue;
    774    }
    775 
    776    if (settings.frame_type == FrameType::kKeyframe) {
    777      last_resolution_in_buffer_ = {};
    778    }
    779 
    780    if (settings.update_buffer) {
    781      last_resolution_in_buffer_[*settings.update_buffer] = settings.resolution;
    782    }
    783 
    784    EncodedData result;
    785    aom_codec_iter_t iter = nullptr;
    786    bool bitstream_produced = false;
    787    while (const aom_codec_cx_pkt_t* pkt =
    788               aom_codec_get_cx_data(&ctx_, &iter)) {
    789      if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
    790        SET_OR_RETURN(AOME_GET_LAST_QUANTIZER_64, &result.encoded_qp);
    791        result.frame_type = pkt->data.frame.flags & AOM_FRAME_IS_KEY
    792                                ? FrameType::kKeyframe
    793                                : FrameType::kDeltaFrame;
    794        ArrayView<uint8_t> output_buffer =
    795            settings.frame_output->GetBitstreamOutputBuffer(
    796                DataSize::Bytes(pkt->data.frame.sz));
    797        if (output_buffer.size() != pkt->data.frame.sz) {
    798          return;
    799        }
    800        memcpy(output_buffer.data(), pkt->data.frame.buf, pkt->data.frame.sz);
    801        bitstream_produced = true;
    802        break;
    803      }
    804    }
    805 
    806    if (!bitstream_produced) {
    807      return;
    808    } else {
    809      RTC_CHECK(settings.frame_output);
    810      settings.frame_output->EncodeComplete(result);
    811      // To avoid invoking any callback more than once.
    812      settings.frame_output = nullptr;
    813    }
    814  }
    815 }
    816 }  // namespace
    817 
    818 std::string LibaomAv1EncoderFactory::CodecName() const {
    819  return "AV1";
    820 }
    821 
    822 std::string LibaomAv1EncoderFactory::ImplementationName() const {
    823  return "Libaom";
    824 }
    825 
    826 std::map<std::string, std::string> LibaomAv1EncoderFactory::CodecSpecifics()
    827    const {
    828  return {};
    829 }
    830 
    831 // clang-format off
    832 // The formater and cpplint have conflicting ideas.
    833 VideoEncoderFactoryInterface::Capabilities
    834 LibaomAv1EncoderFactory::GetEncoderCapabilities() const {
    835  return {
    836      .prediction_constraints = {
    837           .num_buffers = kNumBuffers,
    838           .max_references = kMaxReferences,
    839           .max_temporal_layers = kMaxTemporalLayers,
    840           .buffer_space_type = VideoEncoderFactoryInterface::Capabilities::
    841               PredictionConstraints::BufferSpaceType::kSingleKeyframe,
    842           .max_spatial_layers = kMaxSpatialLayersWtf,
    843           .scaling_factors = {kSupportedScalingFactors.begin(),
    844                               kSupportedScalingFactors.end()},
    845           .supported_frame_types = {FrameType::kKeyframe,
    846                                     FrameType::kStartFrame,
    847                                     FrameType::kDeltaFrame}},
    848      .input_constraints = {
    849              .min = {.width = 64, .height = 36},
    850              .max = {.width = 3840, .height = 2160},
    851              .pixel_alignment = 1,
    852              .input_formats = {kSupportedInputFormats.begin(),
    853                                kSupportedInputFormats.end()},
    854          },
    855      .encoding_formats = {{.sub_sampling = EncodingFormat::k420,
    856                            .bit_depth = 8}},
    857      .rate_control = {
    858           .qp_range = {0, kMaxQp},
    859           .rc_modes = {VideoEncoderFactoryInterface::RateControlMode::kCbr,
    860                        VideoEncoderFactoryInterface::RateControlMode::kCqp}},
    861      .performance = {.encode_on_calling_thread = true,
    862                      .min_max_effort_level = {kMinEffortLevel,
    863                                               kMaxEffortLevel}},
    864  };
    865 }
    866 // clang-format on
    867 
    868 std::unique_ptr<VideoEncoderInterface> LibaomAv1EncoderFactory::CreateEncoder(
    869    const StaticEncoderSettings& settings,
    870    const std::map<std::string, std::string>& encoder_specific_settings) {
    871  auto encoder = std::make_unique<LibaomAv1Encoder>();
    872  if (!encoder->InitEncode(settings, encoder_specific_settings)) {
    873    return nullptr;
    874  }
    875  return encoder;
    876 }
    877 
    878 }  // namespace webrtc