libaom_av1_encoder_factory.cc (34467B)
1 /* 2 * Copyright (c) 2024 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "api/video_codecs/libaom_av1_encoder_factory.h" 12 13 #include <array> 14 #include <cstddef> 15 #include <cstdint> 16 #include <cstring> 17 #include <map> 18 #include <memory> 19 #include <optional> 20 #include <string> 21 #include <type_traits> 22 #include <variant> 23 #include <vector> 24 25 #include "absl/algorithm/container.h" 26 #include "absl/cleanup/cleanup.h" 27 #include "api/array_view.h" 28 #include "api/scoped_refptr.h" 29 #include "api/units/data_rate.h" 30 #include "api/units/data_size.h" 31 #include "api/units/time_delta.h" 32 #include "api/video/resolution.h" 33 #include "api/video/video_frame_buffer.h" 34 #include "api/video_codecs/video_codec.h" 35 #include "api/video_codecs/video_encoder_factory_interface.h" 36 #include "api/video_codecs/video_encoder_interface.h" 37 #include "api/video_codecs/video_encoding_general.h" 38 #include "rtc_base/checks.h" 39 #include "rtc_base/logging.h" 40 #include "rtc_base/numerics/rational.h" 41 #include "rtc_base/strings/string_builder.h" 42 #include "third_party/libaom/source/libaom/aom/aom_codec.h" 43 #include "third_party/libaom/source/libaom/aom/aom_encoder.h" 44 #include "third_party/libaom/source/libaom/aom/aom_image.h" 45 #include "third_party/libaom/source/libaom/aom/aomcx.h" 46 47 #define SET_OR_RETURN(param_id, param_value) \ 48 do { \ 49 if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \ 50 return; \ 51 } \ 52 } while (0) 53 54 #define SET_OR_RETURN_FALSE(param_id, param_value) \ 55 do { \ 56 if (!SetEncoderControlParameters(&ctx_, param_id, param_value)) { \ 57 return false; \ 58 } \ 59 } while (0) 60 61 namespace webrtc { 62 63 using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings; 64 using Cbr = FrameEncodeSettings::Cbr; 65 using Cqp = FrameEncodeSettings::Cqp; 66 using aom_img_ptr = std::unique_ptr<aom_image_t, decltype(&aom_img_free)>; 67 68 namespace { 69 // MaxQp defined here: 70 // http://google3/third_party/libaom/git_root/av1/av1_cx_iface.c;l=3510;rcl=527067478 71 constexpr int kMaxQp = 63; 72 constexpr int kNumBuffers = 8; 73 constexpr int kMaxReferences = 3; 74 constexpr int kMinEffortLevel = -2; 75 constexpr int kMaxEffortLevel = 2; 76 constexpr int kMaxSpatialLayersWtf = 4; 77 constexpr int kMaxTemporalLayers = 4; 78 constexpr int kRtpTicksPerSecond = 90000; 79 constexpr std::array<VideoFrameBuffer::Type, 2> kSupportedInputFormats = { 80 VideoFrameBuffer::Type::kI420, VideoFrameBuffer::Type::kNV12}; 81 82 constexpr std::array<Rational, 7> kSupportedScalingFactors = { 83 {{.numerator = 8, .denominator = 1}, 84 {.numerator = 4, .denominator = 1}, 85 {.numerator = 2, .denominator = 1}, 86 {.numerator = 1, .denominator = 1}, 87 {.numerator = 1, .denominator = 2}, 88 {.numerator = 1, .denominator = 4}, 89 {.numerator = 1, .denominator = 8}}}; 90 91 std::optional<Rational> GetScalingFactor(const Resolution& from, 92 const Resolution& to) { 93 auto it = absl::c_find_if(kSupportedScalingFactors, [&](const Rational& r) { 94 return (from.width * r.numerator / r.denominator) == to.width && 95 (from.height * r.numerator / r.denominator) == to.height; 96 }); 97 98 if (it != kSupportedScalingFactors.end()) { 99 return *it; 100 } 101 102 return {}; 103 } 104 105 class LibaomAv1Encoder : public VideoEncoderInterface { 106 public: 107 LibaomAv1Encoder() = default; 108 ~LibaomAv1Encoder() override; 109 110 bool InitEncode( 111 const VideoEncoderFactoryInterface::StaticEncoderSettings& settings, 112 const std::map<std::string, std::string>& encoder_specific_settings); 113 114 void Encode(scoped_refptr<VideoFrameBuffer> frame_buffer, 115 const TemporalUnitSettings& tu_settings, 116 std::vector<FrameEncodeSettings> frame_settings) override; 117 118 private: 119 aom_img_ptr image_to_encode_ = aom_img_ptr(nullptr, aom_img_free); 120 aom_codec_ctx_t ctx_; 121 aom_codec_enc_cfg_t cfg_; 122 123 std::optional<VideoCodecMode> current_content_type_; 124 std::array<std::optional<int>, kMaxSpatialLayersWtf> current_effort_level_; 125 int max_number_of_threads_; 126 std::array<std::optional<Resolution>, 8> last_resolution_in_buffer_; 127 }; 128 129 template <typename T> 130 bool SetEncoderControlParameters(aom_codec_ctx_t* ctx, int id, T value) { 131 aom_codec_err_t error_code = aom_codec_control(ctx, id, value); 132 if (error_code != AOM_CODEC_OK) { 133 RTC_LOG(LS_WARNING) << "aom_codec_control returned " << error_code 134 << " with id: " << id << "."; 135 } 136 return error_code == AOM_CODEC_OK; 137 } 138 139 LibaomAv1Encoder::~LibaomAv1Encoder() { 140 aom_codec_destroy(&ctx_); 141 } 142 143 bool LibaomAv1Encoder::InitEncode( 144 const VideoEncoderFactoryInterface::StaticEncoderSettings& settings, 145 const std::map<std::string, std::string>& encoder_specific_settings) { 146 if (!encoder_specific_settings.empty()) { 147 RTC_LOG(LS_ERROR) 148 << "libaom av1 encoder accepts no encoder specific settings"; 149 return false; 150 } 151 152 if (aom_codec_err_t ret = aom_codec_enc_config_default( 153 aom_codec_av1_cx(), &cfg_, AOM_USAGE_REALTIME); 154 ret != AOM_CODEC_OK) { 155 RTC_LOG(LS_ERROR) << "aom_codec_enc_config_default returned " << ret; 156 return false; 157 } 158 159 max_number_of_threads_ = settings.max_number_of_threads; 160 161 // The encode resolution is set dynamically for each call to `Encode`, but for 162 // `aom_codec_enc_init` to not fail we set it here as well. 163 cfg_.g_w = settings.max_encode_dimensions.width; 164 cfg_.g_h = settings.max_encode_dimensions.height; 165 cfg_.g_timebase.num = 1; 166 // TD: does 90khz timebase make sense, use microseconds instead maybe? 167 cfg_.g_timebase.den = kRtpTicksPerSecond; 168 cfg_.g_input_bit_depth = settings.encoding_format.bit_depth; 169 cfg_.kf_mode = AOM_KF_DISABLED; 170 // TD: rc_undershoot_pct and rc_overshoot_pct should probably be removed. 171 cfg_.rc_undershoot_pct = 50; 172 cfg_.rc_overshoot_pct = 50; 173 auto* cbr = 174 std::get_if<VideoEncoderFactoryInterface::StaticEncoderSettings::Cbr>( 175 &settings.rc_mode); 176 cfg_.rc_buf_initial_sz = cbr ? cbr->target_buffer_size.ms() : 600; 177 cfg_.rc_buf_optimal_sz = cbr ? cbr->target_buffer_size.ms() : 600; 178 cfg_.rc_buf_sz = cbr ? cbr->max_buffer_size.ms() : 1000; 179 cfg_.g_usage = AOM_USAGE_REALTIME; 180 cfg_.g_pass = AOM_RC_ONE_PASS; 181 cfg_.g_lag_in_frames = 0; 182 cfg_.g_error_resilient = 0; 183 cfg_.rc_end_usage = cbr ? AOM_CBR : AOM_Q; 184 185 if (aom_codec_err_t ret = 186 aom_codec_enc_init(&ctx_, aom_codec_av1_cx(), &cfg_, /*flags=*/0); 187 ret != AOM_CODEC_OK) { 188 RTC_LOG(LS_ERROR) << "aom_codec_enc_init returned " << ret; 189 return false; 190 } 191 192 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CDEF, 1); 193 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TPL_MODEL, 0); 194 SET_OR_RETURN_FALSE(AV1E_SET_DELTAQ_MODE, 0); 195 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ORDER_HINT, 0); 196 SET_OR_RETURN_FALSE(AV1E_SET_AQ_MODE, 3); 197 SET_OR_RETURN_FALSE(AOME_SET_MAX_INTRA_BITRATE_PCT, 300); 198 SET_OR_RETURN_FALSE(AV1E_SET_COEFF_COST_UPD_FREQ, 3); 199 SET_OR_RETURN_FALSE(AV1E_SET_MODE_COST_UPD_FREQ, 3); 200 SET_OR_RETURN_FALSE(AV1E_SET_MV_COST_UPD_FREQ, 3); 201 SET_OR_RETURN_FALSE(AV1E_SET_ROW_MT, 1); 202 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_OBMC, 0); 203 SET_OR_RETURN_FALSE(AV1E_SET_NOISE_SENSITIVITY, 0); 204 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_WARPED_MOTION, 0); 205 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); 206 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_REF_FRAME_MVS, 0); 207 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_CFL_INTRA, 0); 208 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTRA, 0); 209 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_ANGLE_DELTA, 0); 210 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_FILTER_INTRA, 0); 211 SET_OR_RETURN_FALSE(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1); 212 SET_OR_RETURN_FALSE(AV1E_SET_DISABLE_TRELLIS_QUANT, 1); 213 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIST_WTD_COMP, 0); 214 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0); 215 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_DUAL_FILTER, 0); 216 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_COMP, 0); 217 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0); 218 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0); 219 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_INTRABC, 0); 220 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_MASKED_COMP, 0); 221 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_PAETH_INTRA, 0); 222 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_QM, 0); 223 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RECT_PARTITIONS, 0); 224 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_RESTORATION, 0); 225 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0); 226 SET_OR_RETURN_FALSE(AV1E_SET_ENABLE_TX64, 0); 227 SET_OR_RETURN_FALSE(AV1E_SET_MAX_REFERENCE_FRAMES, 3); 228 229 return true; 230 } 231 232 struct ThreadTilesAndSuperblockSizeInfo { 233 int num_threads; 234 int exp_tile_rows; 235 int exp_tile_colums; 236 aom_superblock_size_t superblock_size; 237 }; 238 239 ThreadTilesAndSuperblockSizeInfo GetThreadingTilesAndSuperblockSize( 240 int width, 241 int height, 242 int max_number_of_threads) { 243 ThreadTilesAndSuperblockSizeInfo res; 244 const int num_pixels = width * height; 245 if (num_pixels >= 1920 * 1080 && max_number_of_threads > 8) { 246 res.num_threads = 8; 247 res.exp_tile_rows = 2; 248 res.exp_tile_colums = 1; 249 } else if (num_pixels >= 640 * 360 && max_number_of_threads > 4) { 250 res.num_threads = 4; 251 res.exp_tile_rows = 1; 252 res.exp_tile_colums = 1; 253 } else if (num_pixels >= 320 * 180 && max_number_of_threads > 2) { 254 res.num_threads = 2; 255 res.exp_tile_rows = 1; 256 res.exp_tile_colums = 0; 257 } else { 258 res.num_threads = 1; 259 res.exp_tile_rows = 0; 260 res.exp_tile_colums = 0; 261 } 262 263 if (res.num_threads > 4 && num_pixels >= 960 * 540) { 264 res.superblock_size = AOM_SUPERBLOCK_SIZE_64X64; 265 } else { 266 res.superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC; 267 } 268 269 RTC_LOG(LS_WARNING) << __FUNCTION__ << " res.num_threads=" << res.num_threads 270 << " res.exp_tile_rows=" << res.exp_tile_rows 271 << " res.exp_tile_colums=" << res.exp_tile_colums 272 << " res.superblock_size=" << res.superblock_size; 273 274 return res; 275 } 276 277 bool ValidateEncodeParams( 278 const VideoFrameBuffer& /* frame_buffer */, 279 const VideoEncoderInterface::TemporalUnitSettings& /* tu_settings */, 280 const std::vector<VideoEncoderInterface::FrameEncodeSettings>& 281 frame_settings, 282 const std::array<std::optional<Resolution>, 8>& last_resolution_in_buffer, 283 aom_rc_mode rc_mode) { 284 if (frame_settings.empty()) { 285 RTC_LOG(LS_ERROR) << "No frame settings provided."; 286 return false; 287 } 288 289 auto in_range = [](int low, int high, int val) { 290 return low <= val && val < high; 291 }; 292 293 for (size_t i = 0; i < frame_settings.size(); ++i) { 294 const VideoEncoderInterface::FrameEncodeSettings& settings = 295 frame_settings[i]; 296 297 if (!settings.frame_output) { 298 RTC_LOG(LS_ERROR) << "No frame output provided."; 299 return false; 300 } 301 302 if (!in_range(kMinEffortLevel, kMaxEffortLevel + 1, 303 settings.effort_level)) { 304 RTC_LOG(LS_ERROR) << "Unsupported effort level " << settings.effort_level; 305 return false; 306 } 307 308 if (!in_range(0, kMaxSpatialLayersWtf, settings.spatial_id)) { 309 RTC_LOG(LS_ERROR) << "invalid spatial id " << settings.spatial_id; 310 return false; 311 } 312 313 if (!in_range(0, kMaxTemporalLayers, settings.temporal_id)) { 314 RTC_LOG(LS_ERROR) << "invalid temporal id " << settings.temporal_id; 315 return false; 316 } 317 318 if ((settings.frame_type == FrameType::kKeyframe || 319 settings.frame_type == FrameType::kStartFrame) && 320 !settings.reference_buffers.empty()) { 321 RTC_LOG(LS_ERROR) << "Reference buffers can not be used for keyframes."; 322 return false; 323 } 324 325 if ((settings.frame_type == FrameType::kKeyframe || 326 settings.frame_type == FrameType::kStartFrame) && 327 !settings.update_buffer) { 328 RTC_LOG(LS_ERROR) 329 << "Buffer to update must be specified for keyframe/startframe"; 330 return false; 331 } 332 333 if (settings.update_buffer && 334 !in_range(0, kNumBuffers, *settings.update_buffer)) { 335 RTC_LOG(LS_ERROR) << "Invalid update buffer id."; 336 return false; 337 } 338 339 if (settings.reference_buffers.size() > kMaxReferences) { 340 RTC_LOG(LS_ERROR) << "Too many referenced buffers."; 341 return false; 342 } 343 344 for (size_t j = 0; j < settings.reference_buffers.size(); ++j) { 345 if (!in_range(0, kNumBuffers, settings.reference_buffers[j])) { 346 RTC_LOG(LS_ERROR) << "Invalid reference buffer id."; 347 return false; 348 } 349 350 // Figure out which frame resolution a certain buffer will hold when the 351 // frame described by `settings` is encoded. 352 std::optional<Resolution> referenced_resolution; 353 bool keyframe_on_previous_layer = false; 354 355 // Will some other frame in this temporal unit update the buffer? 356 for (size_t k = 0; k < i; ++k) { 357 if (frame_settings[k].frame_type == FrameType::kKeyframe) { 358 keyframe_on_previous_layer = true; 359 referenced_resolution.reset(); 360 } 361 if (frame_settings[k].update_buffer == settings.reference_buffers[j]) { 362 referenced_resolution = frame_settings[k].resolution; 363 } 364 } 365 366 // Not updated by another frame in the temporal unit, what is the 367 // resolution of the last frame stored into that buffer? 368 if (!referenced_resolution && !keyframe_on_previous_layer) { 369 referenced_resolution = 370 last_resolution_in_buffer[settings.reference_buffers[j]]; 371 } 372 373 if (!referenced_resolution) { 374 RTC_LOG(LS_ERROR) << "Referenced buffer holds no frame."; 375 return false; 376 } 377 378 if (!GetScalingFactor(*referenced_resolution, settings.resolution)) { 379 RTC_LOG(LS_ERROR) 380 << "Required resolution scaling factor not supported."; 381 return false; 382 } 383 384 for (size_t l = i + 1; l < settings.reference_buffers.size(); ++l) { 385 if (settings.reference_buffers[i] == settings.reference_buffers[l]) { 386 RTC_LOG(LS_ERROR) << "Duplicate reference buffer specified."; 387 return false; 388 } 389 } 390 } 391 392 if ((rc_mode == AOM_CBR && 393 std::holds_alternative<Cqp>(settings.rate_options)) || 394 (rc_mode == AOM_Q && 395 std::holds_alternative<Cbr>(settings.rate_options))) { 396 RTC_LOG(LS_ERROR) << "Invalid rate options, encoder configured with " 397 << (rc_mode == AOM_CBR ? "AOM_CBR" : "AOM_Q"); 398 return false; 399 } 400 401 for (size_t j = i + 1; j < frame_settings.size(); ++j) { 402 if (settings.spatial_id >= frame_settings[j].spatial_id) { 403 RTC_LOG(LS_ERROR) << "Frame spatial id specified out of order."; 404 return false; 405 } 406 } 407 } 408 409 return true; 410 } 411 412 void PrepareInputImage(const VideoFrameBuffer& input_buffer, 413 aom_img_ptr& out_aom_image) { 414 aom_img_fmt_t input_format; 415 switch (input_buffer.type()) { 416 case VideoFrameBuffer::Type::kI420: 417 input_format = AOM_IMG_FMT_I420; 418 break; 419 case VideoFrameBuffer::Type::kNV12: 420 input_format = AOM_IMG_FMT_NV12; 421 break; 422 default: 423 RTC_CHECK_NOTREACHED(); 424 return; 425 } 426 427 if (!out_aom_image || out_aom_image->fmt != input_format || 428 static_cast<int>(out_aom_image->w) != input_buffer.width() || 429 static_cast<int>(out_aom_image->h) != input_buffer.height()) { 430 out_aom_image.reset( 431 aom_img_wrap(/*img=*/nullptr, input_format, input_buffer.width(), 432 input_buffer.height(), /*align=*/1, /*img_data=*/nullptr)); 433 434 RTC_LOG(LS_WARNING) << __FUNCTION__ << " input_format=" << input_format 435 << " input_buffer.width()=" << input_buffer.width() 436 << " input_buffer.height()=" << input_buffer.height() 437 << " w=" << out_aom_image->w 438 << " h=" << out_aom_image->h 439 << " d_w=" << out_aom_image->d_w 440 << " d_h=" << out_aom_image->d_h 441 << " r_w=" << out_aom_image->r_w 442 << " r_h=" << out_aom_image->r_h; 443 } 444 445 if (input_format == AOM_IMG_FMT_I420) { 446 const I420BufferInterface* i420_buffer = input_buffer.GetI420(); 447 RTC_DCHECK(i420_buffer); 448 out_aom_image->planes[AOM_PLANE_Y] = 449 const_cast<unsigned char*>(i420_buffer->DataY()); 450 out_aom_image->planes[AOM_PLANE_U] = 451 const_cast<unsigned char*>(i420_buffer->DataU()); 452 out_aom_image->planes[AOM_PLANE_V] = 453 const_cast<unsigned char*>(i420_buffer->DataV()); 454 out_aom_image->stride[AOM_PLANE_Y] = i420_buffer->StrideY(); 455 out_aom_image->stride[AOM_PLANE_U] = i420_buffer->StrideU(); 456 out_aom_image->stride[AOM_PLANE_V] = i420_buffer->StrideV(); 457 } else { 458 const NV12BufferInterface* nv12_buffer = input_buffer.GetNV12(); 459 RTC_DCHECK(nv12_buffer); 460 out_aom_image->planes[AOM_PLANE_Y] = 461 const_cast<unsigned char*>(nv12_buffer->DataY()); 462 out_aom_image->planes[AOM_PLANE_U] = 463 const_cast<unsigned char*>(nv12_buffer->DataUV()); 464 out_aom_image->planes[AOM_PLANE_V] = nullptr; 465 out_aom_image->stride[AOM_PLANE_Y] = nv12_buffer->StrideY(); 466 out_aom_image->stride[AOM_PLANE_U] = nv12_buffer->StrideUV(); 467 out_aom_image->stride[AOM_PLANE_V] = 0; 468 } 469 } 470 471 aom_svc_ref_frame_config_t GetSvcRefFrameConfig( 472 const VideoEncoderInterface::FrameEncodeSettings& settings) { 473 // Buffer alias to use for each position. In particular when there are two 474 // buffers being used, prefer to alias them as LAST and GOLDEN, since the AV1 475 // bitstream format has dedicated fields for them. See last_frame_idx and 476 // golden_frame_idx in the av1 spec 477 // https://aomediacodec.github.io/av1-spec/av1-spec.pdf. 478 479 // Libaom is also compiled for RTC, which limits the number of references to 480 // at most three, and they must be aliased as LAST, GOLDEN and ALTREF. Also 481 // note that libaom favors LAST the most, and GOLDEN second most, so buffers 482 // should be specified in order of how useful they are for prediction. Libaom 483 // could be updated to make LAST, GOLDEN and ALTREF equivalent, but that is 484 // not a priority for now. All aliases can be used to update buffers. 485 // TD: Automatically select LAST, GOLDEN and ALTREF depending on previous 486 // buffer usage. 487 static constexpr int kPreferedAlias[] = {0, // LAST 488 3, // GOLDEN 489 6, // ALTREF 490 1, 2, 4, 5}; 491 492 aom_svc_ref_frame_config_t ref_frame_config = {}; 493 494 int alias_index = 0; 495 if (!settings.reference_buffers.empty()) { 496 for (size_t i = 0; i < settings.reference_buffers.size(); ++i) { 497 ref_frame_config.ref_idx[kPreferedAlias[alias_index]] = 498 settings.reference_buffers[i]; 499 ref_frame_config.reference[kPreferedAlias[alias_index]] = 1; 500 alias_index++; 501 } 502 503 // Delta frames must not alias unused buffers, and since start frames only 504 // update some buffers it is not safe to leave unused aliases to simply 505 // point to buffer 0. 506 for (size_t i = settings.reference_buffers.size(); 507 i < std::size(ref_frame_config.ref_idx); ++i) { 508 ref_frame_config.ref_idx[kPreferedAlias[i]] = 509 settings.reference_buffers.back(); 510 } 511 } 512 513 if (settings.update_buffer) { 514 if (!absl::c_linear_search(settings.reference_buffers, 515 *settings.update_buffer)) { 516 ref_frame_config.ref_idx[kPreferedAlias[alias_index]] = 517 *settings.update_buffer; 518 alias_index++; 519 } 520 ref_frame_config.refresh[*settings.update_buffer] = 1; 521 } 522 523 char buf[256]; 524 SimpleStringBuilder sb(buf); 525 sb << " spatial_id=" << settings.spatial_id; 526 sb << " ref_idx=[ "; 527 for (auto r : ref_frame_config.ref_idx) { 528 sb << r << " "; 529 } 530 sb << "] reference=[ "; 531 for (auto r : ref_frame_config.reference) { 532 sb << r << " "; 533 } 534 sb << "] refresh=[ "; 535 for (auto r : ref_frame_config.refresh) { 536 sb << r << " "; 537 } 538 sb << "]"; 539 540 RTC_LOG(LS_WARNING) << __FUNCTION__ << sb.str(); 541 542 return ref_frame_config; 543 } 544 545 aom_svc_params_t GetSvcParams( 546 const VideoFrameBuffer& frame_buffer, 547 const std::vector<VideoEncoderInterface::FrameEncodeSettings>& 548 frame_settings) { 549 aom_svc_params_t svc_params = {}; 550 svc_params.number_spatial_layers = frame_settings.back().spatial_id + 1; 551 svc_params.number_temporal_layers = kMaxTemporalLayers; 552 553 // TD: What about svc_params.framerate_factor? 554 // If `framerate_factors` are left at 0 then configured bitrate values will 555 // not be picked up by libaom. 556 for (int tid = 0; tid < svc_params.number_temporal_layers; ++tid) { 557 svc_params.framerate_factor[tid] = 1; 558 } 559 560 // If the scaling factor is left at zero for unused layers a division by zero 561 // will happen inside libaom, default all layers to one. 562 for (int sid = 0; sid < svc_params.number_spatial_layers; ++sid) { 563 svc_params.scaling_factor_num[sid] = 1; 564 svc_params.scaling_factor_den[sid] = 1; 565 } 566 567 for (const VideoEncoderInterface::FrameEncodeSettings& settings : 568 frame_settings) { 569 std::optional<Rational> scaling_factor = GetScalingFactor( 570 {.width = frame_buffer.width(), .height = frame_buffer.height()}, 571 settings.resolution); 572 RTC_CHECK(scaling_factor); 573 svc_params.scaling_factor_num[settings.spatial_id] = 574 scaling_factor->numerator; 575 svc_params.scaling_factor_den[settings.spatial_id] = 576 scaling_factor->denominator; 577 578 const int flat_layer_id = 579 settings.spatial_id * svc_params.number_temporal_layers + 580 settings.temporal_id; 581 582 RTC_LOG(LS_WARNING) << __FUNCTION__ << " flat_layer_id=" << flat_layer_id 583 << " num=" 584 << svc_params.scaling_factor_num[settings.spatial_id] 585 << " den=" 586 << svc_params.scaling_factor_den[settings.spatial_id]; 587 588 std::visit( 589 [&](auto&& arg) { 590 using T = std::decay_t<decltype(arg)>; 591 if constexpr (std::is_same_v<T, Cbr>) { 592 // Libaom calculates the total bitrate across all spatial layers by 593 // summing the bitrate of the last temporal layer in each spatial 594 // layer. This means the bitrate for the top temporal layer always 595 // has to be set even if that temporal layer is not being encoded. 596 const int last_temporal_layer_in_spatial_layer_id = 597 settings.spatial_id * svc_params.number_temporal_layers + 598 (kMaxTemporalLayers - 1); 599 svc_params 600 .layer_target_bitrate[last_temporal_layer_in_spatial_layer_id] = 601 arg.target_bitrate.kbps(); 602 603 svc_params.layer_target_bitrate[flat_layer_id] = 604 arg.target_bitrate.kbps(); 605 // When libaom is configured with `AOM_CBR` it will still limit QP 606 // to stay between `min_quantizers` and `max_quantizers'. Set 607 // `max_quantizers` to max QP to avoid the encoder overshooting. 608 svc_params.max_quantizers[flat_layer_id] = kMaxQp; 609 svc_params.min_quantizers[flat_layer_id] = 0; 610 } else if constexpr (std::is_same_v<T, Cqp>) { 611 // When libaom is configured with `AOM_Q` it will still look at the 612 // `layer_target_bitrate` to determine whether the layer is disabled 613 // or not. Set `layer_target_bitrate` to 1 so that libaom knows the 614 // layer is active. 615 svc_params.layer_target_bitrate[flat_layer_id] = 1; 616 svc_params.max_quantizers[flat_layer_id] = arg.target_qp; 617 svc_params.min_quantizers[flat_layer_id] = arg.target_qp; 618 RTC_LOG(LS_WARNING) << __FUNCTION__ << " svc_params.qp[" 619 << flat_layer_id << "]=" << arg.target_qp; 620 // TD: Does libaom look at both max and min? Shouldn't it just be 621 // one of them 622 } 623 }, 624 settings.rate_options); 625 } 626 627 char buf[512]; 628 SimpleStringBuilder sb(buf); 629 sb << "GetSvcParams" << " layer bitrates kbps"; 630 for (int s = 0; s < svc_params.number_spatial_layers; ++s) { 631 sb << " S" << s << "=[ "; 632 for (int t = 0; t < svc_params.number_temporal_layers; ++t) { 633 int id = s * svc_params.number_temporal_layers + t; 634 sb << "T" << t << "=" << svc_params.layer_target_bitrate[id] << " "; 635 } 636 sb << "]"; 637 } 638 639 RTC_LOG(LS_WARNING) << sb.str(); 640 641 return svc_params; 642 } 643 644 void LibaomAv1Encoder::Encode(scoped_refptr<VideoFrameBuffer> frame_buffer, 645 const TemporalUnitSettings& tu_settings, 646 std::vector<FrameEncodeSettings> frame_settings) { 647 absl::Cleanup on_return = [&] { 648 // On return call `EncodeComplete` with EncodingError result unless they 649 // were already called with an EncodedData result. 650 for (FrameEncodeSettings& settings : frame_settings) { 651 if (settings.frame_output) { 652 settings.frame_output->EncodeComplete(EncodingError()); 653 } 654 } 655 }; 656 657 if (!ValidateEncodeParams(*frame_buffer, tu_settings, frame_settings, 658 last_resolution_in_buffer_, cfg_.rc_end_usage)) { 659 return; 660 } 661 662 if (current_content_type_ != tu_settings.content_hint) { 663 if (tu_settings.content_hint == VideoCodecMode::kScreensharing) { 664 // TD: Set speed 11? 665 SET_OR_RETURN(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_SCREEN); 666 SET_OR_RETURN(AV1E_SET_ENABLE_PALETTE, 1); 667 } else { 668 SET_OR_RETURN(AV1E_SET_TUNE_CONTENT, AOM_CONTENT_DEFAULT); 669 SET_OR_RETURN(AV1E_SET_ENABLE_PALETTE, 0); 670 } 671 current_content_type_ = tu_settings.content_hint; 672 } 673 674 if (cfg_.rc_end_usage == AOM_CBR) { 675 DataRate accum_rate = DataRate::Zero(); 676 for (const FrameEncodeSettings& settings : frame_settings) { 677 accum_rate += std::get<Cbr>(settings.rate_options).target_bitrate; 678 } 679 cfg_.rc_target_bitrate = accum_rate.kbps(); 680 RTC_LOG(LS_WARNING) << __FUNCTION__ 681 << " cfg_.rc_target_bitrate=" << cfg_.rc_target_bitrate; 682 } 683 684 if (static_cast<int>(cfg_.g_w) != frame_buffer->width() || 685 static_cast<int>(cfg_.g_h) != frame_buffer->height()) { 686 RTC_LOG(LS_WARNING) << __FUNCTION__ << " resolution changed from " 687 << cfg_.g_w << "x" << cfg_.g_h << " to " 688 << frame_buffer->width() << "x" 689 << frame_buffer->height(); 690 ThreadTilesAndSuperblockSizeInfo ttsbi = GetThreadingTilesAndSuperblockSize( 691 frame_buffer->width(), frame_buffer->height(), max_number_of_threads_); 692 SET_OR_RETURN(AV1E_SET_SUPERBLOCK_SIZE, ttsbi.superblock_size); 693 SET_OR_RETURN(AV1E_SET_TILE_ROWS, ttsbi.exp_tile_rows); 694 SET_OR_RETURN(AV1E_SET_TILE_COLUMNS, ttsbi.exp_tile_colums); 695 cfg_.g_threads = ttsbi.num_threads; 696 cfg_.g_w = frame_buffer->width(); 697 cfg_.g_h = frame_buffer->height(); 698 } 699 700 PrepareInputImage(*frame_buffer, image_to_encode_); 701 702 // The bitrates caluclated internally in libaom when `AV1E_SET_SVC_PARAMS` is 703 // called depends on the currently configured `cfg_.rc_target_bitrate`. If the 704 // total target bitrate is not updated first a division by zero could happen. 705 if (aom_codec_err_t ret = aom_codec_enc_config_set(&ctx_, &cfg_); 706 ret != AOM_CODEC_OK) { 707 RTC_LOG(LS_ERROR) << "aom_codec_enc_config_set returned " << ret; 708 return; 709 } 710 aom_svc_params_t svc_params = GetSvcParams(*frame_buffer, frame_settings); 711 SET_OR_RETURN(AV1E_SET_SVC_PARAMS, &svc_params); 712 713 // The libaom AV1 encoder requires that `aom_codec_encode` is called for 714 // every spatial layer, even if no frame should be encoded for that layer. 715 std::array<FrameEncodeSettings*, kMaxSpatialLayersWtf> 716 settings_for_spatial_id; 717 settings_for_spatial_id.fill(nullptr); 718 FrameEncodeSettings settings_for_unused_layer; 719 for (FrameEncodeSettings& settings : frame_settings) { 720 settings_for_spatial_id[settings.spatial_id] = &settings; 721 } 722 723 for (int sid = frame_settings[0].spatial_id; 724 sid < svc_params.number_spatial_layers; ++sid) { 725 const bool layer_enabled = settings_for_spatial_id[sid] != nullptr; 726 FrameEncodeSettings& settings = layer_enabled 727 ? *settings_for_spatial_id[sid] 728 : settings_for_unused_layer; 729 730 aom_svc_layer_id_t layer_id = { 731 .spatial_layer_id = sid, 732 .temporal_layer_id = settings.temporal_id, 733 }; 734 SET_OR_RETURN(AV1E_SET_SVC_LAYER_ID, &layer_id); 735 aom_svc_ref_frame_config_t ref_config = GetSvcRefFrameConfig(settings); 736 SET_OR_RETURN(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_config); 737 738 // TD: Duration can't be zero, what does it matter when the layer is 739 // not being encoded? 740 TimeDelta duration = TimeDelta::Millis(1); 741 if (layer_enabled) { 742 if (const Cbr* cbr = std::get_if<Cbr>(&settings.rate_options)) { 743 duration = cbr->duration; 744 } else { 745 // TD: What should duration be when Cqp is used? 746 duration = TimeDelta::Millis(1); 747 } 748 749 if (settings.effort_level != current_effort_level_[settings.spatial_id]) { 750 // For RTC we use speed level 6 to 10, with 8 being the default. Note 751 // that low effort means higher speed. 752 SET_OR_RETURN(AOME_SET_CPUUSED, 8 - settings.effort_level); 753 current_effort_level_[settings.spatial_id] = settings.effort_level; 754 } 755 } 756 757 RTC_LOG(LS_WARNING) 758 << __FUNCTION__ << " timestamp=" 759 << (tu_settings.presentation_timestamp.ms() * kRtpTicksPerSecond / 1000) 760 << " duration=" << (duration.ms() * kRtpTicksPerSecond / 1000) 761 << " type=" 762 << (settings.frame_type == FrameType::kKeyframe ? "key" : "delta"); 763 aom_codec_err_t ret = aom_codec_encode( 764 &ctx_, &*image_to_encode_, tu_settings.presentation_timestamp.ms() * 90, 765 duration.ms() * 90, 766 settings.frame_type == FrameType::kKeyframe ? AOM_EFLAG_FORCE_KF : 0); 767 if (ret != AOM_CODEC_OK) { 768 RTC_LOG(LS_WARNING) << "aom_codec_encode returned " << ret; 769 return; 770 } 771 772 if (!layer_enabled) { 773 continue; 774 } 775 776 if (settings.frame_type == FrameType::kKeyframe) { 777 last_resolution_in_buffer_ = {}; 778 } 779 780 if (settings.update_buffer) { 781 last_resolution_in_buffer_[*settings.update_buffer] = settings.resolution; 782 } 783 784 EncodedData result; 785 aom_codec_iter_t iter = nullptr; 786 bool bitstream_produced = false; 787 while (const aom_codec_cx_pkt_t* pkt = 788 aom_codec_get_cx_data(&ctx_, &iter)) { 789 if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) { 790 SET_OR_RETURN(AOME_GET_LAST_QUANTIZER_64, &result.encoded_qp); 791 result.frame_type = pkt->data.frame.flags & AOM_FRAME_IS_KEY 792 ? FrameType::kKeyframe 793 : FrameType::kDeltaFrame; 794 ArrayView<uint8_t> output_buffer = 795 settings.frame_output->GetBitstreamOutputBuffer( 796 DataSize::Bytes(pkt->data.frame.sz)); 797 if (output_buffer.size() != pkt->data.frame.sz) { 798 return; 799 } 800 memcpy(output_buffer.data(), pkt->data.frame.buf, pkt->data.frame.sz); 801 bitstream_produced = true; 802 break; 803 } 804 } 805 806 if (!bitstream_produced) { 807 return; 808 } else { 809 RTC_CHECK(settings.frame_output); 810 settings.frame_output->EncodeComplete(result); 811 // To avoid invoking any callback more than once. 812 settings.frame_output = nullptr; 813 } 814 } 815 } 816 } // namespace 817 818 std::string LibaomAv1EncoderFactory::CodecName() const { 819 return "AV1"; 820 } 821 822 std::string LibaomAv1EncoderFactory::ImplementationName() const { 823 return "Libaom"; 824 } 825 826 std::map<std::string, std::string> LibaomAv1EncoderFactory::CodecSpecifics() 827 const { 828 return {}; 829 } 830 831 // clang-format off 832 // The formater and cpplint have conflicting ideas. 833 VideoEncoderFactoryInterface::Capabilities 834 LibaomAv1EncoderFactory::GetEncoderCapabilities() const { 835 return { 836 .prediction_constraints = { 837 .num_buffers = kNumBuffers, 838 .max_references = kMaxReferences, 839 .max_temporal_layers = kMaxTemporalLayers, 840 .buffer_space_type = VideoEncoderFactoryInterface::Capabilities:: 841 PredictionConstraints::BufferSpaceType::kSingleKeyframe, 842 .max_spatial_layers = kMaxSpatialLayersWtf, 843 .scaling_factors = {kSupportedScalingFactors.begin(), 844 kSupportedScalingFactors.end()}, 845 .supported_frame_types = {FrameType::kKeyframe, 846 FrameType::kStartFrame, 847 FrameType::kDeltaFrame}}, 848 .input_constraints = { 849 .min = {.width = 64, .height = 36}, 850 .max = {.width = 3840, .height = 2160}, 851 .pixel_alignment = 1, 852 .input_formats = {kSupportedInputFormats.begin(), 853 kSupportedInputFormats.end()}, 854 }, 855 .encoding_formats = {{.sub_sampling = EncodingFormat::k420, 856 .bit_depth = 8}}, 857 .rate_control = { 858 .qp_range = {0, kMaxQp}, 859 .rc_modes = {VideoEncoderFactoryInterface::RateControlMode::kCbr, 860 VideoEncoderFactoryInterface::RateControlMode::kCqp}}, 861 .performance = {.encode_on_calling_thread = true, 862 .min_max_effort_level = {kMinEffortLevel, 863 kMaxEffortLevel}}, 864 }; 865 } 866 // clang-format on 867 868 std::unique_ptr<VideoEncoderInterface> LibaomAv1EncoderFactory::CreateEncoder( 869 const StaticEncoderSettings& settings, 870 const std::map<std::string, std::string>& encoder_specific_settings) { 871 auto encoder = std::make_unique<LibaomAv1Encoder>(); 872 if (!encoder->InitEncode(settings, encoder_specific_settings)) { 873 return nullptr; 874 } 875 return encoder; 876 } 877 878 } // namespace webrtc