libvpx_vp9_encoder.cc (88001B)
1 /* 2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 * 10 */ 11 12 #ifdef RTC_ENABLE_VP9 13 14 #include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h" 15 16 #include <algorithm> 17 #include <cstddef> 18 #include <cstdint> 19 #include <cstring> 20 #include <iterator> 21 #include <memory> 22 #include <numeric> 23 #include <optional> 24 #include <utility> 25 #include <vector> 26 27 #include "absl/algorithm/container.h" 28 #include "absl/container/inlined_vector.h" 29 #include "api/array_view.h" 30 #include "api/environment/environment.h" 31 #include "api/fec_controller_override.h" 32 #include "api/field_trials_view.h" 33 #include "api/scoped_refptr.h" 34 #include "api/transport/rtp/dependency_descriptor.h" 35 #include "api/video/encoded_image.h" 36 #include "api/video/i010_buffer.h" 37 #include "api/video/render_resolution.h" 38 #include "api/video/video_bitrate_allocation.h" 39 #include "api/video/video_bitrate_allocator.h" 40 #include "api/video/video_codec_constants.h" 41 #include "api/video/video_codec_type.h" 42 #include "api/video/video_frame.h" 43 #include "api/video/video_frame_buffer.h" 44 #include "api/video/video_frame_type.h" 45 #include "api/video_codecs/scalability_mode.h" 46 #include "api/video_codecs/video_codec.h" 47 #include "api/video_codecs/video_encoder.h" 48 #include "api/video_codecs/vp9_profile.h" 49 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" 50 #include "modules/video_coding/codecs/interface/common_constants.h" 51 #include "modules/video_coding/codecs/interface/libvpx_interface.h" 52 #include "modules/video_coding/codecs/vp9/include/vp9.h" 53 #include 
"modules/video_coding/codecs/vp9/include/vp9_globals.h" 54 #include "modules/video_coding/include/video_codec_interface.h" 55 #include "modules/video_coding/include/video_error_codes.h" 56 #include "modules/video_coding/svc/create_scalability_structure.h" 57 #include "modules/video_coding/svc/scalability_mode_util.h" 58 #include "modules/video_coding/svc/scalable_video_controller.h" 59 #include "modules/video_coding/svc/scalable_video_controller_no_layering.h" 60 #include "modules/video_coding/svc/simulcast_to_svc_converter.h" 61 #include "modules/video_coding/svc/svc_rate_allocator.h" 62 #include "modules/video_coding/utility/framerate_controller_deprecated.h" 63 #include "modules/video_coding/utility/simulcast_rate_allocator.h" 64 #include "rtc_base/checks.h" 65 #include "rtc_base/containers/flat_map.h" 66 #include "rtc_base/experiments/field_trial_list.h" 67 #include "rtc_base/experiments/field_trial_parser.h" 68 #include "rtc_base/experiments/rate_control_settings.h" 69 #include "rtc_base/logging.h" 70 #include "rtc_base/numerics/safe_conversions.h" 71 #include "rtc_base/strings/string_builder.h" 72 #include "rtc_base/trace_event.h" 73 #include "third_party/libvpx/source/libvpx/vpx/vp8cx.h" 74 #include "third_party/libvpx/source/libvpx/vpx/vpx_encoder.h" 75 #include "third_party/libvpx/source/libvpx/vpx/vpx_image.h" 76 77 #if (defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64)) && \ 78 (defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)) 79 #define MOBILE_ARM 80 #endif 81 82 namespace webrtc { 83 84 namespace { 85 // Maps from gof_idx to encoder internal reference frame buffer index. These 86 // maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames. 87 uint8_t kRefBufIdx[4] = {0, 0, 0, 1}; 88 uint8_t kUpdBufIdx[4] = {0, 0, 1, 0}; 89 90 // Maximum allowed PID difference for differnet per-layer frame-rate case. 
const int kMaxAllowedPidDiff = 30;

// Tuning constants for the variable-framerate screenshare mode. `kMinFps`
// seeds `variable_framerate_controller_` in the constructor; the remaining
// constants are consumed elsewhere in this file (names suggest a QP floor,
// an undershoot percentage and steady-state gating — confirm at use sites).
namespace variable_framerate_screenshare {
constexpr double kMinFps = 5.0;
constexpr int kMinQP = 32;
constexpr int kUndershootPct = 30;
constexpr int kFramesBeforeSteadyState = 5;
}  // namespace variable_framerate_screenshare

// TODO(ilink): Tune these thresholds further.
// Selected using ConverenceMotion_1280_720_50.yuv clip.
// No toggling observed on any link capacity from 100-2000kbps.
// HD was reached consistently when link capacity was 1500kbps.
// Set resolutions are a bit more conservative than svc_config.cc sets, e.g.
// for 300kbps resolution converged to 270p instead of 360p.
constexpr int kLowVp9QpThreshold = 149;
constexpr int kHighVp9QpThreshold = 205;

// Returns {first_active, one_past_last_active}: the contiguous run of
// spatial layers with a non-zero allocated bitrate, starting at the lowest
// active layer. Returns {0, 0} when no spatial layer has a positive bitrate.
std::pair<size_t, size_t> GetActiveLayers(
    const VideoBitrateAllocation& allocation) {
  for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) {
    if (allocation.GetSpatialLayerSum(sl_idx) > 0) {
      size_t last_layer = sl_idx + 1;
      while (last_layer < kMaxSpatialLayers &&
             allocation.GetSpatialLayerSum(last_layer) > 0) {
        ++last_layer;
      }
      return std::make_pair(sl_idx, last_layer);
    }
  }
  return {0, 0};
}

// Builds a ScalableVideoController matching the VP9 settings in `codec` by
// composing a scalability-mode name ("L<S>T<T>"; "_KEY" suffix for
// key-picture-only inter-layer prediction; "S<S>T<T>" for no inter-layer
// prediction; "h" suffix for the 2:3 spatial ratio) and instantiating the
// corresponding structure. Returns nullptr for configurations with no
// matching structure: screensharing, a top layer that does not match the
// overall codec resolution, or an unsupported/inconsistent scaling ratio.
std::unique_ptr<ScalableVideoController> CreateVp9ScalabilityStructure(
    const VideoCodec& codec) {
  int num_spatial_layers = codec.VP9().numberOfSpatialLayers;
  int num_temporal_layers =
      std::max(1, int{codec.VP9().numberOfTemporalLayers});
  if (num_spatial_layers == 1 && num_temporal_layers == 1) {
    return std::make_unique<ScalableVideoControllerNoLayering>();
  }

  char name[20];
  SimpleStringBuilder ss(name);
  if (codec.mode == VideoCodecMode::kScreensharing) {
    // TODO(bugs.webrtc.org/11999): Compose names of the structures when they
    // are implemented.
    return nullptr;
  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn ||
             num_spatial_layers == 1) {
    ss << "L" << num_spatial_layers << "T" << num_temporal_layers;
  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOnKeyPic) {
    ss << "L" << num_spatial_layers << "T" << num_temporal_layers << "_KEY";
  } else {
    RTC_DCHECK_EQ(codec.VP9().interLayerPred, InterLayerPredMode::kOff);
    ss << "S" << num_spatial_layers << "T" << num_temporal_layers;
  }

  // Check spatial ratio.
  if (num_spatial_layers > 1) {
    if (codec.width != codec.spatialLayers[num_spatial_layers - 1].width ||
        codec.height != codec.spatialLayers[num_spatial_layers - 1].height) {
      RTC_LOG(LS_WARNING)
          << "Top layer resolution expected to match overall resolution";
      return nullptr;
    }
    // Check if the ratio is one of the supported ones (1:2 or 2:3),
    // judged from the first two layers.
    int numerator;
    int denominator;
    if (codec.spatialLayers[1].width == 2 * codec.spatialLayers[0].width) {
      numerator = 1;
      denominator = 2;
      // no suffix for 1:2 ratio.
    } else if (2 * codec.spatialLayers[1].width ==
               3 * codec.spatialLayers[0].width) {
      numerator = 2;
      denominator = 3;
      ss << "h";
    } else {
      RTC_LOG(LS_WARNING) << "Unsupported scalability ratio "
                          << codec.spatialLayers[0].width << ":"
                          << codec.spatialLayers[1].width;
      return nullptr;
    }
    // Validate ratio is consistent for all spatial layer transitions.
    for (int sid = 1; sid < num_spatial_layers; ++sid) {
      if (codec.spatialLayers[sid].width * numerator !=
              codec.spatialLayers[sid - 1].width * denominator ||
          codec.spatialLayers[sid].height * numerator !=
              codec.spatialLayers[sid - 1].height * denominator) {
        RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator
                            << ":" << denominator;
        return nullptr;
      }
    }
  }

  std::optional<ScalabilityMode> scalability_mode =
      ScalabilityModeFromString(name);
  if (!scalability_mode.has_value()) {
    RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name;
    return nullptr;
  }
  auto scalability_structure_controller =
      CreateScalabilityStructure(*scalability_mode);
  if (scalability_structure_controller == nullptr) {
    RTC_LOG(LS_WARNING) << "Unsupported scalability structure " << name;
  } else {
    RTC_LOG(LS_INFO) << "Created scalability structure " << name;
  }
  return scalability_structure_controller;
}

// Translates per-layer frame configurations from the SVC controller into
// libvpx's reference-frame config: buffers[0..2] map onto the LAST, GOLDEN
// and ALTREF buffer slots respectively, and each updated buffer is flagged
// in the per-spatial-layer `update_buffer_slot` bitmask.
vpx_svc_ref_frame_config_t Vp9References(
    ArrayView<const ScalableVideoController::LayerFrameConfig> layers) {
  vpx_svc_ref_frame_config_t ref_config = {};
  for (const ScalableVideoController::LayerFrameConfig& layer_frame : layers) {
    const auto& buffers = layer_frame.Buffers();
    RTC_DCHECK_LE(buffers.size(), 3);
    int sid = layer_frame.SpatialId();
    if (!buffers.empty()) {
      ref_config.lst_fb_idx[sid] = buffers[0].id;
      ref_config.reference_last[sid] = buffers[0].referenced;
      if (buffers[0].updated) {
        ref_config.update_buffer_slot[sid] |= (1 << buffers[0].id);
      }
    }
    if (buffers.size() > 1) {
      ref_config.gld_fb_idx[sid] = buffers[1].id;
      ref_config.reference_golden[sid] = buffers[1].referenced;
      if (buffers[1].updated) {
        ref_config.update_buffer_slot[sid] |= (1 << buffers[1].id);
      }
    }
    if (buffers.size() > 2) {
      ref_config.alt_fb_idx[sid] = buffers[2].id;
      ref_config.reference_alt_ref[sid] = buffers[2].referenced;
      if (buffers[2].updated) {
        ref_config.update_buffer_slot[sid] |= (1 << buffers[2].id);
      }
    }
  }
  // TODO(bugs.webrtc.org/11999): Fill ref_config.duration
  return ref_config;
}

// Whether the libvpx denoiser may be enabled on this platform.
bool AllowDenoising() {
#ifdef MOBILE_ARM
  // Keep the denoiser disabled on mobile ARM devices. It increases encode time
  // by up to 16%.
  return false;
#else
  return true;
#endif
}

}  // namespace

// Static trampoline registered with libvpx (VP9E_REGISTER_CX_CALLBACK in
// InitAndSetControlSettings): forwards each encoded packet to the owning
// encoder instance passed as `user_data`.
void LibvpxVp9Encoder::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
                                                        void* user_data) {
  LibvpxVp9Encoder* enc = static_cast<LibvpxVp9Encoder*>(user_data);
  enc->GetEncodedLayerFrame(pkt);
}

// Constructs the encoder wrapper. Members are given placeholder values
// here (encoder_/config_/raw_ stay null); the real libvpx setup happens in
// InitEncode(). Several behaviors are gated on field trials read from `env`.
LibvpxVp9Encoder::LibvpxVp9Encoder(const Environment& env,
                                   Vp9EncoderSettings settings,
                                   std::unique_ptr<LibvpxInterface> interface)
    : env_(env),
      libvpx_(std::move(interface)),
      encoded_image_(),
      encoded_complete_callback_(nullptr),
      profile_(settings.profile),
      inited_(false),
      timestamp_(0),
      rc_max_intra_target_(0),
      encoder_(nullptr),
      config_(nullptr),
      raw_(nullptr),
      input_image_(nullptr),
      force_key_frame_(true),
      pics_since_key_(0),
      num_temporal_layers_(0),
      num_spatial_layers_(0),
      num_active_spatial_layers_(0),
      first_active_layer_(0),
      layer_deactivation_requires_key_frame_(env.field_trials().IsEnabled(
          "WebRTC-Vp9IssueKeyFrameOnLayerDeactivation")),
      is_svc_(false),
      inter_layer_pred_(InterLayerPredMode::kOn),
      trusted_rate_controller_(RateControlSettings(env.field_trials())
                                   .LibvpxVp9TrustedRateController()),
      first_frame_in_picture_(true),
      ss_info_needed_(false),
      force_all_active_layers_(false),
      enable_svc_for_simulcast_(
          !env.field_trials().IsDisabled("WebRTC-VP9-SvcForSimulcast")),
      num_cores_(0),
      is_flexible_mode_(false),
      variable_framerate_controller_(variable_framerate_screenshare::kMinFps),
      quality_scaler_experiment_(ParseQualityScalerConfig(env.field_trials())),
      performance_flags_(ParsePerformanceFlagsFromTrials(env.field_trials())),
      num_steady_state_frames_(0),
      config_changed_(true),
      encoder_info_override_(env.field_trials()),
      calculate_psnr_(
          env.field_trials().IsEnabled("WebRTC-Video-CalculatePsnr")) {
  codec_ = {};
  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
}

LibvpxVp9Encoder::~LibvpxVp9Encoder() {
  // Frees the libvpx context and owned buffers.
  Release();
}

void LibvpxVp9Encoder::SetFecControllerOverride(FecControllerOverride*) {
  // Ignored.
}

// Destroys the libvpx codec context and frees the config and input image.
// Safe to call repeatedly. Returns WEBRTC_VIDEO_CODEC_MEMORY if destroying
// an initialized codec context fails, WEBRTC_VIDEO_CODEC_OK otherwise.
int LibvpxVp9Encoder::Release() {
  int ret_val = WEBRTC_VIDEO_CODEC_OK;

  if (encoder_ != nullptr) {
    // Only an encoder that finished init owns libvpx-internal state.
    if (inited_) {
      if (libvpx_->codec_destroy(encoder_)) {
        ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
      }
    }
    delete encoder_;
    encoder_ = nullptr;
  }
  if (config_ != nullptr) {
    delete config_;
    config_ = nullptr;
  }
  if (raw_ != nullptr) {
    libvpx_->img_free(raw_);
    raw_ = nullptr;
  }
  inited_ = false;
  return ret_val;
}

// Applies `bitrate_allocation` to the encoder configuration: updates the
// per-spatial/temporal-layer target bitrates, recomputes the set of active
// spatial layers, and requests a key frame when the activation change needs
// one (enabling lower layers; enabling higher layers when inter-layer
// prediction is off or key-picture-only; or — behind a field trial —
// disabling layers). Always returns true.
bool LibvpxVp9Encoder::SetSvcRates(
    const VideoBitrateAllocation& bitrate_allocation) {
  std::pair<size_t, size_t> current_layers =
      GetActiveLayers(current_bitrate_allocation_);
  std::pair<size_t, size_t> new_layers = GetActiveLayers(bitrate_allocation);

  const bool layer_activation_requires_key_frame =
      inter_layer_pred_ == InterLayerPredMode::kOff ||
      inter_layer_pred_ == InterLayerPredMode::kOnKeyPic;
  const bool lower_layers_enabled = new_layers.first < current_layers.first;
  const bool higher_layers_enabled = new_layers.second > current_layers.second;
  const bool disabled_layers = new_layers.first > current_layers.first ||
                               new_layers.second < current_layers.second;

  if (lower_layers_enabled ||
      (higher_layers_enabled && layer_activation_requires_key_frame) ||
      (disabled_layers && layer_deactivation_requires_key_frame_)) {
    force_key_frame_ = true;
  }

  if (current_layers != new_layers) {
    // Active layer set changed: scalability structure info must be resent.
    ss_info_needed_ = true;
  }

  config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps();

  for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
    if (config_->ss_target_bitrate[sl_idx] == 0) {
      // Reset frame rate controller if layer is resumed after pause.
      framerate_controller_[sl_idx].Reset();
    }

    config_->ss_target_bitrate[sl_idx] =
        bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000;

    // libvpx expects cumulative (not per-layer) temporal bitrates, which is
    // what GetTemporalLayerSum() provides.
    for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) {
      config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] =
          bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000;
    }

    framerate_controller_[sl_idx].SetTargetRate(
        num_spatial_layers_ > 1 ? codec_.spatialLayers[sl_idx].maxFramerate
                                : codec_.maxFramerate);
  }

  // Derive the active-layer window from the configured bitrates. Layers
  // must form a contiguous run; a hole (inactive middle layer) is a bug.
  num_active_spatial_layers_ = 0;
  first_active_layer_ = 0;
  bool seen_active_layer = false;
  bool expect_no_more_active_layers = false;
  for (int i = 0; i < num_spatial_layers_; ++i) {
    if (config_->ss_target_bitrate[i] > 0) {
      RTC_DCHECK(!expect_no_more_active_layers) << "Only middle layer is "
                                                   "deactivated.";
      if (!seen_active_layer) {
        first_active_layer_ = i;
      }
      num_active_spatial_layers_ = i + 1;
      seen_active_layer = true;
    } else {
      expect_no_more_active_layers = seen_active_layer;
    }
  }

  if (seen_active_layer && performance_flags_.use_per_layer_speed) {
    // Denoising follows the performance flags of the top active layer.
    bool denoiser_on =
        AllowDenoising() && codec_.VP9()->denoisingOn &&
        performance_flags_by_spatial_index_[num_active_spatial_layers_ - 1]
            .allow_denoising;
    libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                           denoiser_on ? 1 : 0);
  }

  if (higher_layers_enabled && !force_key_frame_) {
    // Prohibit drop of all layers for the next frame, so newly enabled
    // layer would have a valid spatial reference.
    for (size_t i = 0; i < num_spatial_layers_; ++i) {
      svc_drop_frame_.framedrop_thresh[i] = 0;
    }
    force_all_active_layers_ = true;
  }

  if (svc_controller_) {
    for (int sid = 0; sid < num_spatial_layers_; ++sid) {
      // Bitrates in `layer_target_bitrate` are accumulated for each temporal
      // layer but in `VideoBitrateAllocation` they should be separated.
      int previous_bitrate_kbps = 0;
      for (int tid = 0; tid < num_temporal_layers_; ++tid) {
        int accumulated_bitrate_kbps =
            config_->layer_target_bitrate[sid * num_temporal_layers_ + tid];
        int single_layer_bitrate_kbps =
            accumulated_bitrate_kbps - previous_bitrate_kbps;
        RTC_DCHECK_GE(single_layer_bitrate_kbps, 0);
        current_bitrate_allocation_.SetBitrate(
            sid, tid, single_layer_bitrate_kbps * 1'000);
        previous_bitrate_kbps = accumulated_bitrate_kbps;
      }
    }
    svc_controller_->OnRatesUpdated(current_bitrate_allocation_);
  } else {
    current_bitrate_allocation_ = bitrate_allocation;
  }
  config_changed_ = true;
  return true;
}

// When top spatial layers are deactivated, reconfigures the encoder to run
// at the top *active* layer's resolution and rescales the per-layer scaling
// factors relative to that layer (inactive layers get 1:1). No-op for
// non-SVC configs, a single spatial layer, or when the encode size already
// matches the top active layer.
void LibvpxVp9Encoder::AdjustScalingFactorsForTopActiveLayer() {
  if (num_active_spatial_layers_ == 0 || num_spatial_layers_ <= 1 || !is_svc_ ||
      static_cast<int>(config_->g_w) ==
          codec_.spatialLayers[num_active_spatial_layers_ - 1].width) {
    return;
  }

  config_->g_w = codec_.spatialLayers[num_active_spatial_layers_ - 1].width;
  config_->g_h = codec_.spatialLayers[num_active_spatial_layers_ - 1].height;

  // Recalculate scaling factors ignoring top inactive layers.
  // Divide all by scaling factor of the last active layer.
  for (int i = 0; i < num_active_spatial_layers_; ++i) {
    int n = scaling_factors_num_[i] *
            scaling_factors_den_[num_active_spatial_layers_ - 1];
    int d = scaling_factors_den_[i] *
            scaling_factors_num_[num_active_spatial_layers_ - 1];
    // Reduce the fraction to lowest terms before handing it to libvpx.
    int gcd = std::gcd(n, d);
    svc_params_.scaling_factor_num[i] = n / gcd;
    svc_params_.scaling_factor_den[i] = d / gcd;
  }
  for (int i = num_active_spatial_layers_; i < num_spatial_layers_; ++i) {
    svc_params_.scaling_factor_num[i] = 1;
    svc_params_.scaling_factor_den[i] = 1;
  }

  libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
  config_changed_ = true;
}

// Zeroes the target bitrate of spatial layer `sid` and all its temporal
// layers; libvpx treats a zero-bitrate layer as "do not encode".
void LibvpxVp9Encoder::DisableSpatialLayer(int sid) {
  RTC_DCHECK_LT(sid, num_spatial_layers_);
  if (config_->ss_target_bitrate[sid] == 0) {
    return;  // Already disabled.
  }
  config_->ss_target_bitrate[sid] = 0;
  for (int tid = 0; tid < num_temporal_layers_; ++tid) {
    config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = 0;
  }
  config_changed_ = true;
}

// Restores the target bitrates of spatial layer `sid` from the most recent
// bitrate allocation, re-enabling encoding of that layer.
void LibvpxVp9Encoder::EnableSpatialLayer(int sid) {
  RTC_DCHECK_LT(sid, num_spatial_layers_);
  if (config_->ss_target_bitrate[sid] > 0) {
    return;  // Already enabled.
  }
  for (int tid = 0; tid < num_temporal_layers_; ++tid) {
    config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] =
        current_bitrate_allocation_.GetTemporalLayerSum(sid, tid) / 1000;
  }
  config_->ss_target_bitrate[sid] =
      current_bitrate_allocation_.GetSpatialLayerSum(sid) / 1000;
  RTC_DCHECK_GT(config_->ss_target_bitrate[sid], 0);
  config_changed_ = true;
}

// Enables exactly the spatial layers the SVC controller scheduled for the
// next frame (from `layer_frames_`) and disables the rest.
void LibvpxVp9Encoder::SetActiveSpatialLayers() {
  // Svc controller may decide to skip a frame at certain spatial layer even
  // when bitrate for it is non-zero, however libvpx uses configured bitrate as
  // a signal which layers should be produced.
  RTC_DCHECK(svc_controller_);
  RTC_DCHECK(!layer_frames_.empty());
  RTC_DCHECK(absl::c_is_sorted(
      layer_frames_, [](const ScalableVideoController::LayerFrameConfig& lhs,
                        const ScalableVideoController::LayerFrameConfig& rhs) {
        return lhs.SpatialId() < rhs.SpatialId();
      }));

  // `layer_frames_` is sorted by spatial id, so a single linear merge pass
  // matches each configured layer against the scheduled ones.
  auto frame_it = layer_frames_.begin();
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    if (frame_it != layer_frames_.end() && frame_it->SpatialId() == sid) {
      EnableSpatialLayer(sid);
      ++frame_it;
    } else {
      DisableSpatialLayer(sid);
    }
  }
}

// VideoEncoder override: validates the new rate parameters and forwards
// them to SetSvcRates(). Silently ignored (with a warning log) when the
// encoder is uninitialized, in an error state, or the frame rate is < 1 fps.
void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) {
  if (!inited_) {
    RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
    return;
  }
  if (encoder_->err) {
    RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err;
    return;
  }
  if (parameters.framerate_fps < 1.0) {
    RTC_LOG(LS_WARNING) << "Unsupported framerate: "
                        << parameters.framerate_fps;
    return;
  }

  // Round to the nearest integer fps.
  codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5);

  bool res = SetSvcRates(parameters.bitrate);
  RTC_DCHECK(res) << "Failed to set new bitrate allocation";
  AdjustScalingFactorsForTopActiveLayer();
  config_changed_ = true;
}

// TODO(eladalon): s/inst/codec_settings/g.
// VideoEncoder override: (re)initializes the encoder for `inst`. Validates
// the settings, optionally rewrites a simulcast config to SVC, derives the
// SVC structure (from the ScalabilityMode when present, otherwise from the
// VP9-specific settings), fills in the libvpx encoder configuration, and
// finishes via InitAndSetControlSettings(). Returns a WEBRTC_VIDEO_CODEC_*
// status code.
int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
                                 const Settings& settings) {
  if (inst == nullptr) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->maxFramerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // Allow zero to represent an unspecified maxBitRate
  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->width < 1 || inst->height < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (settings.number_of_cores < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->VP9().numberOfTemporalLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // libvpx probably does not support more than 3 spatial layers.
  if (inst->VP9().numberOfSpatialLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // Tear down any previous session before re-initializing.
  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }
  if (encoder_ == nullptr) {
    encoder_ = new vpx_codec_ctx_t;
    memset(encoder_, 0, sizeof(*encoder_));
  }
  if (config_ == nullptr) {
    config_ = new vpx_codec_enc_cfg_t;
    memset(config_, 0, sizeof(*config_));
  }
  timestamp_ = 0;
  if (&codec_ != inst) {
    codec_ = *inst;
  }

  if (enable_svc_for_simulcast_ && codec_.numberOfSimulcastStreams > 1) {
    // Field-trial-gated path: encode simulcast as an SVC stream. The rate
    // allocation is computed against the original simulcast config before
    // `codec_` is rewritten.
    if (!SimulcastToSvcConverter::IsConfigSupported(codec_)) {
      return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED;
    }
    RTC_LOG(LS_INFO) << "Rewriting simulcast config to SVC.";
    current_bitrate_allocation_ =
        SimulcastRateAllocator(env_, codec_)
            .Allocate(VideoBitrateAllocationParameters(
                codec_.startBitrate * 1000, codec_.maxFramerate));
    simulcast_to_svc_converter_.emplace(codec_);
    codec_ = simulcast_to_svc_converter_->GetConfig();
  } else {
    current_bitrate_allocation_ =
        SvcRateAllocator(codec_, env_.field_trials())
            .Allocate(VideoBitrateAllocationParameters(
                codec_.startBitrate * 1000, codec_.maxFramerate));
    simulcast_to_svc_converter_ = std::nullopt;
  }

  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));

  force_key_frame_ = true;
  pics_since_key_ = 0;
  num_cores_ = settings.number_of_cores;

  scalability_mode_ = codec_.GetScalabilityMode();
  if (scalability_mode_.has_value()) {
    // Use settings from `ScalabilityMode` identifier.
    RTC_LOG(LS_INFO) << "Create scalability structure "
                     << ScalabilityModeToString(*scalability_mode_);
    svc_controller_ = CreateScalabilityStructure(*scalability_mode_);
    if (!svc_controller_) {
      RTC_LOG(LS_WARNING) << "Failed to create scalability structure.";
      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
    }
    ScalableVideoController::StreamLayersConfig info =
        svc_controller_->StreamConfig();
    num_spatial_layers_ = info.num_spatial_layers;
    num_temporal_layers_ = info.num_temporal_layers;
    inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode_);
  } else {
    // Fall back to the VP9-specific codec settings; may yield a null
    // controller (see CreateVp9ScalabilityStructure).
    num_spatial_layers_ = codec_.VP9()->numberOfSpatialLayers;
    RTC_DCHECK_GT(num_spatial_layers_, 0);
    num_temporal_layers_ = codec_.VP9()->numberOfTemporalLayers;
    if (num_temporal_layers_ == 0) {
      num_temporal_layers_ = 1;
    }
    inter_layer_pred_ = codec_.VP9()->interLayerPred;
    svc_controller_ = CreateVp9ScalabilityStructure(codec_);
  }

  // One frame-rate controller per spatial layer.
  framerate_controller_ = std::vector<FramerateControllerDeprecated>(
      num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate));

  is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);

  // Populate encoder configuration with default values.
  if (libvpx_->codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  switch (profile_) {
    case VP9Profile::kProfile0:
      config_->g_bit_depth = VPX_BITS_8;
      config_->g_profile = 0;
      config_->g_input_bit_depth = 8;
      break;
    case VP9Profile::kProfile1:
      // Encoding of profile 1 is not implemented. It would require extended
      // support for I444, I422, and I440 buffers.
      RTC_DCHECK_NOTREACHED();
      break;
    case VP9Profile::kProfile2:
      config_->g_bit_depth = VPX_BITS_10;
      config_->g_profile = 2;
      config_->g_input_bit_depth = 10;
      break;
    case VP9Profile::kProfile3:
      // Encoding of profile 3 is not implemented.
      RTC_DCHECK_NOTREACHED();
      break;
  }

  config_->g_w = codec_.width;
  config_->g_h = codec_.height;
  config_->rc_target_bitrate = codec_.startBitrate;  // in kbit/s
  config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
  // Setting the time base of the codec.
  config_->g_timebase.num = 1;
  config_->g_timebase.den = kVideoPayloadTypeFrequency;
  config_->g_lag_in_frames = 0;  // 0- no frame lagging
  config_->g_threads = 1;
  // Rate control settings.
  config_->rc_dropframe_thresh = codec_.GetFrameDropEnabled() ? 30 : 0;
  config_->rc_end_usage = VPX_CBR;
  config_->g_pass = VPX_RC_ONE_PASS;
  // Screenshare tolerates a higher QP floor than camera content.
  config_->rc_min_quantizer =
      codec_.mode == VideoCodecMode::kScreensharing ? 8 : 2;
  config_->rc_max_quantizer = 52;
  config_->rc_undershoot_pct = 50;
  config_->rc_overshoot_pct = 50;
  config_->rc_buf_initial_sz = 500;
  config_->rc_buf_optimal_sz = 600;
  config_->rc_buf_sz = 1000;
  // Set the maximum target size of any key-frame.
  rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz);
  // Key-frame interval is enforced manually by this wrapper.
  config_->kf_mode = VPX_KF_DISABLED;
  // TODO(webm:1592): work-around for libvpx issue, as it can still
  // put some key-frames at will even in VPX_KF_DISABLED kf_mode.
  config_->kf_max_dist = codec_.VP9()->keyFrameInterval;
  config_->kf_min_dist = config_->kf_max_dist;
  if (quality_scaler_experiment_.enabled) {
    // In that experiment webrtc wide quality scaler is used instead of libvpx
    // internal scaler.
    config_->rc_resize_allowed = 0;
  } else {
    config_->rc_resize_allowed = codec_.VP9()->automaticResizeOn ? 1 : 0;
  }
  // Determine number of threads based on the image size and #cores.
  config_->g_threads =
      NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);

  is_flexible_mode_ = codec_.VP9()->flexibleMode;

  if (num_spatial_layers_ > 1 &&
      codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
    RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
                         "several spatial layers";
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // Configure the GOF (group-of-frames) structure and the libvpx ts_*
  // pattern for the requested number of temporal layers (max 3).
  if (num_temporal_layers_ == 1) {
    gof_.SetGofInfoVP9(kTemporalStructureMode1);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
    config_->ts_number_layers = 1;
    config_->ts_rate_decimator[0] = 1;
    config_->ts_periodicity = 1;
    config_->ts_layer_id[0] = 0;
  } else if (num_temporal_layers_ == 2) {
    gof_.SetGofInfoVP9(kTemporalStructureMode2);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
    config_->ts_number_layers = 2;
    config_->ts_rate_decimator[0] = 2;
    config_->ts_rate_decimator[1] = 1;
    config_->ts_periodicity = 2;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 1;
  } else if (num_temporal_layers_ == 3) {
    gof_.SetGofInfoVP9(kTemporalStructureMode3);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
    config_->ts_number_layers = 3;
    config_->ts_rate_decimator[0] = 4;
    config_->ts_rate_decimator[1] = 2;
    config_->ts_rate_decimator[2] = 1;
    config_->ts_periodicity = 4;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 2;
    config_->ts_layer_id[2] = 1;
    config_->ts_layer_id[3] = 2;
  } else {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // Unconditionally overrides the mode chosen above: in bypass mode this
  // wrapper drives temporal layering itself instead of letting libvpx apply
  // a fixed pattern.
  config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
  if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 &&
      codec_.mode == VideoCodecMode::kScreensharing) {
    // External reference control for several temporal layers with different
    // frame rates on spatial layers is not implemented yet.
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  ref_buf_ = {};

  return InitAndSetControlSettings();
}

// Chooses the encoder thread count from resolution and available cores,
// keeping it equal to a possible number of column tiles (1, 2 or 4). See
// the VP9E_SET_TILE_COLUMNS comment in InitAndSetControlSettings.
int LibvpxVp9Encoder::NumberOfThreads(int width,
                                      int height,
                                      int number_of_cores) {
  // Keep the number of encoder threads equal to the possible number of column
  // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
  if (width * height >= 1280 * 720 && number_of_cores > 4) {
    return 4;
  } else if (width * height >= 640 * 360 && number_of_cores > 2) {
    return 2;
  } else {
    // Use 2 threads for low res on mobile ARM.
#ifdef MOBILE_ARM
    if (width * height >= 320 * 180 && number_of_cores > 2) {
      return 2;
    }
#endif
    // 1 thread less than VGA.
    return 1;
  }
}

// Finishes initialization: computes per-layer quantizer limits and scaling
// factors, applies the initial bitrate allocation, creates the libvpx codec
// instance, and issues the codec_control() calls for SVC, speed, denoising,
// frame dropping, the output callback and tiling.
int LibvpxVp9Encoder::InitAndSetControlSettings() {
  // Set QP-min/max per spatial and temporal layer.
  int tot_num_layers = num_spatial_layers_ * num_temporal_layers_;
  scaling_factors_num_.resize(num_spatial_layers_);
  scaling_factors_den_.resize(num_spatial_layers_);
  for (int i = 0; i < tot_num_layers; ++i) {
    svc_params_.max_quantizers[i] = config_->rc_max_quantizer;
    svc_params_.min_quantizers[i] = config_->rc_min_quantizer;
  }
  config_->ss_number_layers = num_spatial_layers_;
  if (svc_controller_) {
    // Scaling factors come from the scalability structure.
    auto stream_config = svc_controller_->StreamConfig();
    for (int i = 0; i < stream_config.num_spatial_layers; ++i) {
      scaling_factors_num_[i] = svc_params_.scaling_factor_num[i] =
          stream_config.scaling_factor_num[i];
      scaling_factors_den_[i] = svc_params_.scaling_factor_den[i] =
          stream_config.scaling_factor_den[i];
    }
  } else if (num_spatial_layers_ > 1) {
    // No controller: derive 1:2^k factors from the configured layer sizes,
    // rejecting anything that is not an exact power-of-two downscale.
    for (int i = 0; i < num_spatial_layers_; ++i) {
      const auto& layer = codec_.spatialLayers[i];
      RTC_CHECK_GT(layer.width, 0);
      const int scale_factor = codec_.width / layer.width;
      RTC_DCHECK_GT(scale_factor, 0);

      // Ensure scaler factor is integer.
      if (scale_factor * layer.width != codec_.width) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      // Ensure scale factor is the same in both dimensions.
      if (scale_factor * layer.height != codec_.height) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      // Ensure scale factor is power of two.
      const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0;
      if (!is_pow_of_two) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      scaling_factors_num_[i] = svc_params_.scaling_factor_num[i] = 1;
      scaling_factors_den_[i] = svc_params_.scaling_factor_den[i] =
          scale_factor;

      RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0);
      RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate);
      if (i > 0) {
        // Frame rate of high spatial layer is supposed to be equal or higher
        // than frame rate of low spatial layer.
        RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate,
                      codec_.spatialLayers[i - 1].maxFramerate);
      }
    }
  }

  UpdatePerformanceFlags();
  RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(),
                static_cast<size_t>(num_spatial_layers_));

  // `current_bitrate_allocation_` is set in InitEncode and may have used
  // simulcast configuration.
  if (!SetSvcRates(current_bitrate_allocation_)) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  vpx_codec_flags_t flags =
      config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH;

  const vpx_codec_err_t rv =
      libvpx_->codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, flags);
  if (rv != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv);
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }

  if (performance_flags_.use_per_layer_speed) {
    for (int si = 0; si < num_spatial_layers_; ++si) {
      svc_params_.speed_per_layer[si] =
          performance_flags_by_spatial_index_[si].base_layer_speed;
      svc_params_.loopfilter_ctrl[si] =
          performance_flags_by_spatial_index_[si].deblock_mode;
    }
    // Denoising follows the flags of the top spatial layer.
    bool denoiser_on =
        AllowDenoising() && codec_.VP9()->denoisingOn &&
        performance_flags_by_spatial_index_[num_spatial_layers_ - 1]
            .allow_denoising;
    libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                           denoiser_on ? 1 : 0);
  }

  libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                         rc_max_intra_target_);
  libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE,
                         codec_.VP9()->adaptiveQpMode ? 3 : 0);

  libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
  libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);

  if (is_svc_) {
    libvpx_->codec_control(encoder_, VP9E_SET_SVC, 1);
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
  }
  if (!is_svc_ || !performance_flags_.use_per_layer_speed) {
    // Single global speed setting when per-layer speeds are not in use.
    libvpx_->codec_control(
        encoder_, VP8E_SET_CPUUSED,
        performance_flags_by_spatial_index_.rbegin()->base_layer_speed);
  }

  if (num_spatial_layers_ > 1) {
    switch (inter_layer_pred_) {
      case InterLayerPredMode::kOn:
        libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0);
        break;
      case InterLayerPredMode::kOff:
        libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1);
        break;
      case InterLayerPredMode::kOnKeyPic:
        libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2);
        break;
      default:
        RTC_DCHECK_NOTREACHED();
    }

    memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
    const bool reverse_constrained_drop_mode =
        inter_layer_pred_ == InterLayerPredMode::kOn &&
        codec_.mode == VideoCodecMode::kScreensharing;
    if (reverse_constrained_drop_mode) {
      // Screenshare dropping mode: drop a layer only together with all lower
      // layers. This ensures that drops on lower layers won't reduce frame-rate
      // for higher layers and reference structure is RTP-compatible.
      svc_drop_frame_.framedrop_mode = CONSTRAINED_FROM_ABOVE_DROP;
      svc_drop_frame_.max_consec_drop = 5;
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
    } else {
      if (is_flexible_mode_ && svc_controller_ &&
          (inter_layer_pred_ == InterLayerPredMode::kOff ||
           inter_layer_pred_ == InterLayerPredMode::kOnKeyPic)) {
        // SVC controller is required since it properly accounts for dropped
        // refs (unlike SetReferences(), which assumes full superframe drop).
        svc_drop_frame_.framedrop_mode = LAYER_DROP;
      } else {
        // Configure encoder to drop entire superframe whenever it needs to drop
        // a layer. This mode is preferred over per-layer dropping which causes
        // quality flickering and is not compatible with RTP non-flexible mode.
        svc_drop_frame_.framedrop_mode = FULL_SUPERFRAME_DROP;
      }
      svc_drop_frame_.max_consec_drop = 2;
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
    }
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                           &svc_drop_frame_);
  }

  // Register callback for getting each spatial layer.
  vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
      LibvpxVp9Encoder::EncoderOutputCodedPacketCallback,
      reinterpret_cast<void*>(this)};
  libvpx_->codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK,
                         reinterpret_cast<void*>(&cbp));

  // Control function to set the number of column tiles in encoding a frame, in
  // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
  // The number tile columns will be capped by the encoder based on image size
  // (minimum width of tile column is 256 pixels, maximum is 4096).
  libvpx_->codec_control(encoder_, VP9E_SET_TILE_COLUMNS,
                         static_cast<int>((config_->g_threads >> 1)));

  // Turn on row-based multithreading.
956 libvpx_->codec_control(encoder_, VP9E_SET_ROW_MT, 1); 957 958 if (AllowDenoising() && !performance_flags_.use_per_layer_speed) { 959 libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, 960 codec_.VP9()->denoisingOn ? 1 : 0); 961 } 962 963 if (codec_.mode == VideoCodecMode::kScreensharing) { 964 // Adjust internal parameters to screen content. 965 libvpx_->codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1); 966 } 967 // Enable encoder skip of static/low content blocks. 968 libvpx_->codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1); 969 970 // This has to be done after the initial setup is completed. 971 AdjustScalingFactorsForTopActiveLayer(); 972 973 inited_ = true; 974 config_changed_ = true; 975 return WEBRTC_VIDEO_CODEC_OK; 976 } 977 978 uint32_t LibvpxVp9Encoder::MaxIntraTarget(uint32_t optimal_buffer_size) { 979 // Set max to the optimal buffer level (normalized by target BR), 980 // and scaled by a scale_par. 981 // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps]. 982 // This value is presented in percentage of perFrameBw: 983 // perFrameBw = targetBR[Kbps] * 1000 / framerate. 984 // The target in % is as follows: 985 float scale_par = 0.5; 986 uint32_t target_pct = 987 optimal_buffer_size * scale_par * codec_.maxFramerate / 10; 988 // Don't go below 3 times the per frame bandwidth. 989 const uint32_t min_intra_size = 300; 990 return (target_pct < min_intra_size) ? min_intra_size : target_pct; 991 } 992 993 int LibvpxVp9Encoder::Encode(const VideoFrame& input_image, 994 const std::vector<VideoFrameType>* frame_types) { 995 if (!inited_) { 996 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 997 } 998 if (encoded_complete_callback_ == nullptr) { 999 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 1000 } 1001 if (num_active_spatial_layers_ == 0) { 1002 // All spatial layers are disabled, return without encoding anything. 1003 return WEBRTC_VIDEO_CODEC_OK; 1004 } 1005 1006 // We only support one stream at the moment. 
  if (frame_types && !frame_types->empty()) {
    if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) {
      force_key_frame_ = true;
    }
  }

  // Honor the configured key-frame interval.
  if (pics_since_key_ + 1 ==
      static_cast<size_t>(codec_.VP9()->keyFrameInterval)) {
    force_key_frame_ = true;
  }

  if (svc_controller_) {
    layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_);
    if (simulcast_to_svc_converter_) {
      simulcast_to_svc_converter_->EncodeStarted(force_key_frame_);
    }
    if (layer_frames_.empty()) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    if (layer_frames_.front().IsKeyframe()) {
      force_key_frame_ = true;
    }
  }

  vpx_svc_layer_id_t layer_id = {0};
  if (!force_key_frame_) {
    const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
    layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];

    if (codec_.mode == VideoCodecMode::kScreensharing) {
      const uint32_t frame_timestamp_ms =
          1000 * input_image.rtp_timestamp() / kVideoPayloadTypeFrequency;

      // To ensure that several rate-limiters with different limits don't
      // interfere, they must be queried in order of increasing limit.

      bool use_steady_state_limiter =
          input_image.update_rect().IsEmpty() &&
          num_steady_state_frames_ >=
              variable_framerate_screenshare::kFramesBeforeSteadyState;

      // Need to check all frame limiters, even if lower layers are disabled,
      // because variable frame-rate limiter should be checked after the first
      // layer. It's easier to overwrite active layers after, than check all
      // cases.
      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        const float layer_fps =
            framerate_controller_[layer_id.spatial_layer_id].GetTargetRate();
        // Use steady state rate-limiter at the correct place.
        if (use_steady_state_limiter &&
            layer_fps > variable_framerate_screenshare::kMinFps - 1e-9) {
          if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) {
            // Drop the whole picture by marking the spatial id out of range.
            layer_id.spatial_layer_id = num_active_spatial_layers_;
          }
          // Break always: if rate limiter triggered frame drop, no need to
          // continue; otherwise, the rate is less than the next limiters.
          break;
        }
        if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
          ++layer_id.spatial_layer_id;
        } else {
          break;
        }
      }

      if (use_steady_state_limiter &&
          layer_id.spatial_layer_id < num_active_spatial_layers_) {
        variable_framerate_controller_.AddFrame(frame_timestamp_ms);
      }
    }

    if (force_all_active_layers_) {
      layer_id.spatial_layer_id = first_active_layer_;
      force_all_active_layers_ = false;
    }

    RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
    if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
      // Drop entire picture.
      return WEBRTC_VIDEO_CODEC_OK;
    }
  }

  // Need to set temporal layer id on ALL layers, even disabled ones.
  // Otherwise libvpx might produce frames on a disabled layer:
  // http://crbug.com/1051476
  for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
    layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
  }

  if (layer_id.spatial_layer_id < first_active_layer_) {
    layer_id.spatial_layer_id = first_active_layer_;
  }

  if (svc_controller_) {
    // The SVC controller is authoritative for layer ids when present.
    layer_id.spatial_layer_id = layer_frames_.front().SpatialId();
    layer_id.temporal_layer_id = layer_frames_.front().TemporalId();
    for (const auto& layer : layer_frames_) {
      layer_id.temporal_layer_id_per_spatial[layer.SpatialId()] =
          layer.TemporalId();
    }
    SetActiveSpatialLayers();
  }

  if (is_svc_ && performance_flags_.use_per_layer_speed) {
    // Update speed settings that might depend on temporal index.
    bool speed_updated = false;
    for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
      const int target_speed =
          layer_id.temporal_layer_id_per_spatial[sl_idx] == 0
              ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed
              : performance_flags_by_spatial_index_[sl_idx].high_layer_speed;
      if (svc_params_.speed_per_layer[sl_idx] != target_speed) {
        svc_params_.speed_per_layer[sl_idx] = target_speed;
        speed_updated = true;
      }
    }
    if (speed_updated) {
      libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
    }
  }

  libvpx_->codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);

  if (num_spatial_layers_ > 1) {
    // Update frame dropping settings as they may change on per-frame basis.
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                           &svc_drop_frame_);
  }

  if (config_changed_) {
    if (libvpx_->codec_enc_config_set(encoder_, config_)) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }

    if (!performance_flags_.use_per_layer_speed) {
      // Not setting individual speeds per layer, find the highest active
      // resolution instead and base the speed on that.
      for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
        if (config_->ss_target_bitrate[i] > 0) {
          int width = (scaling_factors_num_[i] * codec_.width) /
                      scaling_factors_den_[i];
          int height = (scaling_factors_num_[i] * codec_.height) /
                       scaling_factors_den_[i];
          int speed =
              std::prev(performance_flags_.settings_by_resolution.lower_bound(
                  width * height))
                  ->second.base_layer_speed;
          libvpx_->codec_control(encoder_, VP8E_SET_CPUUSED, speed);
          break;
        }
      }
    }
    config_changed_ = false;
  }

  if (input_image.width() != codec_.width ||
      input_image.height() != codec_.height) {
    int ret = UpdateCodecFrameSize(input_image);
    if (ret < 0) {
      return ret;
    }
  }

  // Set input image for use in the callback.
  // This was necessary since you need some information from input_image.
  // You can save only the necessary information (such as timestamp) instead of
  // doing this.
  input_image_ = &input_image;

  // If only a subset of spatial layers is active, downscale the input to the
  // top active layer's resolution before handing it to libvpx.
  scoped_refptr<VideoFrameBuffer> scaled_image;
  if (!is_svc_ || num_active_spatial_layers_ == num_spatial_layers_) {
    scaled_image = input_image.video_frame_buffer();
  } else {
    scaled_image = input_image.video_frame_buffer()->Scale(
        codec_.spatialLayers[num_active_spatial_layers_ - 1].width,
        codec_.spatialLayers[num_active_spatial_layers_ - 1].height);
  }

  RTC_DCHECK_EQ(scaled_image->width(), config_->g_w);
  RTC_DCHECK_EQ(scaled_image->height(), config_->g_h);

  // In case we need to map the buffer, `mapped_buffer` is used to keep it alive
  // through reference counting until after encoding has finished.
  scoped_refptr<const VideoFrameBuffer> mapped_buffer;
  const I010BufferInterface* i010_buffer;
  scoped_refptr<const I010BufferInterface> i010_copy;
  switch (profile_) {
    case VP9Profile::kProfile0: {
      mapped_buffer = PrepareBufferForProfile0(scaled_image);
      if (!mapped_buffer) {
        return WEBRTC_VIDEO_CODEC_ERROR;
      }
      break;
    }
    case VP9Profile::kProfile1: {
      RTC_DCHECK_NOTREACHED();
      break;
    }
    case VP9Profile::kProfile2: {
      // We can inject kI010 frames directly for encode. All other formats
      // should be converted to it.
      switch (input_image.video_frame_buffer()->type()) {
        case VideoFrameBuffer::Type::kI010: {
          i010_buffer = scaled_image->GetI010();
          break;
        }
        default: {
          auto i420_buffer = scaled_image->ToI420();
          if (!i420_buffer) {
            RTC_LOG(LS_ERROR) << "Failed to convert "
                              << VideoFrameBufferTypeToString(
                                     input_image.video_frame_buffer()->type())
                              << " image to I420. Can't encode frame.";
            return WEBRTC_VIDEO_CODEC_ERROR;
          }
          i010_copy = I010Buffer::Copy(*i420_buffer);
          i010_buffer = i010_copy.get();
        }
      }
      MaybeRewrapRawWithFormat(VPX_IMG_FMT_I42016, i010_buffer->width(),
                               i010_buffer->height());
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataY()));
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataU()));
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataV()));
      // Strides are in bytes; I010 samples are 16 bits wide.
      raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2;
      raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2;
      raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2;
      break;
    }
    case VP9Profile::kProfile3: {
      RTC_DCHECK_NOTREACHED();
      break;
    }
  }

  vpx_enc_frame_flags_t flags = 0;
  if (force_key_frame_) {
    flags = VPX_EFLAG_FORCE_KF;
  }
#if defined(WEBRTC_ENCODER_PSNR_STATS) && defined(VPX_EFLAG_CALCULATE_PSNR)
  if (calculate_psnr_ && psnr_frame_sampler_.ShouldBeSampled(input_image)) {
    flags |= VPX_EFLAG_CALCULATE_PSNR;
  }
#endif

  if (svc_controller_) {
    vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_);
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
                           &ref_config);
  } else {
    vpx_svc_ref_frame_config_t ref_config =
        SetReferences(force_key_frame_, layer_id.spatial_layer_id);

    if (VideoCodecMode::kScreensharing == codec_.mode) {
      // Per-layer frame duration, derived from each layer's target rate.
      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        ref_config.duration[sl_idx] = static_cast<int64_t>(
            kVideoPayloadTypeFrequency /
            (std::min(static_cast<float>(codec_.maxFramerate),
                      framerate_controller_[sl_idx].GetTargetRate())));
      }
    }

    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
                           &ref_config);
  }

  first_frame_in_picture_ = true;

  // TODO(ssilkin): Frame duration should be specified per spatial layer
  // since their frame rate can be different. For now calculate frame duration
  // based on target frame rate of the highest spatial layer, which frame rate
  // is supposed to be equal or higher than frame rate of low spatial layers.
  // Also, timestamp should represent actual time passed since previous frame
  // (not 'expected' time). Then rate controller can drain buffer more
  // accurately.
  RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
  float target_framerate_fps =
      (codec_.mode == VideoCodecMode::kScreensharing)
          ? std::min(static_cast<float>(codec_.maxFramerate),
                     framerate_controller_[num_active_spatial_layers_ - 1]
                         .GetTargetRate())
          : codec_.maxFramerate;
  uint32_t duration =
      static_cast<uint32_t>(kVideoPayloadTypeFrequency / target_framerate_fps);
  const vpx_codec_err_t rv = libvpx_->codec_encode(
      encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME);
  if (rv != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Encoding error: " << libvpx_->codec_err_to_string(rv)
                      << "\n"
                         "Details: "
                      << libvpx_->codec_error(encoder_) << "\n"
                      << libvpx_->codec_error_detail(encoder_);
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  timestamp_ += duration;

  return WEBRTC_VIDEO_CODEC_OK;
}

// Reconfigures the encoder for a new input frame size (Mozilla addition).
// NOTE(review): the success path below intentionally hits RTC_CHECK(false) —
// rate state restoration is unimplemented; confirm before enabling resize.
int LibvpxVp9Encoder::UpdateCodecFrameSize(
    const VideoFrame& input_image) {
  RTC_LOG(LS_INFO) << "Reconfiging VP from " <<
                  codec_.width << "x" << codec_.height << " to " <<
                  input_image.width() << "x" << input_image.height();
  // Preserve latest bitrate/framerate setting
  // TODO: Mozilla - see below, we need to save more state here.
1317 //uint32_t old_bitrate_kbit = config_->rc_target_bitrate; 1318 //uint32_t old_framerate = codec_.maxFramerate; 1319 1320 codec_.width = input_image.width(); 1321 codec_.height = input_image.height(); 1322 1323 vpx_img_free(raw_); 1324 raw_ = vpx_img_wrap(NULL, VPX_IMG_FMT_I420, codec_.width, codec_.height, 1325 1, NULL); 1326 // Update encoder context for new frame size. 1327 config_->g_w = codec_.width; 1328 config_->g_h = codec_.height; 1329 1330 // Determine number of threads based on the image size and #cores. 1331 config_->g_threads = NumberOfThreads(codec_.width, codec_.height, 1332 num_cores_); 1333 1334 // NOTE: We would like to do this the same way vp8 does it 1335 // (with vpx_codec_enc_config_set()), but that causes asserts 1336 // in AQ 3 (cyclic); and in AQ 0 it works, but on a resize to smaller 1337 // than 1/2 x 1/2 original it asserts in convolve(). Given these 1338 // bugs in trying to do it the "right" way, we basically re-do 1339 // the initialization. 1340 vpx_codec_destroy(encoder_); // clean up old state 1341 int result = InitAndSetControlSettings(); 1342 if (result == WEBRTC_VIDEO_CODEC_OK) { 1343 // TODO: Mozilla rates have become much more complicated, we need to store 1344 // more state or find another way of doing this. 
1345 //return SetRates(old_bitrate_kbit, old_framerate); 1346 RTC_CHECK(false); 1347 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; 1348 } 1349 return result; 1350 } 1351 1352 bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, 1353 std::optional<int>* spatial_idx, 1354 std::optional<int>* temporal_idx, 1355 const vpx_codec_cx_pkt& pkt) { 1356 RTC_CHECK(codec_specific != nullptr); 1357 codec_specific->codecType = kVideoCodecVP9; 1358 CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9); 1359 1360 vp9_info->first_frame_in_picture = first_frame_in_picture_; 1361 vp9_info->flexible_mode = is_flexible_mode_; 1362 1363 if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { 1364 pics_since_key_ = 0; 1365 } else if (first_frame_in_picture_) { 1366 ++pics_since_key_; 1367 } 1368 1369 vpx_svc_layer_id_t layer_id = {0}; 1370 libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); 1371 1372 // Can't have keyframe with non-zero temporal layer. 1373 RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0); 1374 1375 RTC_CHECK_GT(num_temporal_layers_, 0); 1376 RTC_CHECK_GT(num_active_spatial_layers_, 0); 1377 if (num_temporal_layers_ == 1) { 1378 RTC_CHECK_EQ(layer_id.temporal_layer_id, 0); 1379 vp9_info->temporal_idx = kNoTemporalIdx; 1380 *temporal_idx = std::nullopt; 1381 } else { 1382 vp9_info->temporal_idx = layer_id.temporal_layer_id; 1383 *temporal_idx = layer_id.temporal_layer_id; 1384 } 1385 if (num_active_spatial_layers_ == 1) { 1386 RTC_CHECK_EQ(layer_id.spatial_layer_id, 0); 1387 *spatial_idx = std::nullopt; 1388 } else { 1389 *spatial_idx = layer_id.spatial_layer_id; 1390 } 1391 1392 const bool is_key_pic = (pics_since_key_ == 0); 1393 const bool is_inter_layer_pred_allowed = 1394 (inter_layer_pred_ == InterLayerPredMode::kOn || 1395 (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic)); 1396 1397 // Always set inter_layer_predicted to true on high layer frame if inter-layer 1398 // prediction (ILP) 
is allowed even if encoder didn't actually use it. 1399 // Setting inter_layer_predicted to false would allow receiver to decode high 1400 // layer frame without decoding low layer frame. If that would happen (e.g. 1401 // if low layer frame is lost) then receiver won't be able to decode next high 1402 // layer frame which uses ILP. 1403 vp9_info->inter_layer_predicted = 1404 first_frame_in_picture_ ? false : is_inter_layer_pred_allowed; 1405 1406 // Mark all low spatial layer frames as references (not just frames of 1407 // active low spatial layers) if inter-layer prediction is enabled since 1408 // these frames are indirect references of high spatial layer, which can 1409 // later be enabled without key frame. 1410 vp9_info->non_ref_for_inter_layer_pred = 1411 !is_inter_layer_pred_allowed || 1412 layer_id.spatial_layer_id + 1 == num_spatial_layers_; 1413 1414 // Always populate this, so that the packetizer can properly set the marker 1415 // bit. 1416 vp9_info->num_spatial_layers = num_active_spatial_layers_; 1417 vp9_info->first_active_layer = first_active_layer_; 1418 1419 vp9_info->num_ref_pics = 0; 1420 FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted, 1421 vp9_info); 1422 if (vp9_info->flexible_mode) { 1423 vp9_info->gof_idx = kNoGofIdx; 1424 if (!svc_controller_) { 1425 if (num_temporal_layers_ == 1) { 1426 vp9_info->temporal_up_switch = true; 1427 } else { 1428 // In flexible mode with > 1 temporal layer but no SVC controller we 1429 // can't techincally determine if a frame is an upswitch point, use 1430 // gof-based data as proxy for now. 1431 // TODO(sprang): Remove once SVC controller is the only choice. 
1432 vp9_info->gof_idx = 1433 static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof); 1434 vp9_info->temporal_up_switch = 1435 gof_.temporal_up_switch[vp9_info->gof_idx]; 1436 } 1437 } 1438 } else { 1439 vp9_info->gof_idx = 1440 static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof); 1441 vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx]; 1442 RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] || 1443 vp9_info->num_ref_pics == 0); 1444 } 1445 1446 vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0); 1447 1448 // Write SS on key frame of independently coded spatial layers and on base 1449 // temporal/spatial layer frame if number of layers changed without issuing 1450 // of key picture (inter-layer prediction is enabled). 1451 const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted; 1452 if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 && 1453 layer_id.spatial_layer_id == first_active_layer_)) { 1454 vp9_info->ss_data_available = true; 1455 vp9_info->spatial_layer_resolution_present = true; 1456 // Signal disabled layers. 1457 for (size_t i = 0; i < first_active_layer_; ++i) { 1458 vp9_info->width[i] = 0; 1459 vp9_info->height[i] = 0; 1460 } 1461 for (size_t i = first_active_layer_; i < num_active_spatial_layers_; ++i) { 1462 vp9_info->width[i] = 1463 codec_.width * scaling_factors_num_[i] / scaling_factors_den_[i]; 1464 vp9_info->height[i] = 1465 codec_.height * scaling_factors_num_[i] / scaling_factors_den_[i]; 1466 } 1467 if (vp9_info->flexible_mode) { 1468 vp9_info->gof.num_frames_in_gof = 0; 1469 } else { 1470 vp9_info->gof.CopyGofInfoVP9(gof_); 1471 } 1472 1473 ss_info_needed_ = false; 1474 } else { 1475 vp9_info->ss_data_available = false; 1476 } 1477 1478 first_frame_in_picture_ = false; 1479 1480 // Populate codec-agnostic section in the codec specific structure. 
1481 if (svc_controller_) { 1482 auto it = absl::c_find_if( 1483 layer_frames_, 1484 [&](const ScalableVideoController::LayerFrameConfig& config) { 1485 return config.SpatialId() == layer_id.spatial_layer_id; 1486 }); 1487 if (it == layer_frames_.end()) { 1488 RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S" 1489 << layer_id.spatial_layer_id << "T" 1490 << layer_id.temporal_layer_id 1491 << " that wasn't requested."; 1492 return false; 1493 } 1494 codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it); 1495 if (is_key_frame) { 1496 codec_specific->template_structure = 1497 svc_controller_->DependencyStructure(); 1498 auto& resolutions = codec_specific->template_structure->resolutions; 1499 resolutions.resize(num_spatial_layers_); 1500 for (int sid = 0; sid < num_spatial_layers_; ++sid) { 1501 resolutions[sid] = RenderResolution( 1502 /*width=*/codec_.width * scaling_factors_num_[sid] / 1503 scaling_factors_den_[sid], 1504 /*height=*/codec_.height * scaling_factors_num_[sid] / 1505 scaling_factors_den_[sid]); 1506 } 1507 } 1508 if (is_flexible_mode_) { 1509 // Populate data for legacy temporal-upswitch state. 1510 // We can switch up to a higher temporal layer only if all temporal layers 1511 // higher than this (within the current spatial layer) are switch points. 1512 vp9_info->temporal_up_switch = true; 1513 for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_; 1514 ++i) { 1515 // Assumes decode targets are always ordered first by spatial then by 1516 // temporal id. 1517 size_t dti_index = 1518 (layer_id.spatial_layer_id * num_temporal_layers_) + i; 1519 vp9_info->temporal_up_switch &= 1520 (codec_specific->generic_frame_info 1521 ->decode_target_indications[dti_index] == 1522 DecodeTargetIndication::kSwitch); 1523 } 1524 } 1525 } 1526 // If returned the configured scalability mode in standard mode, otherwise 1527 // create one if it is based on layer activation. 
1528 if (scalability_mode_) { 1529 codec_specific->scalability_mode = scalability_mode_; 1530 } else { 1531 codec_specific_.scalability_mode = MakeScalabilityMode( 1532 num_active_spatial_layers_, num_temporal_layers_, inter_layer_pred_, 1533 num_active_spatial_layers_ > 1 1534 ? std::make_optional(ScalabilityModeResolutionRatio::kTwoToOne) 1535 : std::nullopt, 1536 /*shift=*/false); 1537 } 1538 1539 return true; 1540 } 1541 1542 void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, 1543 const size_t pic_num, 1544 const bool inter_layer_predicted, 1545 CodecSpecificInfoVP9* vp9_info) { 1546 vpx_svc_layer_id_t layer_id = {0}; 1547 libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); 1548 1549 const bool is_key_frame = 1550 (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; 1551 1552 std::vector<RefFrameBuffer> ref_buf_list; 1553 1554 if (is_svc_) { 1555 vpx_svc_ref_frame_config_t enc_layer_conf = {{0}}; 1556 libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, 1557 &enc_layer_conf); 1558 char ref_buf_flags[] = "00000000"; 1559 // There should be one character per buffer + 1 termination '\0'. 
    static_assert(sizeof(ref_buf_flags) == kNumVp9Buffers + 1);

    // Collect the (deduplicated) set of reference buffers this layer frame
    // actually uses: LAST, ALTREF and GOLDEN.
    if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
      const size_t fb_idx =
          enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id];
      RTC_DCHECK_LT(fb_idx, ref_buf_.size());
      if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                    ref_buf_[fb_idx]) == ref_buf_list.end()) {
        ref_buf_list.push_back(ref_buf_[fb_idx]);
        ref_buf_flags[fb_idx] = '1';
      }
    }

    if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
      const size_t fb_idx =
          enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id];
      RTC_DCHECK_LT(fb_idx, ref_buf_.size());
      if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                    ref_buf_[fb_idx]) == ref_buf_list.end()) {
        ref_buf_list.push_back(ref_buf_[fb_idx]);
        ref_buf_flags[fb_idx] = '1';
      }
    }

    if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
      const size_t fb_idx =
          enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id];
      RTC_DCHECK_LT(fb_idx, ref_buf_.size());
      if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
                    ref_buf_[fb_idx]) == ref_buf_list.end()) {
        ref_buf_list.push_back(ref_buf_[fb_idx]);
        ref_buf_flags[fb_idx] = '1';
      }
    }

    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
                        << layer_id.spatial_layer_id << " tl "
                        << layer_id.temporal_layer_id << " refered buffers "
                        << ref_buf_flags;

  } else if (!is_key_frame) {
    RTC_DCHECK_EQ(num_spatial_layers_, 1);
    RTC_DCHECK_EQ(num_temporal_layers_, 1);
    // In non-SVC mode encoder doesn't provide reference list. Assume each frame
    // refers previous one, which is stored in buffer 0.
    ref_buf_list.push_back(ref_buf_[0]);
  }

  std::vector<size_t> ref_pid_list;

  vp9_info->num_ref_pics = 0;
  for (const RefFrameBuffer& ref_buf : ref_buf_list) {
    RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
    if (ref_buf.pic_num < pic_num) {
      // Temporal reference (an earlier picture).
      if (inter_layer_pred_ != InterLayerPredMode::kOn) {
        // RTP spec limits temporal prediction to the same spatial layer.
        // It is safe to ignore this requirement if inter-layer prediction is
        // enabled for all frames when all base frames are relayed to receiver.
        RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
      } else {
        RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
      }
      RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);

      // Encoder may reference several spatial layers on the same previous
      // frame in case if some spatial layers are skipped on the current frame.
      // We shouldn't put duplicate references as it may break some old
      // clients and isn't RTP compatible.
      if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
                    ref_buf.pic_num) != ref_pid_list.end()) {
        continue;
      }
      ref_pid_list.push_back(ref_buf.pic_num);

      // p_diff is a 7-bit field in the RTP VP9 payload descriptor.
      const size_t p_diff = pic_num - ref_buf.pic_num;
      RTC_DCHECK_LE(p_diff, 127UL);

      vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
      ++vp9_info->num_ref_pics;
    } else {
      // Same picture: must be an inter-layer (spatial) reference.
      RTC_DCHECK(inter_layer_predicted);
      // RTP spec only allows to use previous spatial layer for inter-layer
      // prediction.
      RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
    }
  }
}

// Records which reference buffer slots the just-encoded frame was stored in,
// so later frames can compute picture-id differences against it.
void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& /* pkt */,
                                              const size_t pic_num) {
  vpx_svc_layer_id_t layer_id = {0};
  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  RefFrameBuffer frame_buf = {.pic_num = pic_num,
                              .spatial_layer_id = layer_id.spatial_layer_id,
                              .temporal_layer_id = layer_id.temporal_layer_id};

  if (is_svc_) {
    vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
    libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
                           &enc_layer_conf);
    // Bitmask of the buffer slots updated by this spatial layer's frame.
    const int update_buffer_slot =
        enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id];

    for (size_t i = 0; i < ref_buf_.size(); ++i) {
      if (update_buffer_slot & (1 << i)) {
        ref_buf_[i] = frame_buf;
      }
    }

    RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
                        << layer_id.spatial_layer_id << " tl "
                        << layer_id.temporal_layer_id << " updated buffers "
                        << (update_buffer_slot & (1 << 0) ? 1 : 0)
                        << (update_buffer_slot & (1 << 1) ? 1 : 0)
                        << (update_buffer_slot & (1 << 2) ? 1 : 0)
                        << (update_buffer_slot & (1 << 3) ? 1 : 0)
                        << (update_buffer_slot & (1 << 4) ? 1 : 0)
                        << (update_buffer_slot & (1 << 5) ? 1 : 0)
                        << (update_buffer_slot & (1 << 6) ? 1 : 0)
                        << (update_buffer_slot & (1 << 7) ? 1 : 0);
  } else {
    RTC_DCHECK_EQ(num_spatial_layers_, 1);
    RTC_DCHECK_EQ(num_temporal_layers_, 1);
    // In non-svc mode encoder doesn't provide reference list. Assume each frame
    // is reference and stored in buffer 0.
    ref_buf_[0] = frame_buf;
  }
}

// Builds the per-spatial-layer reference/update buffer configuration for the
// next frame on the GOF-based (no SVC controller) path.
vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences(
    bool is_key_pic,
    int first_active_spatial_layer_id) {
  // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
  RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);

  vpx_svc_ref_frame_config_t ref_config;
  memset(&ref_config, 0, sizeof(ref_config));

  // Number of buffer slots reserved per spatial layer for temporal references.
  const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1);
  const bool is_inter_layer_pred_allowed =
      inter_layer_pred_ == InterLayerPredMode::kOn ||
      (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic);
  // Slot updated by the previously processed spatial layer; consumed as the
  // GOLDEN (spatial) reference of the next layer.
  std::optional<int> last_updated_buf_idx;

  // Put temporal reference to LAST and spatial reference to GOLDEN. Update
  // frame buffer (i.e. store encoded frame) if current frame is a temporal
  // reference (i.e. it belongs to a low temporal layer) or it is a spatial
  // reference. In later case, always store spatial reference in the last
  // reference frame buffer.
  // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers
  // for temporal references plus 1 buffer for spatial reference. 7 buffers
  // in total.

  for (int sl_idx = first_active_spatial_layer_id;
       sl_idx < num_active_spatial_layers_; ++sl_idx) {
    // Key pictures restart picture numbering at 0.
    const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
    const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;

    if (!is_key_pic) {
      // Set up temporal reference.
      const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx];

      // Last reference frame buffer is reserved for spatial reference. It is
      // not supposed to be used for temporal prediction.
      RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1);

      const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
      // Incorrect spatial layer may be in the buffer due to a key-frame.
      const bool same_spatial_layer =
          ref_buf_[buf_idx].spatial_layer_id == sl_idx;
      bool correct_pid = false;
      if (is_flexible_mode_) {
        correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff;
      } else {
        // Below code assumes single temporal reference.
        RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
        correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
      }

      if (same_spatial_layer && correct_pid) {
        ref_config.lst_fb_idx[sl_idx] = buf_idx;
        ref_config.reference_last[sl_idx] = 1;
      } else {
        // This reference doesn't match with one specified by GOF. This can
        // only happen if spatial layer is enabled dynamically without key
        // frame. Spatial prediction is supposed to be enabled in this case.
        RTC_DCHECK(is_inter_layer_pred_allowed &&
                   sl_idx > first_active_spatial_layer_id);
      }
    }

    if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
      // Set up spatial reference.
      RTC_DCHECK(last_updated_buf_idx);
      ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
      ref_config.reference_golden[sl_idx] = 1;
    } else {
      // Either a temporal reference was configured above, or this is the
      // lowest active layer / inter-layer prediction is off.
      RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
                 sl_idx == first_active_spatial_layer_id ||
                 inter_layer_pred_ == InterLayerPredMode::kOff);
    }

    last_updated_buf_idx.reset();

    if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
        num_temporal_layers_ == 1) {
      // Frame is a temporal reference: store it in this layer's slot given by
      // the GOF update table.
      last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];

      // Ensure last frame buffer is not used for temporal prediction (it is
      // reserved for spatial reference).
      RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1);
    } else if (is_inter_layer_pred_allowed) {
      // Highest temporal layer: store only as a spatial reference, in the
      // dedicated last buffer slot.
      last_updated_buf_idx = kNumVp9Buffers - 1;
    }

    if (last_updated_buf_idx) {
      ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx;
    }
  }

  return ref_config;
}

// Packages one encoded layer frame emitted by the encoder into
// `encoded_image_` / `codec_specific_` and forwards it via
// DeliverBufferedFrame().
void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
  RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);

  if (pkt->data.frame.sz == 0) {
    // Ignore dropped frame.
    return;
  }

  vpx_svc_layer_id_t layer_id = {0};
  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  // Copy the bitstream out of the encoder-owned packet buffer.
  encoded_image_.SetEncodedData(EncodedImageBuffer::Create(
      static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz));

  codec_specific_ = {};
  std::optional<int> spatial_index;
  std::optional<int> temporal_index;
  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index,
                             *pkt)) {
    // Drop the frame.
    encoded_image_.set_size(0);
    return;
  }
  encoded_image_.SetSpatialIndex(spatial_index);
  encoded_image_.SetTemporalIndex(temporal_index);

  // An upper-layer frame that predicts from a lower layer is not treated as a
  // key frame even if libvpx flagged the picture as one.
  const bool is_key_frame =
      ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) &&
      !codec_specific_.codecSpecific.VP9.inter_layer_predicted;

  // Ensure encoder issued key frame on request.
  RTC_DCHECK(is_key_frame || !force_key_frame_);

  // Check if encoded frame is a key frame.
  encoded_image_._frameType = VideoFrameType::kVideoFrameDelta;
  if (is_key_frame) {
    encoded_image_._frameType = VideoFrameType::kVideoFrameKey;
    force_key_frame_ = false;
  }

  UpdateReferenceBuffers(*pkt, pics_since_key_);

  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
  encoded_image_.SetRtpTimestamp(input_image_->rtp_timestamp());
  encoded_image_.SetPresentationTimestamp(
      input_image_->presentation_timestamp());
  encoded_image_.SetColorSpace(input_image_->color_space());
  encoded_image_._encodedHeight =
      pkt->data.frame.height[layer_id.spatial_layer_id];
  encoded_image_._encodedWidth =
      pkt->data.frame.width[layer_id.spatial_layer_id];
  int qp = -1;
  libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
  encoded_image_.qp_ = qp;
  // Pull PSNR which is not pushed for VP9.
  // TODO: bugs.webrtc.org/388070060 - check SVC behavior.
  // TODO: bugs.webrtc.org/388070060 - this is broken for simulcast which seems
  // to be using kSVC.
  // NOTE(review): this calls vpx_codec_get_cx_data() directly instead of going
  // through the `libvpx_` interface used for every codec_control() above —
  // confirm the wrapper has no equivalent (this also bypasses it in tests).
  // It also walks the pending packet list with a fresh iterator; presumably
  // only PSNR packets remain unconsumed here — verify against the caller.
  vpx_codec_iter_t iter = nullptr;
  const vpx_codec_cx_pkt_t* cx_data = nullptr;
  encoded_image_.set_psnr(std::nullopt);
  while ((cx_data = vpx_codec_get_cx_data(encoder_, &iter)) != nullptr) {
    if (cx_data->kind == VPX_CODEC_PSNR_PKT) {
      // PSNR index: 0: total, 1: Y, 2: U, 3: V
      encoded_image_.set_psnr(
          EncodedImage::Psnr({.y = cx_data->data.psnr.psnr[1],
                              .u = cx_data->data.psnr.psnr[2],
                              .v = cx_data->data.psnr.psnr[3]}));
    }
  }

  // The picture is complete once the top active spatial layer is delivered.
  const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 ==
                              num_active_spatial_layers_;
  DeliverBufferedFrame(end_of_picture);
}

// Hands the buffered `encoded_image_` to the registered callback and updates
// screenshare steady-state bookkeeping. No-op if the buffer is empty.
void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
  if (encoded_image_.size() > 0) {
    if (num_spatial_layers_ > 1) {
      // Restore frame dropping settings, as dropping may be temporary forbidden
      // due to dynamically enabled layers.
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
    }

    codec_specific_.end_of_picture = end_of_picture;

    if (!simulcast_to_svc_converter_) {
      encoded_image_.SetSimulcastIndex(std::nullopt);
    } else {
      // Simulcast emulated via SVC: rewrite layer metadata accordingly.
      simulcast_to_svc_converter_->ConvertFrame(encoded_image_,
                                                codec_specific_);
    }

    encoded_complete_callback_->OnEncodedImage(encoded_image_,
                                               &codec_specific_);

    if (codec_.mode == VideoCodecMode::kScreensharing) {
      const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
      const uint32_t frame_timestamp_ms =
          1000 * encoded_image_.RtpTimestamp() / kVideoPayloadTypeFrequency;
      framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);

      const size_t steady_state_size = SteadyStateSize(
          spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx);

      // Only frames on spatial layers, which may be limited in a steady state
      // are considered for steady state detection.
      if (framerate_controller_[spatial_idx].GetTargetRate() >
          variable_framerate_screenshare::kMinFps + 1e-9) {
        // Steady state: consecutive small, low-QP frames; any other frame
        // resets the counter.
        if (encoded_image_.qp_ <= variable_framerate_screenshare::kMinQP &&
            encoded_image_.size() <= steady_state_size) {
          ++num_steady_state_frames_;
        } else {
          num_steady_state_frames_ = 0;
        }
      }
    }
    // Mark the buffer as consumed.
    encoded_image_.set_size(0);
  }
}

// Stores the sink that receives encoded frames from DeliverBufferedFrame().
int LibvpxVp9Encoder::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  encoded_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

// Reports the encoder's static capabilities plus state-dependent settings
// (scaling thresholds, per-layer fps allocation, preferred pixel formats).
VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const {
  EncoderInfo info;
  info.supports_native_handle = false;
  info.supports_simulcast = true;
  info.implementation_name = "libvpx";
  if (quality_scaler_experiment_.enabled && inited_ &&
      codec_.VP9().automaticResizeOn) {
    info.scaling_settings = VideoEncoder::ScalingSettings(
        quality_scaler_experiment_.low_qp, quality_scaler_experiment_.high_qp);
  } else {
    info.scaling_settings = VideoEncoder::ScalingSettings::kOff;
  }
  info.has_trusted_rate_controller = trusted_rate_controller_;
  info.is_hardware_accelerated = false;
  if (inited_) {
    // Find the max configured fps of any active spatial layer.
    float max_fps = 0.0;
    for (size_t si = 0; si < num_spatial_layers_; ++si) {
      if (codec_.spatialLayers[si].active &&
          codec_.spatialLayers[si].maxFramerate > max_fps) {
        max_fps = codec_.spatialLayers[si].maxFramerate;
      }
    }
    if (num_active_spatial_layers_ > 0) {
      info.mapped_resolution =
          VideoEncoder::Resolution(config_->g_w, config_->g_h);
    }

    for (size_t si = 0; si < num_spatial_layers_; ++si) {
      info.fps_allocation[si].clear();
      if (!codec_.spatialLayers[si].active) {
        continue;
      }

      // This spatial layer may already use a fraction of the total frame rate.
      const float sl_fps_fraction =
          codec_.spatialLayers[si].maxFramerate / max_fps;
      for (size_t ti = 0; ti < num_temporal_layers_; ++ti) {
        const uint32_t decimator =
            num_temporal_layers_ <= 1 ? 1 : config_->ts_rate_decimator[ti];
        RTC_DCHECK_GT(decimator, 0);
        // Fraction of kMaxFramerateFraction this (sl, tl) pair produces.
        info.fps_allocation[si].push_back(
            saturated_cast<uint8_t>(EncoderInfo::kMaxFramerateFraction *
                                    (sl_fps_fraction / decimator)));
      }
    }
    if (profile_ == VP9Profile::kProfile0) {
      // Profile 0 accepts 8-bit I420 and NV12 input directly.
      info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
                                      VideoFrameBuffer::Type::kNV12};
    }

    if (codec_.mode == VideoCodecMode::kScreensharing) {
      info.min_qp = variable_framerate_screenshare::kMinQP;
    }
  }
  if (!encoder_info_override_.resolution_bitrate_limits().empty()) {
    info.resolution_bitrate_limits =
        encoder_info_override_.resolution_bitrate_limits();
  }
  return info;
}

// Expected size in bytes of a frame on layer (sid, tid) when screenshare
// content has reached a steady state, derived from the allocated bitrate,
// the effective frame rate and an undershoot percentage.
size_t LibvpxVp9Encoder::SteadyStateSize(int sid, int tid) {
  const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate(
      sid, tid == kNoTemporalIdx ? 0 : tid);
  const float fps = (codec_.mode == VideoCodecMode::kScreensharing)
                        ? std::min(static_cast<float>(codec_.maxFramerate),
                                   framerate_controller_[sid].GetTargetRate())
                        : codec_.maxFramerate;
  // bits/frame -> bytes, scaled down by the undershoot margin, rounded.
  return static_cast<size_t>(
      bitrate_bps / (8 * fps) *
          (100 - variable_framerate_screenshare::kUndershootPct) / 100 +
      0.5);
}

// static
// Reads the "WebRTC-VP9QualityScaler" field trial into QP thresholds for the
// quality scaler; defaults apply when the trial is absent.
LibvpxVp9Encoder::QualityScalerExperiment
LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) {
  FieldTrialFlag disabled = FieldTrialFlag("Disabled");
  FieldTrialParameter<int> low_qp("low_qp", kLowVp9QpThreshold);
  // NOTE(review): "hihg_qp" looks like a typo for "high_qp", but it is the
  // key parsed out of the deployed field-trial string; renaming it would
  // silently ignore existing configs — confirm before fixing.
  FieldTrialParameter<int> high_qp("hihg_qp", kHighVp9QpThreshold);
  ParseFieldTrial({&disabled, &low_qp, &high_qp},
                  trials.Lookup("WebRTC-VP9QualityScaler"));
  QualityScalerExperiment config;
  config.enabled = !disabled.Get();
  RTC_LOG(LS_INFO) << "Webrtc quality scaler for vp9 is "
                   << (config.enabled ? "enabled." : "disabled");
  config.low_qp = low_qp.Get();
  config.high_qp = high_qp.Get();

  return config;
}

// Recomputes the per-spatial-layer speed/deblock settings from the current
// codec complexity and resolution(s).
void LibvpxVp9Encoder::UpdatePerformanceFlags() {
  flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution;
  if (codec_.GetVideoEncoderComplexity() ==
      VideoCodecComplexity::kComplexityLow) {
    // For low tier devices, always use speed 9. Only disable upper
    // layer deblocking below QCIF.
    params_by_resolution[0] = {.base_layer_speed = 9,
                               .high_layer_speed = 9,
                               .deblock_mode = 1,
                               .allow_denoising = true};
    params_by_resolution[352 * 288] = {.base_layer_speed = 9,
                                       .high_layer_speed = 9,
                                       .deblock_mode = 0,
                                       .allow_denoising = true};
  } else {
    params_by_resolution = performance_flags_.settings_by_resolution;
  }

  // Returns the parameter set for the largest configured threshold that does
  // not exceed `min_pixel_count` (map keys are lower bounds in pixels).
  const auto find_speed = [&](int min_pixel_count) {
    RTC_DCHECK(!params_by_resolution.empty());
    auto it = params_by_resolution.upper_bound(min_pixel_count);
    return std::prev(it)->second;
  };
  performance_flags_by_spatial_index_.clear();

  if (is_svc_) {
    // One parameter set per spatial layer, chosen by that layer's resolution.
    for (int si = 0; si < num_spatial_layers_; ++si) {
      performance_flags_by_spatial_index_.push_back(find_speed(
          codec_.spatialLayers[si].width * codec_.spatialLayers[si].height));
    }
  } else {
    performance_flags_by_spatial_index_.push_back(
        find_speed(codec_.width * codec_.height));
  }
}

// static
// Parses the "WebRTC-VP9-PerformanceFlags" field trial into per-resolution
// speed settings; entries with out-of-range values are dropped with a
// warning, and the built-in defaults are used if nothing valid remains.
LibvpxVp9Encoder::PerformanceFlags
LibvpxVp9Encoder::ParsePerformanceFlagsFromTrials(
    const FieldTrialsView& trials) {
  struct Params : public PerformanceFlags::ParameterSet {
    int min_pixel_count = 0;
  };

  FieldTrialStructList<Params> trials_list(
      {FieldTrialStructMember("min_pixel_count",
                              [](Params* p) { return &p->min_pixel_count; }),
       FieldTrialStructMember("high_layer_speed",
                              [](Params* p) { return &p->high_layer_speed; }),
       FieldTrialStructMember("base_layer_speed",
                              [](Params* p) { return &p->base_layer_speed; }),
       FieldTrialStructMember("deblock_mode",
                              [](Params* p) { return &p->deblock_mode; }),
       FieldTrialStructMember("denoiser",
                              [](Params* p) { return &p->allow_denoising; })},
      {});

  FieldTrialFlag per_layer_speed("use_per_layer_speed");

  ParseFieldTrial({&trials_list, &per_layer_speed},
                  trials.Lookup("WebRTC-VP9-PerformanceFlags"));

  PerformanceFlags flags;
  flags.use_per_layer_speed = per_layer_speed.Get();

  // Valid libvpx cpu-used range and deblock modes; reject anything outside.
  constexpr int kMinSpeed = 1;
  constexpr int kMaxSpeed = 9;
  for (auto& f : trials_list.Get()) {
    if (f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed ||
        f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed ||
        f.deblock_mode < 0 || f.deblock_mode > 2) {
      RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: "
                          << "min_pixel_count = " << f.min_pixel_count
                          << ", high_layer_speed = " << f.high_layer_speed
                          << ", base_layer_speed = " << f.base_layer_speed
                          << ", deblock_mode = " << f.deblock_mode;
      continue;
    }
    flags.settings_by_resolution[f.min_pixel_count] = f;
  }

  if (flags.settings_by_resolution.empty()) {
    return GetDefaultPerformanceFlags();
  }

  return flags;
}

// static
// Built-in speed/deblock defaults, keyed by minimum pixel count.
LibvpxVp9Encoder::PerformanceFlags
LibvpxVp9Encoder::GetDefaultPerformanceFlags() {
  PerformanceFlags flags;
  flags.use_per_layer_speed = true;
#ifdef MOBILE_ARM
  // Speed 8 on all layers for all resolutions.
  flags.settings_by_resolution[0] = {.base_layer_speed = 8,
                                     .high_layer_speed = 8,
                                     .deblock_mode = 0,
                                     .allow_denoising = true};
#else

  // For smaller resolutions, use lower speed setting for the temporal base
  // layer (get some coding gain at the cost of increased encoding complexity).
  // Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and
  // disable deblocking for upper-most temporal layers.
  flags.settings_by_resolution[0] = {.base_layer_speed = 5,
                                     .high_layer_speed = 8,
                                     .deblock_mode = 1,
                                     .allow_denoising = true};

  // Use speed 7 for QCIF and above.
  // Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and
  // enable deblocking for all temporal layers.
  flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7,
                                             .high_layer_speed = 8,
                                             .deblock_mode = 0,
                                             .allow_denoising = true};

  // For very high resolution (1080p and up), turn the speed all the way up
  // since this is very CPU intensive. Also disable denoising to save CPU, at
  // these resolutions denoising appear less effective and hopefully you also
  // have a less noisy video source at this point.
  flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9,
                                               .high_layer_speed = 9,
                                               .deblock_mode = 0,
                                               .allow_denoising = false};

#endif
  return flags;
}

// (Re)allocates the `raw_` libvpx image wrapper when the input pixel format
// or dimensions change; the wrapper's plane pointers are filled in later by
// PrepareBufferForProfile0() / the high-bit-depth path.
void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt,
                                                unsigned int width,
                                                unsigned int height) {
  if (!raw_) {
    raw_ = libvpx_->img_wrap(nullptr, fmt, width, height, 1, nullptr);
    RTC_LOG(LS_INFO) << "Configured VP9 encoder pixel format to "
                     << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420") << " "
                     << width << "x" << height;
  } else if (raw_->fmt != fmt || raw_->d_w != width || raw_->d_h != height) {
    RTC_LOG(LS_INFO) << "Switching VP9 encoder pixel format to "
                     << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420") << " "
                     << width << "x" << height;
    libvpx_->img_free(raw_);
    raw_ = libvpx_->img_wrap(nullptr, fmt, width, height, 1, nullptr);
  }
  // else no-op since the image is already in the right format.
  raw_->bit_depth = (fmt == VPX_IMG_FMT_I42016) ? 16 : 8;
}

// Maps/converts `buffer` to a format profile 0 can encode (I420, I420A or
// NV12), points `raw_`'s planes at the resulting pixel data, and returns the
// mapped buffer (which must be kept alive while `raw_` is in use). Returns
// nullptr on conversion failure.
scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
    scoped_refptr<VideoFrameBuffer> buffer) {
  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
      supported_formats = {VideoFrameBuffer::Type::kI420,
                           VideoFrameBuffer::Type::kNV12};

  scoped_refptr<VideoFrameBuffer> mapped_buffer;
  if (buffer->type() != VideoFrameBuffer::Type::kNative) {
    // `buffer` is already mapped.
    mapped_buffer = buffer;
  } else {
    // Attempt to map to one of the supported formats.
    mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
  }
  if (!mapped_buffer ||
      (absl::c_find(supported_formats, mapped_buffer->type()) ==
           supported_formats.end() &&
       mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
    // Unknown pixel format or unable to map, convert to I420 and prepare that
    // buffer instead to ensure Scale() is safe to use.
    auto converted_buffer = buffer->ToI420();
    if (!converted_buffer) {
      RTC_LOG(LS_ERROR) << "Failed to convert "
                        << VideoFrameBufferTypeToString(buffer->type())
                        << " image to I420. Can't encode frame.";
      return {};
    }
    RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
              converted_buffer->type() == VideoFrameBuffer::Type::kI420A);

    // Because `buffer` had to be converted, use `converted_buffer` instead.
    buffer = mapped_buffer = converted_buffer;
  }

  // Prepare `raw_` from `mapped_buffer`.
  switch (mapped_buffer->type()) {
    case VideoFrameBuffer::Type::kI420:
    case VideoFrameBuffer::Type::kI420A: {
      // Alpha plane of I420A is ignored; Y/U/V layout matches I420.
      MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420, mapped_buffer->width(),
                               mapped_buffer->height());
      const I420BufferInterface* i420_buffer = mapped_buffer->GetI420();
      RTC_DCHECK(i420_buffer);
      // libvpx only reads from these planes; const_cast is required by the
      // non-const vpx_image_t API.
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY());
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU());
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV());
      raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
      raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU();
      raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV();
      break;
    }
    case VideoFrameBuffer::Type::kNV12: {
      MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12, mapped_buffer->width(),
                               mapped_buffer->height());
      const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
      RTC_DCHECK(nv12_buffer);
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY());
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV());
      // NV12 interleaves U and V: V starts one byte after U, same stride.
      raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1;
      raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
      raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
      raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
      break;
    }
    default:
      RTC_DCHECK_NOTREACHED();
  }
  return mapped_buffer;
}

}  // namespace webrtc

#endif  // RTC_ENABLE_VP9