simple_encoder_wrapper.cc (8185B)
1 /* 2 * Copyright (c) 2024 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "api/video_codecs/simple_encoder_wrapper.h" 12 13 #include <algorithm> 14 #include <cstddef> 15 #include <cstdint> 16 #include <memory> 17 #include <optional> 18 #include <string> 19 #include <utility> 20 #include <variant> 21 #include <vector> 22 23 #include "absl/algorithm/container.h" 24 #include "absl/strings/string_view.h" 25 #include "api/array_view.h" 26 #include "api/scoped_refptr.h" 27 #include "api/units/data_size.h" 28 #include "api/units/frequency.h" 29 #include "api/video/video_frame_buffer.h" 30 #include "api/video_codecs/scalability_mode.h" 31 #include "api/video_codecs/scalability_mode_helper.h" 32 #include "api/video_codecs/video_encoder_factory_interface.h" 33 #include "api/video_codecs/video_encoder_interface.h" 34 #include "common_video/generic_frame_descriptor/generic_frame_info.h" 35 #include "modules/video_coding/svc/create_scalability_structure.h" 36 #include "modules/video_coding/svc/scalable_video_controller.h" 37 #include "rtc_base/checks.h" 38 #include "rtc_base/numerics/rational.h" 39 40 namespace webrtc { 41 using PredictionConstraints = 42 VideoEncoderFactoryInterface::Capabilities::PredictionConstraints; 43 using FrameEncodeSettings = VideoEncoderInterface::FrameEncodeSettings; 44 45 namespace { 46 enum class Inter { kS, kL, kKey }; 47 enum class Scaling { k1_2, k2_3 }; 48 std::string SvcToString(int spatial_layers, 49 int temporal_layers, 50 Inter inter, 51 Scaling scaling) { 52 RTC_CHECK(spatial_layers > 1 || inter == Inter::kL); 53 std::string res; 54 res += inter == Inter::kS ? "S" : "L"; 55 res += std::to_string(spatial_layers); 56 res += "T"; 57 res += std::to_string(temporal_layers); 58 if (scaling == Scaling::k2_3) { 59 res += "h"; 60 } 61 if (inter == Inter::kKey) { 62 res += "_KEY"; 63 } 64 65 return res; 66 } 67 } // namespace 68 69 // static 70 std::vector<std::string> SimpleEncoderWrapper::SupportedWebrtcSvcModes( 71 const PredictionConstraints& prediction_constraints) { 72 std::vector<std::string> res; 73 74 const int max_spatial_layers = 75 std::min(3, prediction_constraints.max_spatial_layers); 76 const int max_temporal_layers = 77 std::min(3, prediction_constraints.max_temporal_layers); 78 const bool scale_by_half = 79 absl::c_linear_search(prediction_constraints.scaling_factors, 80 Rational{.numerator = 1, .denominator = 2}); 81 const bool scale_by_two_thirds = 82 absl::c_linear_search(prediction_constraints.scaling_factors, 83 Rational{.numerator = 2, .denominator = 3}); 84 const bool inter_layer = 85 prediction_constraints.max_references > 1 && 86 prediction_constraints.buffer_space_type != 87 PredictionConstraints::BufferSpaceType::kMultiInstance; 88 89 for (int s = 1; s <= max_spatial_layers; ++s) { 90 for (int t = 1; t <= max_temporal_layers; ++t) { 91 if (prediction_constraints.num_buffers > ((std::max(1, t - 1) * s) - 1)) { 92 if (s == 1 || inter_layer) { 93 res.push_back(SvcToString(s, t, Inter::kL, Scaling::k1_2)); 94 if (s == 1) { 95 continue; 96 } 97 } 98 if (scale_by_half) { 99 res.push_back(SvcToString(s, t, Inter::kS, Scaling::k1_2)); 100 if (inter_layer) { 101 res.push_back(SvcToString(s, t, Inter::kKey, Scaling::k1_2)); 102 } 103 } 104 if (scale_by_two_thirds) { 105 res.push_back(SvcToString(s, t, Inter::kS, Scaling::k2_3)); 106 if (inter_layer) { 107 res.push_back(SvcToString(s, t, Inter::kKey, Scaling::k2_3)); 108 res.push_back(SvcToString(s, t, Inter::kL, Scaling::k2_3)); 109 } 110 } 111 } 112 } 113 } 114 115 return res; 116 } 117 118 // static 119 std::unique_ptr<SimpleEncoderWrapper> SimpleEncoderWrapper::Create( 120 std::unique_ptr<VideoEncoderInterface> encoder, 121 absl::string_view scalability_mode) { 122 if (!encoder) { 123 return nullptr; 124 } 125 126 std::optional<ScalabilityMode> sm = 127 ScalabilityModeStringToEnum(scalability_mode); 128 if (!sm) { 129 return nullptr; 130 } 131 132 std::unique_ptr<ScalableVideoController> svc_controller = 133 CreateScalabilityStructure(*sm); 134 if (!svc_controller) { 135 return nullptr; 136 } 137 138 return std::make_unique<SimpleEncoderWrapper>(std::move(encoder), 139 std::move(svc_controller)); 140 } 141 142 SimpleEncoderWrapper::SimpleEncoderWrapper( 143 std::unique_ptr<VideoEncoderInterface> encoder, 144 std::unique_ptr<ScalableVideoController> svc_controller) 145 : encoder_(std::move(encoder)), 146 svc_controller_(std::move(svc_controller)), 147 layer_configs_(svc_controller_->StreamConfig()) {} 148 149 void SimpleEncoderWrapper::SetEncodeQp(int qp) { 150 target_qp_ = qp; 151 } 152 153 void SimpleEncoderWrapper::SetEncodeFps(int fps) { 154 fps_ = fps; 155 } 156 157 void SimpleEncoderWrapper::Encode(scoped_refptr<VideoFrameBuffer> frame_buffer, 158 bool force_keyframe, 159 EncodeResultCallback callback) { 160 std::vector<ScalableVideoController::LayerFrameConfig> configs = 161 svc_controller_->NextFrameConfig(force_keyframe); 162 std::vector<FrameEncodeSettings> encode_settings; 163 std::vector<GenericFrameInfo> frame_infos; 164 165 for (size_t s = 0; s < configs.size(); ++s) { 166 const ScalableVideoController::LayerFrameConfig& config = configs[s]; 167 frame_infos.push_back(svc_controller_->OnEncodeDone(config)); 168 FrameEncodeSettings& settings = encode_settings.emplace_back(); 169 settings.rate_options = VideoEncoderInterface::FrameEncodeSettings::Cqp{ 170 .target_qp = target_qp_}; 171 settings.spatial_id = config.SpatialId(); 172 settings.temporal_id = config.TemporalId(); 173 const int num = layer_configs_.scaling_factor_num[s]; 174 const int den = layer_configs_.scaling_factor_den[s]; 175 settings.resolution = {.width = (frame_buffer->width() * num / den), 176 .height = (frame_buffer->height() * num / den)}; 177 178 bool buffer_updated = false; 179 for (const CodecBufferUsage& buffer : config.Buffers()) { 180 if (buffer.referenced) { 181 settings.reference_buffers.push_back(buffer.id); 182 } 183 if (buffer.updated) { 184 RTC_CHECK(!buffer_updated); 185 settings.update_buffer = buffer.id; 186 buffer_updated = true; 187 } 188 } 189 190 if (settings.reference_buffers.empty()) { 191 settings.frame_type = FrameType::kKeyframe; 192 } 193 194 struct FrameOut : public VideoEncoderInterface::FrameOutput { 195 ArrayView<uint8_t> GetBitstreamOutputBuffer(DataSize size) override { 196 bitstream.resize(size.bytes()); 197 return bitstream; 198 } 199 200 void EncodeComplete( 201 const VideoEncoderInterface::EncodeResult& result) override { 202 auto* data = std::get_if<VideoEncoderInterface::EncodedData>(&result); 203 204 SimpleEncoderWrapper::EncodeResult res; 205 if (!data) { 206 res.oh_no = true; 207 callback(res); 208 return; 209 } 210 211 res.frame_type = data->frame_type; 212 res.bitstream_data = std::move(bitstream); 213 res.generic_frame_info = frame_info; 214 if (res.frame_type == FrameType::kKeyframe) { 215 res.dependency_structure = svc_controller->DependencyStructure(); 216 } 217 callback(res); 218 } 219 std::vector<uint8_t> bitstream; 220 EncodeResultCallback callback; 221 GenericFrameInfo frame_info; 222 ScalableVideoController* svc_controller; 223 }; 224 225 auto out = std::make_unique<FrameOut>(); 226 227 out->callback = callback; 228 out->frame_info = std::move(frame_infos[settings.spatial_id]); 229 out->svc_controller = svc_controller_.get(); 230 231 settings.frame_output = std::move(out); 232 } 233 234 encoder_->Encode(std::move(frame_buffer), 235 {.presentation_timestamp = presentation_timestamp_}, 236 std::move(encode_settings)); 237 presentation_timestamp_ += 1 / Frequency::Hertz(fps_); 238 } 239 240 } // namespace webrtc