video_encoder.h (18974B)
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_
#define API_VIDEO_CODECS_VIDEO_ENCODER_H_

#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <string>
#include <vector>

#include "absl/container/inlined_vector.h"
#include "api/fec_controller_override.h"
#include "api/units/data_rate.h"
#include "api/video/encoded_image.h"
#include "api/video/video_bitrate_allocation.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_frame.h"
#include "api/video/video_frame_buffer.h"
#include "api/video/video_frame_type.h"
#include "api/video_codecs/video_codec.h"
#include "rtc_base/system/rtc_export.h"

namespace webrtc {

// TODO(pbos): Expose these through a public (root) header or change these APIs.
struct CodecSpecificInfo;

// Default value of `VideoEncoder::ScalingSettings::min_pixels_per_frame`
// (320x180 pixels).
constexpr int kDefaultMinPixelsPerFrame = 320 * 180;

// Interface through which an encoder delivers encoded images and reports
// dropped frames. Registered on an encoder via
// VideoEncoder::RegisterEncodeCompleteCallback().
class RTC_EXPORT EncodedImageCallback {
 public:
  virtual ~EncodedImageCallback() {}

  // Outcome returned from OnEncodedImage().
  struct Result {
    enum Error {
      OK,

      // Failed to send the packet.
      ERROR_SEND_FAILED,
    };

    explicit Result(Error error) : error(error) {}
    Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {}

    Error error;

    // Frame ID assigned to the frame. The frame ID should be the same as the ID
    // seen by the receiver for this frame. RTP timestamp of the frame is used
    // as frame ID when RTP is used to send video. Must be used only when
    // error=OK.
    uint32_t frame_id = 0;

    // Tells the encoder that the next frame should be dropped.
    bool drop_next_frame = false;
  };

  // Used to signal the encoder about the reason a frame is dropped.
  // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate
  //                                limiting purposes).
  // kDroppedByEncoder - dropped by encoder's internal rate limiter.
  // TODO(bugs.webrtc.org/10164): Delete this enum? It duplicates the more
  // general VideoStreamEncoderObserver::DropReason. Also,
  // kDroppedByMediaOptimizations is not produced by any encoder, but by
  // VideoStreamEncoder.
  enum class DropReason : uint8_t {
    kDroppedByMediaOptimizations,
    kDroppedByEncoder
  };

  // Callback function which is called when an image has been encoded.
  virtual Result OnEncodedImage(
      const EncodedImage& encoded_image,
      const CodecSpecificInfo* codec_specific_info) = 0;

  // Notification that a frame was dropped for the given reason. The default
  // implementation does nothing.
  virtual void OnDroppedFrame(DropReason /* reason */) {}
};

// Abstract video encoder interface, together with the settings and metadata
// types exchanged with it.
class RTC_EXPORT VideoEncoder {
 public:
  // Pair of low/high QP thresholds. A default-constructed instance holds -1
  // for both values.
  struct QpThresholds {
    QpThresholds(int l, int h) : low(l), high(h) {}
    QpThresholds() : low(-1), high(-1) {}
    int low;
    int high;
  };

  // Quality scaling is enabled if thresholds are provided.
  struct RTC_EXPORT ScalingSettings {
   private:
    // Private magic type for kOff, implicitly convertible to
    // ScalingSettings.
    struct KOff {};

   public:
    // TODO(bugs.webrtc.org/9078): Since std::optional should be trivially copy
    // constructible, this magic value can likely be replaced by a constexpr
    // ScalingSettings value.
    static constexpr KOff kOff = {};

    ScalingSettings(int low, int high);
    ScalingSettings(int low, int high, int min_pixels);
    ScalingSettings(const ScalingSettings&);
    ScalingSettings(KOff);  // NOLINT(runtime/explicit)
    ~ScalingSettings();

    std::optional<QpThresholds> thresholds;

    // We will never ask for a resolution lower than this.
    // TODO(kthelgason): Lower this limit when better testing
    // on MediaCodec and fallback implementations are in place.
    // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206
    int min_pixels_per_frame = kDefaultMinPixelsPerFrame;

   private:
    // Private constructor; to get an object without thresholds, use
    // the magic constant ScalingSettings::kOff.
    ScalingSettings();
  };

  // Bitrate limits for resolution.
  struct ResolutionBitrateLimits {
    ResolutionBitrateLimits(int frame_size_pixels,
                            int min_start_bitrate_bps,
                            int min_bitrate_bps,
                            int max_bitrate_bps)
        : frame_size_pixels(frame_size_pixels),
          min_start_bitrate_bps(min_start_bitrate_bps),
          min_bitrate_bps(min_bitrate_bps),
          max_bitrate_bps(max_bitrate_bps) {}
    // Size of video frame, in pixels, the bitrate thresholds are intended for.
    int frame_size_pixels = 0;
    // Recommended minimum bitrate to start encoding.
    int min_start_bitrate_bps = 0;
    // Recommended minimum bitrate.
    int min_bitrate_bps = 0;
    // Recommended maximum bitrate.
    int max_bitrate_bps = 0;

    bool operator==(const ResolutionBitrateLimits& rhs) const;
    bool operator!=(const ResolutionBitrateLimits& rhs) const {
      return !(*this == rhs);
    }
  };

  // Width/height pair; used below as the type of
  // `EncoderInfo::mapped_resolution`.
  struct RTC_EXPORT Resolution {
    Resolution(int width, int height) : width(width), height(height) {}
    int width = 0;
    int height = 0;
  };

  // Struct containing metadata about the encoder implementing this interface.
  struct RTC_EXPORT EncoderInfo {
    // Frame rate fraction representing 100% in `fps_allocation` (255).
    static constexpr uint8_t kMaxFramerateFraction =
        std::numeric_limits<uint8_t>::max();

    EncoderInfo();
    EncoderInfo(const EncoderInfo&);

    ~EncoderInfo();

    std::string ToString() const;
    bool operator==(const EncoderInfo& rhs) const;
    bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); }

    // Any encoder implementation wishing to use the WebRTC provided
    // quality scaler must populate this field.
    ScalingSettings scaling_settings;

    // The width and height of the incoming video frames should be divisible
    // by `requested_resolution_alignment`. If they are not, the encoder may
    // drop the incoming frame.
    // For example: With I420, this value would be a multiple of 2.
    // Note that this field is unrelated to any horizontal or vertical stride
    // requirements the encoder has on the incoming video frame buffers.
    uint32_t requested_resolution_alignment;

    // Same as above but if true, each simulcast layer should also be divisible
    // by `requested_resolution_alignment`.
    // Note that scale factors `scale_resolution_down_by` may be adjusted so a
    // common multiple is not too large to avoid largely cropped frames and
    // possibly with an aspect ratio far from the original.
    // Warning: large values of scale_resolution_down_by could be changed
    // considerably, especially if `requested_resolution_alignment` is large.
    bool apply_alignment_to_all_simulcast_layers;

    // If true, encoder supports working with a native handle (e.g. texture
    // handle for hw codecs) rather than requiring a raw I420 buffer.
    bool supports_native_handle;

    // The name of this particular encoder implementation, e.g. "libvpx".
    std::string implementation_name;

    // If this field is true, the encoder rate controller must perform
    // well even in difficult situations, and produce close to the specified
    // target bitrate seen over a reasonable time window, drop frames if
    // necessary in order to keep the rate correct, and react quickly to
    // changing bitrate targets. If this field is true, we disable the
    // frame dropper in the media optimization module and rely entirely on the
    // encoder to produce media at a bitrate that closely matches the target.
    // Any overshooting may result in delay buildup. If this field is
    // false (default behavior), the media opt frame dropper will drop input
    // frames if it suspects encoder misbehavior. Misbehavior is common,
    // especially in hardware codecs. Disable media opt at your own risk.
    bool has_trusted_rate_controller;

    // If this field is true, the encoder uses hardware support and different
    // thresholds will be used in CPU adaptation.
    bool is_hardware_accelerated;

    // For each spatial layer (simulcast stream or SVC layer), represented as an
    // element in `fps_allocation` a vector indicates how many temporal layers
    // the encoder is using for that spatial layer.
    // For each spatial/temporal layer pair, the frame rate fraction is given as
    // an 8bit unsigned integer where 0 = 0% and 255 = 100%.
    //
    // If the vector is empty for a given spatial layer, it indicates that frame
    // rates are not defined and we can't count on any specific frame rate to be
    // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic.
    //
    // The encoder may update this on a per-frame basis in response to both
    // internal and external signals.
    //
    // Spatial layers are treated independently, but temporal layers are
    // cumulative. For instance, if:
    //   fps_allocation[0][0] = kMaxFramerateFraction / 2;
    //   fps_allocation[0][1] = kMaxFramerateFraction;
    // Then half of the frames are in the base layer and half is in TL1, but
    // since TL1 is assumed to depend on the base layer, the frame rate is
    // indicated as the full 100% for the top layer.
    //
    // Defaults to a single spatial layer containing a single temporal layer
    // with a 100% frame rate fraction.
    absl::InlinedVector<uint8_t, kMaxTemporalStreams>
        fps_allocation[kMaxSpatialLayers];

    // Recommended bitrate limits for different resolutions.
    std::vector<ResolutionBitrateLimits> resolution_bitrate_limits;

    // Obtains the limits from `resolution_bitrate_limits` that best matches the
    // `frame_size_pixels`.
    std::optional<ResolutionBitrateLimits> GetEncoderBitrateLimitsForResolution(
        int frame_size_pixels) const;

    // If true, this encoder has internal support for generating simulcast
    // streams. Otherwise, an adapter class will be needed.
    // Even if true, the config provided to InitEncode() might not be supported,
    // in such case the encoder should return
    // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED.
    bool supports_simulcast;

    // The list of pixel formats preferred by the encoder. It is assumed that if
    // the list is empty and supports_native_handle is false, then {I420} is the
    // preferred pixel format. The order of the formats does not matter.
    absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
        preferred_pixel_formats;

    // Indicates whether or not QP value encoder writes into frame/slice/tile
    // header can be interpreted as average frame/slice/tile QP.
    std::optional<bool> is_qp_trusted;

    // The minimum QP that the encoder is expected to use with the current
    // configuration. This may be used to determine if the encoder has reached
    // its target video quality for static screenshare content.
    std::optional<int> min_qp;

    // Maximum resolution accessed by software encoder,
    // i.e. resolution needed for cpu readable image.
    // This has to be set by software encoders.
    // If it's not set, mapping will happen during the
    // encode time, otherwise more optimal implementation
    // specific path may be used.
    std::optional<Resolution> mapped_resolution;
  };

  // Rate control settings handed to the encoder through SetRates().
  struct RTC_EXPORT RateControlParameters {
    RateControlParameters();
    RateControlParameters(const VideoBitrateAllocation& bitrate,
                          double framerate_fps);
    RateControlParameters(const VideoBitrateAllocation& bitrate,
                          double framerate_fps,
                          DataRate bandwidth_allocation);
    virtual ~RateControlParameters();

    // Target bitrate, per spatial/temporal layer.
    // A target bitrate of 0bps indicates a layer should not be encoded at all.
    VideoBitrateAllocation target_bitrate;
    // Adjusted target bitrate, per spatial/temporal layer. May be lower or
    // higher than the target depending on encoder behaviour.
    VideoBitrateAllocation bitrate;
    // Target framerate, in fps. A value <= 0.0 is invalid and should be
    // interpreted as framerate target not available. In this case the encoder
    // should fall back to the max framerate specified in `codec_settings` of
    // the last InitEncode() call.
    double framerate_fps;
    // The network bandwidth available for video. This is at least
    // `bitrate.get_sum_bps()`, but may be higher if the application is not
    // network constrained.
    DataRate bandwidth_allocation;

    bool operator==(const RateControlParameters& rhs) const;
    bool operator!=(const RateControlParameters& rhs) const;
  };

  // Payload of OnLossNotification().
  struct LossNotification {
    // The timestamp of the last decodable frame *prior* to the last received.
    // (The last received - described below - might itself be decodable or not.)
    uint32_t timestamp_of_last_decodable;
    // The timestamp of the last received frame.
    uint32_t timestamp_of_last_received;
    // Describes whether the dependencies of the last received frame were
    // all decodable.
    // `false` if some dependencies were undecodable, `true` if all dependencies
    // were decodable, and `nullopt` if the dependencies are unknown.
    std::optional<bool> dependencies_of_last_received_decodable;
    // Describes whether the received frame was decodable.
    // `false` if some dependency was undecodable or if some packet belonging
    // to the last received frame was missed.
    // `true` if all dependencies were decodable and all packets belonging
    // to the last received frame were received.
    // `nullopt` if no packet belonging to the last frame was missed, but the
    // last packet in the frame was not yet received.
    std::optional<bool> last_received_decodable;
  };

  // Negotiated capabilities which the VideoEncoder may expect the other
  // side to use.
  struct Capabilities {
    explicit Capabilities(bool loss_notification)
        : loss_notification(loss_notification) {}
    bool loss_notification;
  };

  // Settings passed to the two-parameter InitEncode() overload.
  struct Settings {
    Settings(const Capabilities& capabilities,
             int number_of_cores,
             size_t max_payload_size)
        : capabilities(capabilities),
          number_of_cores(number_of_cores),
          max_payload_size(max_payload_size) {}

    Capabilities capabilities;
    int number_of_cores;
    size_t max_payload_size;
    // Experimental API - currently only supported by LibvpxVp8Encoder and
    // the OpenH264 encoder. If set, limits the number of encoder threads.
    std::optional<int> encoder_thread_limit;
  };

  // Static helpers returning codec-specific settings structs for VP8, VP9 and
  // H264 respectively. Defined outside this header.
  static VideoCodecVP8 GetDefaultVp8Settings();
  static VideoCodecVP9 GetDefaultVp9Settings();
  static VideoCodecH264 GetDefaultH264Settings();

  virtual ~VideoEncoder() {}

  // Set a FecControllerOverride, through which the encoder may override
  // decisions made by FecController.
  // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual.
  virtual void SetFecControllerOverride(
      FecControllerOverride* fec_controller_override);

  // Initialize the encoder with the information from `codec_settings`.
  //
  // Input:
  //          - codec_settings    : Codec settings
  //          - settings          : Settings affecting the encoding itself.
  // Input for deprecated version:
  //          - number_of_cores   : Number of cores available for the encoder
  //          - max_payload_size  : The maximum size each payload is allowed
  //                                to have. Usually MTU - overhead.
  //
  // Return value                  : Set bit rate if OK
  //                                 <0 - Errors:
  //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
  //                                  WEBRTC_VIDEO_CODEC_ERR_SIZE
  //                                  WEBRTC_VIDEO_CODEC_MEMORY
  //                                  WEBRTC_VIDEO_CODEC_ERROR
  // NOTE(review): "Set bit rate if OK" looks stale next to the sibling methods,
  // which document WEBRTC_VIDEO_CODEC_OK on success - confirm against the
  // implementations.
  // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting
  // an announcement to discuss-webrtc, remove the three-parameters variant
  // and make the two-parameters variant pure-virtual.
  /* ABSL_DEPRECATED("bugs.webrtc.org/10720") */ virtual int32_t InitEncode(
      const VideoCodec* codec_settings,
      int32_t number_of_cores,
      size_t max_payload_size);
  virtual int InitEncode(const VideoCodec* codec_settings,
                         const VideoEncoder::Settings& settings);

  // Register an encode complete callback object.
  //
  // Input:
  //          - callback         : Callback object which handles encoded images.
  //
  // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
  virtual int32_t RegisterEncodeCompleteCallback(
      EncodedImageCallback* callback) = 0;

  // Free encoder memory.
  // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
  virtual int32_t Release() = 0;

  // Encode an image (as a part of a video stream). The encoded image
  // will be returned to the user through the encode complete callback.
  //
  // Input:
  //          - frame             : Image to be encoded
  //          - frame_types       : Frame type to be generated by the encoder.
  //
  // Return value                 : WEBRTC_VIDEO_CODEC_OK if OK
  //                                <0 - Errors:
  //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
  //                                  WEBRTC_VIDEO_CODEC_MEMORY
  //                                  WEBRTC_VIDEO_CODEC_ERROR
  virtual int32_t Encode(const VideoFrame& frame,
                         const std::vector<VideoFrameType>* frame_types) = 0;

  // Sets rate control parameters: bitrate, framerate, etc. These settings are
  // instantaneous (i.e. not moving averages) and should apply from now until
  // the next call to SetRates().
  virtual void SetRates(const RateControlParameters& parameters) = 0;

  // Inform the encoder when the packet loss rate changes.
  //
  // Input:   - packet_loss_rate  : The packet loss rate (0.0 to 1.0).
  virtual void OnPacketLossRateUpdate(float packet_loss_rate);

  // Inform the encoder when the round trip time changes.
  //
  // Input:   - rtt_ms            : The new RTT, in milliseconds.
  virtual void OnRttUpdate(int64_t rtt_ms);

  // Called when a loss notification is received.
  virtual void OnLossNotification(const LossNotification& loss_notification);

  // Returns meta-data about the encoder, such as implementation name.
  // The output of this method may change during runtime. For instance if a
  // hardware encoder fails, it may fall back to doing software encoding using
  // an implementation with different characteristics.
  virtual EncoderInfo GetEncoderInfo() const = 0;
};
}  // namespace webrtc
#endif  // API_VIDEO_CODECS_VIDEO_ENCODER_H_