tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

video_encoder.h (18974B)


      1 /*
      2 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_
     12 #define API_VIDEO_CODECS_VIDEO_ENCODER_H_
     13 
     14 #include <cstddef>
     15 #include <cstdint>
     16 #include <limits>
     17 #include <optional>
     18 #include <string>
     19 #include <vector>
     20 
     21 #include "absl/container/inlined_vector.h"
     22 #include "api/fec_controller_override.h"
     23 #include "api/units/data_rate.h"
     24 #include "api/video/encoded_image.h"
     25 #include "api/video/video_bitrate_allocation.h"
     26 #include "api/video/video_codec_constants.h"
     27 #include "api/video/video_frame.h"
     28 #include "api/video/video_frame_buffer.h"
     29 #include "api/video/video_frame_type.h"
     30 #include "api/video_codecs/video_codec.h"
     31 #include "rtc_base/system/rtc_export.h"
     32 
     33 namespace webrtc {
     34 
     35 // TODO(pbos): Expose these through a public (root) header or change these APIs.
     36 struct CodecSpecificInfo;
     37 
     38 constexpr int kDefaultMinPixelsPerFrame = 320 * 180;
     39 
     40 class RTC_EXPORT EncodedImageCallback {
     41 public:
     42  virtual ~EncodedImageCallback() {}
     43 
     44  struct Result {
     45    enum Error {
     46      OK,
     47 
     48      // Failed to send the packet.
     49      ERROR_SEND_FAILED,
     50    };
     51 
     52    explicit Result(Error error) : error(error) {}
     53    Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {}
     54 
     55    Error error;
     56 
     57    // Frame ID assigned to the frame. The frame ID should be the same as the ID
     58    // seen by the receiver for this frame. RTP timestamp of the frame is used
     59    // as frame ID when RTP is used to send video. Must be used only when
     60    // error=OK.
     61    uint32_t frame_id = 0;
     62 
     63    // Tells the encoder that the next frame is should be dropped.
     64    bool drop_next_frame = false;
     65  };
     66 
     67  // Used to signal the encoder about reason a frame is dropped.
     68  // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate
     69  // limiting purposes).
     70  // kDroppedByEncoder - dropped by encoder's internal rate limiter.
     71  // TODO(bugs.webrtc.org/10164): Delete this enum? It duplicates the more
     72  // general VideoStreamEncoderObserver::DropReason. Also,
     73  // kDroppedByMediaOptimizations is not produced by any encoder, but by
     74  // VideoStreamEncoder.
     75  enum class DropReason : uint8_t {
     76    kDroppedByMediaOptimizations,
     77    kDroppedByEncoder
     78  };
     79 
     80  // Callback function which is called when an image has been encoded.
     81  virtual Result OnEncodedImage(
     82      const EncodedImage& encoded_image,
     83      const CodecSpecificInfo* codec_specific_info) = 0;
     84 
     85  virtual void OnDroppedFrame(DropReason /* reason */) {}
     86 };
     87 
     88 class RTC_EXPORT VideoEncoder {
     89 public:
     90  struct QpThresholds {
     91    QpThresholds(int l, int h) : low(l), high(h) {}
     92    QpThresholds() : low(-1), high(-1) {}
     93    int low;
     94    int high;
     95  };
     96 
     97  // Quality scaling is enabled if thresholds are provided.
     98  struct RTC_EXPORT ScalingSettings {
     99   private:
    100    // Private magic type for kOff, implicitly convertible to
    101    // ScalingSettings.
    102    struct KOff {};
    103 
    104   public:
    105    // TODO(bugs.webrtc.org/9078): Since std::optional should be trivially copy
    106    // constructible, this magic value can likely be replaced by a constexpr
    107    // ScalingSettings value.
    108    static constexpr KOff kOff = {};
    109 
    110    ScalingSettings(int low, int high);
    111    ScalingSettings(int low, int high, int min_pixels);
    112    ScalingSettings(const ScalingSettings&);
    113    ScalingSettings(KOff);  // NOLINT(runtime/explicit)
    114    ~ScalingSettings();
    115 
    116    std::optional<QpThresholds> thresholds;
    117 
    118    // We will never ask for a resolution lower than this.
    119    // TODO(kthelgason): Lower this limit when better testing
    120    // on MediaCodec and fallback implementations are in place.
    121    // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206
    122    int min_pixels_per_frame = kDefaultMinPixelsPerFrame;
    123 
    124   private:
    125    // Private constructor; to get an object without thresholds, use
    126    // the magic constant ScalingSettings::kOff.
    127    ScalingSettings();
    128  };
    129 
    130  // Bitrate limits for resolution.
    131  struct ResolutionBitrateLimits {
    132    ResolutionBitrateLimits(int frame_size_pixels,
    133                            int min_start_bitrate_bps,
    134                            int min_bitrate_bps,
    135                            int max_bitrate_bps)
    136        : frame_size_pixels(frame_size_pixels),
    137          min_start_bitrate_bps(min_start_bitrate_bps),
    138          min_bitrate_bps(min_bitrate_bps),
    139          max_bitrate_bps(max_bitrate_bps) {}
    140    // Size of video frame, in pixels, the bitrate thresholds are intended for.
    141    int frame_size_pixels = 0;
    142    // Recommended minimum bitrate to start encoding.
    143    int min_start_bitrate_bps = 0;
    144    // Recommended minimum bitrate.
    145    int min_bitrate_bps = 0;
    146    // Recommended maximum bitrate.
    147    int max_bitrate_bps = 0;
    148 
    149    bool operator==(const ResolutionBitrateLimits& rhs) const;
    150    bool operator!=(const ResolutionBitrateLimits& rhs) const {
    151      return !(*this == rhs);
    152    }
    153  };
    154 
    155  struct RTC_EXPORT Resolution {
    156    Resolution(int width, int height) : width(width), height(height) {}
    157    int width = 0;
    158    int height = 0;
    159  };
    160 
    161  // Struct containing metadata about the encoder implementing this interface.
    162  struct RTC_EXPORT EncoderInfo {
    163    static constexpr uint8_t kMaxFramerateFraction =
    164        std::numeric_limits<uint8_t>::max();
    165 
    166    EncoderInfo();
    167    EncoderInfo(const EncoderInfo&);
    168 
    169    ~EncoderInfo();
    170 
    171    std::string ToString() const;
    172    bool operator==(const EncoderInfo& rhs) const;
    173    bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); }
    174 
    175    // Any encoder implementation wishing to use the WebRTC provided
    176    // quality scaler must populate this field.
    177    ScalingSettings scaling_settings;
    178 
    179    // The width and height of the incoming video frames should be divisible
    180    // by `requested_resolution_alignment`. If they are not, the encoder may
    181    // drop the incoming frame.
    182    // For example: With I420, this value would be a multiple of 2.
    183    // Note that this field is unrelated to any horizontal or vertical stride
    184    // requirements the encoder has on the incoming video frame buffers.
    185    uint32_t requested_resolution_alignment;
    186 
    187    // Same as above but if true, each simulcast layer should also be divisible
    188    // by `requested_resolution_alignment`.
    189    // Note that scale factors `scale_resolution_down_by` may be adjusted so a
    190    // common multiple is not too large to avoid largely cropped frames and
    191    // possibly with an aspect ratio far from the original.
    192    // Warning: large values of scale_resolution_down_by could be changed
    193    // considerably, especially if `requested_resolution_alignment` is large.
    194    bool apply_alignment_to_all_simulcast_layers;
    195 
    196    // If true, encoder supports working with a native handle (e.g. texture
    197    // handle for hw codecs) rather than requiring a raw I420 buffer.
    198    bool supports_native_handle;
    199 
    200    // The name of this particular encoder implementation, e.g. "libvpx".
    201    std::string implementation_name;
    202 
    203    // If this field is true, the encoder rate controller must perform
    204    // well even in difficult situations, and produce close to the specified
    205    // target bitrate seen over a reasonable time window, drop frames if
    206    // necessary in order to keep the rate correct, and react quickly to
    207    // changing bitrate targets. If this method returns true, we disable the
    208    // frame dropper in the media optimization module and rely entirely on the
    209    // encoder to produce media at a bitrate that closely matches the target.
    210    // Any overshooting may result in delay buildup. If this method returns
    211    // false (default behavior), the media opt frame dropper will drop input
    212    // frames if it suspect encoder misbehavior. Misbehavior is common,
    213    // especially in hardware codecs. Disable media opt at your own risk.
    214    bool has_trusted_rate_controller;
    215 
    216    // If this field is true, the encoder uses hardware support and different
    217    // thresholds will be used in CPU adaptation.
    218    bool is_hardware_accelerated;
    219 
    220    // For each spatial layer (simulcast stream or SVC layer), represented as an
    221    // element in `fps_allocation` a vector indicates how many temporal layers
    222    // the encoder is using for that spatial layer.
    223    // For each spatial/temporal layer pair, the frame rate fraction is given as
    224    // an 8bit unsigned integer where 0 = 0% and 255 = 100%.
    225    //
    226    // If the vector is empty for a given spatial layer, it indicates that frame
    227    // rates are not defined and we can't count on any specific frame rate to be
    228    // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic.
    229    //
    230    // The encoder may update this on a per-frame basis in response to both
    231    // internal and external signals.
    232    //
    233    // Spatial layers are treated independently, but temporal layers are
    234    // cumulative. For instance, if:
    235    //   fps_allocation[0][0] = kMaxFramerateFraction / 2;
    236    //   fps_allocation[0][1] = kMaxFramerateFraction;
    237    // Then half of the frames are in the base layer and half is in TL1, but
    238    // since TL1 is assumed to depend on the base layer, the frame rate is
    239    // indicated as the full 100% for the top layer.
    240    //
    241    // Defaults to a single spatial layer containing a single temporal layer
    242    // with a 100% frame rate fraction.
    243    absl::InlinedVector<uint8_t, kMaxTemporalStreams>
    244        fps_allocation[kMaxSpatialLayers];
    245 
    246    // Recommended bitrate limits for different resolutions.
    247    std::vector<ResolutionBitrateLimits> resolution_bitrate_limits;
    248 
    249    // Obtains the limits from `resolution_bitrate_limits` that best matches the
    250    // `frame_size_pixels`.
    251    std::optional<ResolutionBitrateLimits> GetEncoderBitrateLimitsForResolution(
    252        int frame_size_pixels) const;
    253 
    254    // If true, this encoder has internal support for generating simulcast
    255    // streams. Otherwise, an adapter class will be needed.
    256    // Even if true, the config provided to InitEncode() might not be supported,
    257    // in such case the encoder should return
    258    // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED.
    259    bool supports_simulcast;
    260 
    261    // The list of pixel formats preferred by the encoder. It is assumed that if
    262    // the list is empty and supports_native_handle is false, then {I420} is the
    263    // preferred pixel format. The order of the formats does not matter.
    264    absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
    265        preferred_pixel_formats;
    266 
    267    // Indicates whether or not QP value encoder writes into frame/slice/tile
    268    // header can be interpreted as average frame/slice/tile QP.
    269    std::optional<bool> is_qp_trusted;
    270 
    271    // The minimum QP that the encoder is expected to use with the current
    272    // configuration. This may be used to determine if the encoder has reached
    273    // its target video quality for static screenshare content.
    274    std::optional<int> min_qp;
    275 
    276    // Maximum resolution accessed by software encoder,
    277    // i.e. resolution needed for cpu readable image.
    278    // This has to be set by software encoders.
    279    // If it's not set, mapping will happen during the
    280    // encode time, otherwise more optimal implementation
    281    // specific path may be used.
    282    std::optional<Resolution> mapped_resolution;
    283  };
    284 
    285  struct RTC_EXPORT RateControlParameters {
    286    RateControlParameters();
    287    RateControlParameters(const VideoBitrateAllocation& bitrate,
    288                          double framerate_fps);
    289    RateControlParameters(const VideoBitrateAllocation& bitrate,
    290                          double framerate_fps,
    291                          DataRate bandwidth_allocation);
    292    virtual ~RateControlParameters();
    293 
    294    // Target bitrate, per spatial/temporal layer.
    295    // A target bitrate of 0bps indicates a layer should not be encoded at all.
    296    VideoBitrateAllocation target_bitrate;
    297    // Adjusted target bitrate, per spatial/temporal layer. May be lower or
    298    // higher than the target depending on encoder behaviour.
    299    VideoBitrateAllocation bitrate;
    300    // Target framerate, in fps. A value <= 0.0 is invalid and should be
    301    // interpreted as framerate target not available. In this case the encoder
    302    // should fall back to the max framerate specified in `codec_settings` of
    303    // the last InitEncode() call.
    304    double framerate_fps;
    305    // The network bandwidth available for video. This is at least
    306    // `bitrate.get_sum_bps()`, but may be higher if the application is not
    307    // network constrained.
    308    DataRate bandwidth_allocation;
    309 
    310    bool operator==(const RateControlParameters& rhs) const;
    311    bool operator!=(const RateControlParameters& rhs) const;
    312  };
    313 
    314  struct LossNotification {
    315    // The timestamp of the last decodable frame *prior* to the last received.
    316    // (The last received - described below - might itself be decodable or not.)
    317    uint32_t timestamp_of_last_decodable;
    318    // The timestamp of the last received frame.
    319    uint32_t timestamp_of_last_received;
    320    // Describes whether the dependencies of the last received frame were
    321    // all decodable.
    322    // `false` if some dependencies were undecodable, `true` if all dependencies
    323    // were decodable, and `nullopt` if the dependencies are unknown.
    324    std::optional<bool> dependencies_of_last_received_decodable;
    325    // Describes whether the received frame was decodable.
    326    // `false` if some dependency was undecodable or if some packet belonging
    327    // to the last received frame was missed.
    328    // `true` if all dependencies were decodable and all packets belonging
    329    // to the last received frame were received.
    330    // `nullopt` if no packet belonging to the last frame was missed, but the
    331    // last packet in the frame was not yet received.
    332    std::optional<bool> last_received_decodable;
    333  };
    334 
    335  // Negotiated capabilities which the VideoEncoder may expect the other
    336  // side to use.
    337  struct Capabilities {
    338    explicit Capabilities(bool loss_notification)
    339        : loss_notification(loss_notification) {}
    340    bool loss_notification;
    341  };
    342 
    343  struct Settings {
    344    Settings(const Capabilities& capabilities,
    345             int number_of_cores,
    346             size_t max_payload_size)
    347        : capabilities(capabilities),
    348          number_of_cores(number_of_cores),
    349          max_payload_size(max_payload_size) {}
    350 
    351    Capabilities capabilities;
    352    int number_of_cores;
    353    size_t max_payload_size;
    354    // Experimental API - currently only supported by LibvpxVp8Encoder and
    355    // the OpenH264 encoder. If set, limits the number of encoder threads.
    356    std::optional<int> encoder_thread_limit;
    357  };
    358 
    359  static VideoCodecVP8 GetDefaultVp8Settings();
    360  static VideoCodecVP9 GetDefaultVp9Settings();
    361  static VideoCodecH264 GetDefaultH264Settings();
    362 
    363  virtual ~VideoEncoder() {}
    364 
    365  // Set a FecControllerOverride, through which the encoder may override
    366  // decisions made by FecController.
    367  // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual.
    368  virtual void SetFecControllerOverride(
    369      FecControllerOverride* fec_controller_override);
    370 
    371  // Initialize the encoder with the information from the codecSettings
    372  //
    373  // Input:
    374  //          - codec_settings    : Codec settings
    375  //          - settings          : Settings affecting the encoding itself.
    376  // Input for deprecated version:
    377  //          - number_of_cores   : Number of cores available for the encoder
    378  //          - max_payload_size  : The maximum size each payload is allowed
    379  //                                to have. Usually MTU - overhead.
    380  //
    381  // Return value                  : Set bit rate if OK
    382  //                                 <0 - Errors:
    383  //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
    384  //                                  WEBRTC_VIDEO_CODEC_ERR_SIZE
    385  //                                  WEBRTC_VIDEO_CODEC_MEMORY
    386  //                                  WEBRTC_VIDEO_CODEC_ERROR
    387  // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting
    388  // an announcement to discuss-webrtc, remove the three-parameters variant
    389  // and make the two-parameters variant pure-virtual.
    390  /* ABSL_DEPRECATED("bugs.webrtc.org/10720") */ virtual int32_t InitEncode(
    391      const VideoCodec* codec_settings,
    392      int32_t number_of_cores,
    393      size_t max_payload_size);
    394  virtual int InitEncode(const VideoCodec* codec_settings,
    395                         const VideoEncoder::Settings& settings);
    396 
    397  // Register an encode complete callback object.
    398  //
    399  // Input:
    400  //          - callback         : Callback object which handles encoded images.
    401  //
    402  // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
    403  virtual int32_t RegisterEncodeCompleteCallback(
    404      EncodedImageCallback* callback) = 0;
    405 
    406  // Free encoder memory.
    407  // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
    408  virtual int32_t Release() = 0;
    409 
    410  // Encode an image (as a part of a video stream). The encoded image
    411  // will be returned to the user through the encode complete callback.
    412  //
    413  // Input:
    414  //          - frame             : Image to be encoded
    415  //          - frame_types       : Frame type to be generated by the encoder.
    416  //
    417  // Return value                 : WEBRTC_VIDEO_CODEC_OK if OK
    418  //                                <0 - Errors:
    419  //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
    420  //                                  WEBRTC_VIDEO_CODEC_MEMORY
    421  //                                  WEBRTC_VIDEO_CODEC_ERROR
    422  virtual int32_t Encode(const VideoFrame& frame,
    423                         const std::vector<VideoFrameType>* frame_types) = 0;
    424 
    425  // Sets rate control parameters: bitrate, framerate, etc. These settings are
    426  // instantaneous (i.e. not moving averages) and should apply from now until
    427  // the next call to SetRates().
    428  virtual void SetRates(const RateControlParameters& parameters) = 0;
    429 
    430  // Inform the encoder when the packet loss rate changes.
    431  //
    432  // Input:   - packet_loss_rate  : The packet loss rate (0.0 to 1.0).
    433  virtual void OnPacketLossRateUpdate(float packet_loss_rate);
    434 
    435  // Inform the encoder when the round trip time changes.
    436  //
    437  // Input:   - rtt_ms            : The new RTT, in milliseconds.
    438  virtual void OnRttUpdate(int64_t rtt_ms);
    439 
    440  // Called when a loss notification is received.
    441  virtual void OnLossNotification(const LossNotification& loss_notification);
    442 
    443  // Returns meta-data about the encoder, such as implementation name.
    444  // The output of this method may change during runtime. For instance if a
    445  // hardware encoder fails, it may fall back to doing software encoding using
    446  // an implementation with different characteristics.
    447  virtual EncoderInfo GetEncoderInfo() const = 0;
    448 };
    449 }  // namespace webrtc
    450 #endif  // API_VIDEO_CODECS_VIDEO_ENCODER_H_