tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

audio_encoder.h (11594B)


      1 /*
      2 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #ifndef API_AUDIO_CODECS_AUDIO_ENCODER_H_
     12 #define API_AUDIO_CODECS_AUDIO_ENCODER_H_
     13 
     14 #include <stddef.h>
     15 #include <stdint.h>
     16 
     17 #include <memory>
     18 #include <optional>
     19 #include <utility>
     20 #include <vector>
     21 
     22 #include "absl/base/attributes.h"
     23 #include "absl/strings/string_view.h"
     24 #include "api/array_view.h"
     25 #include "api/audio/audio_view.h"
     26 #include "api/call/bitrate_allocation.h"
     27 #include "api/units/data_rate.h"
     28 #include "api/units/time_delta.h"
     29 #include "rtc_base/buffer.h"
     30 
     31 namespace webrtc {
     32 
     33 // Statistics related to Audio Network Adaptation (ANA).
     34 struct ANAStats {  // Every field is optional; each field's comment says what "not set" means.
     35  ANAStats();
     36  ANAStats(const ANAStats&);
     37  ~ANAStats();
     38  // Number of actions taken by the ANA bitrate controller since the start of
     39  // the call. If this value is not set, it indicates that the bitrate
     40  // controller is disabled.
     41  std::optional<uint32_t> bitrate_action_counter;
     42  // Number of actions taken by the ANA channel controller since the start of
     43  // the call. If this value is not set, it indicates that the channel
     44  // controller is disabled.
     45  std::optional<uint32_t> channel_action_counter;
     46  // Number of actions taken by the ANA DTX controller since the start of the
     47  // call. If this value is not set, it indicates that the DTX controller is
     48  // disabled.
     49  std::optional<uint32_t> dtx_action_counter;
     50  // Number of actions taken by the ANA FEC controller since the start of the
     51  // call. If this value is not set, it indicates that the FEC controller is
     52  // disabled.
     53  std::optional<uint32_t> fec_action_counter;
     54  // Number of times the ANA frame length controller decided to increase the
     55  // frame length since the start of the call. If this value is not set, it
     56  // indicates that the frame length controller is disabled.
     57  std::optional<uint32_t> frame_length_increase_counter;
     58  // Number of times the ANA frame length controller decided to decrease the
     59  // frame length since the start of the call. If this value is not set, it
     60  // indicates that the frame length controller is disabled.
     61  std::optional<uint32_t> frame_length_decrease_counter;
     62  // The uplink packet loss fraction as set by the ANA FEC controller. If this
     63  // value is not set, it indicates that the ANA FEC controller is not active.
     64  std::optional<float> uplink_packet_loss_fraction;
     65 };
     66 
     67 // This is the interface class for encoders in AudioCoding module. Each codec
     68 // type must have an implementation of this class.
     69 class AudioEncoder {
     70 public:
     71  // Used for UMA logging of codec usage. The same codecs, with the
     72  // same values, must be listed in
     73  // src/tools/metrics/histograms/histograms.xml in chromium to log
     74  // correct values.
     75  enum class CodecType {
     76    kOther = 0,  // Codec not specified, and/or not listed in this enum
     77    kOpus = 1,
     78    kIsac = 2,
     79    kPcmA = 3,
     80    kPcmU = 4,
     81    kG722 = 5,
     82 
     83    // Number of histogram bins in the UMA logging of codec types. The
     84    // total number of different codecs that are logged cannot exceed this
     85    // number.
     86    kMaxLoggedAudioCodecTypes  // Implicitly 6 (one past kG722); a bin count, not a codec.
     87  };
     88 
     89  struct EncodedInfoLeaf {  // Metadata for a single encoding; EncodedInfo derives from it.
     90    size_t encoded_bytes = 0;
     91    uint32_t encoded_timestamp = 0;
     92    int payload_type = 0;
     93    bool send_even_if_empty = false;
     94    bool speech = true;
     95    CodecType encoder_type = CodecType::kOther;
     96  };
     97 
     98  // This is the main struct for auxiliary encoding information. Each encoded
     99  // packet should be accompanied by one EncodedInfo struct, containing the
    100  // total number of `encoded_bytes`, the `encoded_timestamp` and the
    101  // `payload_type`. If the packet contains redundant encodings, the `redundant`
    102  // vector will be populated with EncodedInfoLeaf structs. Each struct in the
    103  // vector represents one encoding; the order of structs in the vector is the
    104  // same as the order in which the actual payloads are written to the byte
    105  // stream. When EncodedInfoLeaf structs are present in the vector, the main
    106  // struct's `encoded_bytes` will be the sum of all the `encoded_bytes` in the
    107  // vector.
    108  struct EncodedInfo : public EncodedInfoLeaf {
    109    EncodedInfo();
    110    EncodedInfo(const EncodedInfo&);
    111    EncodedInfo(EncodedInfo&&);
    112    ~EncodedInfo();
    113    EncodedInfo& operator=(const EncodedInfo&);
    114    EncodedInfo& operator=(EncodedInfo&&);
    115 
    116    std::vector<EncodedInfoLeaf> redundant;
    117  };
    118 
    119  virtual ~AudioEncoder() = default;
    120 
    121  // Returns the input sample rate in Hz and the number of input channels.
    122  // These are constants set at instantiation time.
    123  virtual int SampleRateHz() const = 0;
    124  virtual size_t NumChannels() const = 0;
    125 
    126  // Returns the rate at which the RTP timestamps are updated. The default
    127  // implementation returns SampleRateHz().
    128  virtual int RtpTimestampRateHz() const;
    129 
    130  // Returns the number of 10 ms frames the encoder will put in the next
    131  // packet. This value may only change when Encode() outputs a packet; i.e.,
    132  // the encoder may vary the number of 10 ms frames from packet to packet, but
    133  // it must decide the length of the next packet no later than when outputting
    134  // the preceding packet.
    135  virtual size_t Num10MsFramesInNextPacket() const = 0;
    136 
    137  // Returns the maximum value that can be returned by
    138  // Num10MsFramesInNextPacket().
    139  virtual size_t Max10MsFramesInAPacket() const = 0;
    140 
    141  // Returns the current target bitrate in bits/s. The value -1 means that the
    142  // codec adapts the target automatically, and a current target cannot be
    143  // provided.
    144  virtual int GetTargetBitrate() const = 0;
    145 
    146  // Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 *
    147  // NumChannels() samples). Multi-channel audio must be sample-interleaved.
    148  // The encoder appends zero or more bytes of output to `encoded` and returns
    149  // additional encoding information.  Encode() checks some preconditions, calls
    150  // EncodeImpl() which does the actual work, and then checks some
    151  // postconditions. Encode() itself is not virtual; subclasses override EncodeImpl().
    152  EncodedInfo Encode(uint32_t rtp_timestamp,
    153                     ArrayView<const int16_t> audio,
    154                     Buffer* encoded);
    155 
    156  // Resets the encoder to its starting state, discarding any input that has
    157  // been fed to the encoder but not yet emitted in a packet.
    158  virtual void Reset() = 0;
    159 
    160  // Enables or disables codec-internal FEC (forward error correction). Returns
    161  // true if the codec was able to comply. The default implementation returns
    162  // true when asked to disable FEC and false when asked to enable it (meaning
    163  // that FEC isn't supported).
    164  virtual bool SetFec(bool enable);
    165 
    166  // Enables or disables codec-internal VAD/DTX. Returns true if the codec was
    167  // able to comply. The default implementation returns true when asked to
    168  // disable DTX and false when asked to enable it (meaning that DTX isn't
    169  // supported).
    170  virtual bool SetDtx(bool enable);
    171 
    172  // Returns the status of codec-internal DTX. The default implementation always
    173  // returns false.
    174  virtual bool GetDtx() const;
    175 
    176  // Sets the application mode. Returns true if the codec was able to comply.
    177  // The default implementation just returns false.
    178  enum class Application { kSpeech, kAudio };
    179  virtual bool SetApplication(Application application);
    180 
    181  // Tells the encoder about the highest sample rate the decoder is expected to
    182  // use when decoding the bitstream. The encoder would typically use this
    183  // information to adjust the quality of the encoding. The default
    184  // implementation does nothing.
    185  virtual void SetMaxPlaybackRate(int frequency_hz);
    186 
    187  // Tells the encoder what average bitrate we'd like it to produce. The
    188  // encoder is free to adjust or disregard the given bitrate (the default
    189  // implementation does the latter).
    190  ABSL_DEPRECATED("Use OnReceivedTargetAudioBitrate instead")
    191  virtual void SetTargetBitrate(int target_bps);
    192 
    193  // Causes this encoder to let go of any other encoders it contains, and
    194  // returns a pointer to an array where they are stored (which is required to
    195  // live as long as this encoder). Unless the returned array is empty, you may
    196  // not call any methods on this encoder afterwards, except for the
    197  // destructor. The default implementation just returns an empty array.
    198  // NOTE: This method is subject to change. Do not call or override it.
    199  virtual ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders();
    200 
    201  // Enables audio network adaptor. Returns true if successful.
    202  virtual bool EnableAudioNetworkAdaptor(absl::string_view config);
    203 
    204  // Disables audio network adaptor.
    205  virtual void DisableAudioNetworkAdaptor();
    206 
    207  // Provides uplink packet loss fraction to this encoder to allow it to adapt.
    208  // `uplink_packet_loss_fraction` is in the range [0.0, 1.0].
    209  virtual void OnReceivedUplinkPacketLossFraction(
    210      float uplink_packet_loss_fraction);
    211 
    212  ABSL_DEPRECATED("")  // Deprecated; the empty message names no replacement.
    213  virtual void OnReceivedUplinkRecoverablePacketLossFraction(
    214      float uplink_recoverable_packet_loss_fraction);
    215 
    216  // Provides target audio bitrate to this encoder to allow it to adapt.
    217  virtual void OnReceivedTargetAudioBitrate(int target_bps);
    218 
    219  // Provides target audio bitrate and corresponding probing interval of
    220  // the bandwidth estimator to this encoder to allow it to adapt.
    221  virtual void OnReceivedUplinkBandwidth(int target_audio_bitrate_bps,
    222                                         std::optional<int64_t> bwe_period_ms);
    223 
    224  // Provides target audio bitrate and corresponding probing interval of
    225  // the bandwidth estimator to this encoder to allow it to adapt. NOTE(review): same text as OnReceivedUplinkBandwidth(); confirm what `update` carries.
    226  virtual void OnReceivedUplinkAllocation(BitrateAllocationUpdate update);
    227 
    228  // Provides RTT to this encoder to allow it to adapt.
    229  virtual void OnReceivedRtt(int rtt_ms);
    230 
    231  // Provides overhead to this encoder to adapt. The overhead is the number of
    232  // bytes that will be added to each packet the encoder generates.
    233  virtual void OnReceivedOverhead(size_t overhead_bytes_per_packet);
    234 
    235  // To allow encoder to adapt its frame length, it must be provided the frame
    236  // length range that receivers can accept.
    237  virtual void SetReceiverFrameLengthRange(int min_frame_length_ms,
    238                                           int max_frame_length_ms);
    239 
    240  // Get statistics related to audio network adaptation.
    241  virtual ANAStats GetANAStats() const;
    242 
    243  // The range of frame lengths that are supported or nullopt if there's no such
    244  // information. This is used together with the bitrate range to calculate the
    245  // full bitrate range, including overhead.
    246  virtual std::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
    247      const = 0;
    248 
    249  // The range of payload bitrates that are supported. This is used together
    250  // with the frame length range to calculate the full bitrate range, including
    251  // overhead. The default implementation returns nullopt.
    252  virtual std::optional<std::pair<DataRate, DataRate>> GetBitrateRange() const {
    253    return std::nullopt;
    254  }
    255 
    256  // The maximum number of audio channels supported by WebRTC encoders.
    257  static constexpr int kMaxNumberOfChannels = kMaxNumberOfAudioChannels;
    258 
    259 protected:
    260  // Subclasses implement this to perform the actual encoding. Called by
    261  // Encode().
    262  virtual EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
    263                                 ArrayView<const int16_t> audio,
    264                                 Buffer* encoded) = 0;
    265 };
    266 }  // namespace webrtc
    267 #endif  // API_AUDIO_CODECS_AUDIO_ENCODER_H_