tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

webrtc_voice_engine.h (19164B)


      1 /*
      2 *  Copyright (c) 2004 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #ifndef MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_
     12 #define MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_
     13 
     14 #include <stddef.h>
     15 #include <stdint.h>
     16 
     17 #include <map>
     18 #include <memory>
     19 #include <optional>
     20 #include <set>
     21 #include <string>
     22 #include <utility>
     23 #include <vector>
     24 
     25 #include "absl/functional/any_invocable.h"
     26 #include "absl/strings/string_view.h"
     27 #include "api/audio/audio_device.h"
     28 #include "api/audio/audio_frame_processor.h"
     29 #include "api/audio/audio_mixer.h"
     30 #include "api/audio/audio_processing.h"
     31 #include "api/audio_codecs/audio_codec_pair_id.h"
     32 #include "api/audio_codecs/audio_decoder_factory.h"
     33 #include "api/audio_codecs/audio_encoder_factory.h"
     34 #include "api/audio_codecs/audio_format.h"
     35 #include "api/audio_options.h"
     36 #include "api/call/audio_sink.h"
     37 #include "api/crypto/crypto_options.h"
     38 #include "api/crypto/frame_decryptor_interface.h"
     39 #include "api/crypto/frame_encryptor_interface.h"
     40 #include "api/environment/environment.h"
     41 #include "api/field_trials_view.h"
     42 #include "api/frame_transformer_interface.h"
     43 #include "api/media_types.h"
     44 #include "api/rtc_error.h"
     45 #include "api/rtp_headers.h"
     46 #include "api/rtp_parameters.h"
     47 #include "api/rtp_sender_interface.h"
     48 #include "api/scoped_refptr.h"
     49 #include "api/sequence_checker.h"
     50 #include "api/task_queue/pending_task_safety_flag.h"
     51 #include "api/task_queue/task_queue_base.h"
     52 #include "api/transport/rtp/rtp_source.h"
     53 #include "call/audio_send_stream.h"
     54 #include "call/audio_state.h"
     55 #include "call/call.h"
     56 #include "media/base/audio_source.h"
     57 #include "media/base/codec.h"
     58 #include "media/base/media_channel.h"
     59 #include "media/base/media_channel_impl.h"
     60 #include "media/base/media_config.h"
     61 #include "media/base/media_engine.h"
     62 #include "media/base/stream_params.h"
     63 #include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
     64 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
     65 #include "rtc_base/checks.h"
     66 #include "rtc_base/network/sent_packet.h"
     67 #include "rtc_base/network_route.h"
     68 #include "rtc_base/system/file_wrapper.h"
     69 
     70 namespace webrtc {
     71 
     72 class AudioFrameProcessor;
     73 
     74 // WebRtcVoiceEngine is a class to be used with CompositeMediaEngine.
     75 // It uses the WebRtc VoiceEngine library for audio handling.
        //
        // The class is final, has no default constructor and cannot be copied.
        // WebRtcVoiceSendChannel and WebRtcVoiceReceiveChannel are declared as
        // friends, which gives them access to the private helpers declared below
        // (ApplyOptions(), adm(), apm(), audio_state()).
     76 class WebRtcVoiceEngine final : public VoiceEngineInterface {
     77  friend class WebRtcVoiceSendChannel;
     78  friend class WebRtcVoiceReceiveChannel;
     79 
     80 public:
         // Receives ref-counted handles (scoped_refptr) to the audio device module,
         // the codec factories, the mixer and the audio processing module, plus a
         // uniquely-owned `audio_frame_processor`.
         // NOTE(review): no data member below stores `audio_mixer` or
         // `audio_frame_processor`; presumably they are consumed in the .cc
         // (e.g. while building the AudioState) - confirm.
     81  WebRtcVoiceEngine(const Environment& env,
     82                    scoped_refptr<AudioDeviceModule> adm,
     83                    scoped_refptr<AudioEncoderFactory> encoder_factory,
     84                    scoped_refptr<AudioDecoderFactory> decoder_factory,
     85                    scoped_refptr<AudioMixer> audio_mixer,
     86                    scoped_refptr<AudioProcessing> audio_processing,
     87                    std::unique_ptr<AudioFrameProcessor> audio_frame_processor);
     88 
         // Not default-constructible and not copyable.
     89  WebRtcVoiceEngine() = delete;
     90  WebRtcVoiceEngine(const WebRtcVoiceEngine&) = delete;
     91  WebRtcVoiceEngine& operator=(const WebRtcVoiceEngine&) = delete;
     92 
     93  ~WebRtcVoiceEngine() override;
     94 
     95  // Does initialization that needs to occur on the worker thread.
     96  void Init() override;
         // Returns a ref-counted AudioState handle (presumably audio_state_ -
         // the definition lives in the .cc).
     97  scoped_refptr<AudioState> GetAudioState() const override;
     98 
         // Factory for a send channel. The caller receives sole ownership of the
         // result. The arguments match the WebRtcVoiceSendChannel constructor
         // declared later in this header (which additionally takes the engine).
     99  std::unique_ptr<VoiceMediaSendChannelInterface> CreateSendChannel(
    100      const Environment& env,
    101      Call* call,
    102      const MediaConfig& config,
    103      const AudioOptions& options,
    104      const CryptoOptions& crypto_options,
    105      AudioCodecPairId codec_pair_id) override;
    106 
         // Factory for a receive channel; same shape as CreateSendChannel().
    107  std::unique_ptr<VoiceMediaReceiveChannelInterface> CreateReceiveChannel(
    108      const Environment& env,
    109      Call* call,
    110      const MediaConfig& config,
    111      const AudioOptions& options,
    112      const CryptoOptions& crypto_options,
    113      AudioCodecPairId codec_pair_id) override;
    114 
         // Return references to codec lists (presumably legacy_send_codecs_ and
         // legacy_recv_codecs_ below, which are const members - confirm in .cc).
    115  const std::vector<Codec>& LegacySendCodecs() const override;
    116  const std::vector<Codec>& LegacyRecvCodecs() const override;
    117 
         // Non-owning raw pointers to the factories held in encoder_factory_ and
         // decoder_factory_; the engine keeps its own reference.
    118  AudioEncoderFactory* encoder_factory() const override {
    119    return encoder_factory_.get();
    120  }
    121  AudioDecoderFactory* decoder_factory() const override {
    122    return decoder_factory_.get();
    123  }
    124  std::vector<RtpHeaderExtensionCapability> GetRtpHeaderExtensions(
    125      const webrtc::FieldTrialsView* field_trials) const override;
    126 
    127  // Starts AEC dump using an existing file. A maximum file size in bytes can be
    128  // specified. When the maximum file size is reached, logging is stopped and
    129  // the file is closed. If max_size_bytes is set to <= 0, no limit will be
    130  // used.
    131  bool StartAecDump(FileWrapper file, int64_t max_size_bytes) override;
    132 
    133  // Stops AEC dump.
    134  void StopAecDump() override;
    135 
         // Returns std::nullopt or the audio device statistics; when stats are
         // unavailable is decided in the .cc.
    136  std::optional<AudioDeviceModule::Stats> GetAudioDeviceStats() override;
    137 
    138 private:
    139  // Every option that is "set" will be applied. Every option not "set" will be
    140  // ignored. This allows us to selectively turn on and off different options
    141  // easily at any time.
    142  void ApplyOptions(const AudioOptions& options);
    143 
         // Environment stored by value for the lifetime of the engine.
    144  const Environment env_;
         // Task queue owned by the engine and released through TaskQueueDeleter.
    145  std::unique_ptr<TaskQueueBase, TaskQueueDeleter> low_priority_worker_queue_;
    146 
         // Raw-pointer accessors, reachable by the friend channel classes.
         // Presumably thin wrappers around adm_, apm_ and audio_state_ - confirm.
    147  AudioDeviceModule* adm();
    148  AudioProcessing* apm() const;
    149  AudioState* audio_state();
    150 
         // Both checkers are constructed detached (SequenceChecker::kDetached), so
         // they are not bound to the constructing sequence.
    151  SequenceChecker signal_thread_checker_{SequenceChecker::kDetached};
    152  SequenceChecker worker_thread_checker_{SequenceChecker::kDetached};
    153 
    154  // Field trial flags.
         // NOTE(review): "remsampling" looks like a typo for "resampling"; renaming
         // would also require touching the .cc, so it is left as is here.
    155  const bool minimized_remsampling_on_mobile_trial_enabled_;
    156  const bool payload_types_in_transport_trial_enabled_;
    157 
    158  // The audio device module.
    159  const scoped_refptr<AudioDeviceModule> adm_;
    160  scoped_refptr<AudioEncoderFactory> encoder_factory_;
    161  scoped_refptr<AudioDecoderFactory> decoder_factory_;
    162  // The audio processing module.
    163  scoped_refptr<AudioProcessing> apm_;
    164  // The primary instance of WebRtc VoiceEngine.
    165  scoped_refptr<AudioState> audio_state_;
    166  const std::vector<Codec> legacy_send_codecs_;
    167  const std::vector<Codec> legacy_recv_codecs_;
         // State flags, both false until changed elsewhere (presumably by
         // StartAecDump()/StopAecDump() and Init() respectively - confirm).
    168  bool is_dumping_aec_ = false;
    169  bool initialized_ = false;
    170 
    171  // Jitter buffer settings for new streams.
    172  size_t audio_jitter_buffer_max_packets_ = 200;
    173  bool audio_jitter_buffer_fast_accelerate_ = false;
    174  int audio_jitter_buffer_min_delay_ms_ = 0;
    175 };
    176 
    177 class WebRtcVoiceSendChannel final : public MediaChannelUtil,
    178                                     public VoiceMediaSendChannelInterface {
         // Send-side voice channel. The class is final, combines MediaChannelUtil
         // with the VoiceMediaSendChannelInterface API, has no default constructor
         // and cannot be copied. It keeps const raw pointers to a WebRtcVoiceEngine
         // and a Call (engine_ / call_); ownership is not expressed in this header -
         // presumably both are the constructor arguments and must outlive the
         // channel (confirm against the .cc).
    179 public:
    180  WebRtcVoiceSendChannel(const Environment& env,
    181                         WebRtcVoiceEngine* engine,
    182                         const MediaConfig& config,
    183                         const AudioOptions& options,
    184                         const CryptoOptions& crypto_options,
    185                         Call* call,
    186                         AudioCodecPairId codec_pair_id);
    187 
         // Not default-constructible and not copyable.
    188  WebRtcVoiceSendChannel() = delete;
    189  WebRtcVoiceSendChannel(const WebRtcVoiceSendChannel&) = delete;
    190  WebRtcVoiceSendChannel& operator=(const WebRtcVoiceSendChannel&) = delete;
    191 
    192  ~WebRtcVoiceSendChannel() override;
    193 
         // This channel always reports itself as audio.
    194  MediaType media_type() const override { return MediaType::AUDIO; }
         // RTC_CHECK_NOTREACHED() marks this call as one that must never happen on
         // an audio channel; the trailing return presumably only keeps compilers
         // satisfied.
    195  VideoMediaSendChannelInterface* AsVideoSendChannel() override {
    196    RTC_CHECK_NOTREACHED();
    197    return nullptr;
    198  }
    199  VoiceMediaSendChannelInterface* AsVoiceSendChannel() override { return this; }
    200 
         // Returns std::nullopt or the current send codec (logic lives in the .cc).
    201  std::optional<Codec> GetSendCodec() const override;
    202 
    203  // Functions imported from MediaChannelUtil
         // Each override below simply forwards to the MediaChannelUtil
         // implementation of the same name.
    204  void SetInterface(MediaChannelNetworkInterface* iface) override {
    205    MediaChannelUtil::SetInterface(iface);
    206  }
    207 
    208  bool HasNetworkInterface() const override {
    209    return MediaChannelUtil::HasNetworkInterface();
    210  }
    211  void SetExtmapAllowMixed(bool extmap_allow_mixed) override {
    212    MediaChannelUtil::SetExtmapAllowMixed(extmap_allow_mixed);
    213  }
    214  bool ExtmapAllowMixed() const override {
    215    return MediaChannelUtil::ExtmapAllowMixed();
    216  }
    217 
         // Read-only view of the AudioOptions stored in options_.
    218  const AudioOptions& options() const { return options_; }
    219 
    220  bool SetSenderParameters(const AudioSenderParameter& params) override;
    221  RtpParameters GetRtpSendParameters(uint32_t ssrc) const override;
    222  RTCError SetRtpSendParameters(uint32_t ssrc,
    223                                const RtpParameters& parameters,
    224                                SetParametersCallback callback) override;
    225 
    226  void SetSend(bool send) override;
    227  bool SetAudioSend(uint32_t ssrc,
    228                    bool enable,
    229                    const AudioOptions* options,
    230                    AudioSource* source) override;
    231  bool AddSendStream(const StreamParams& sp) override;
    232  bool RemoveSendStream(uint32_t ssrc) override;
    233 
         // Installs the callback stored in ssrc_list_changed_callback_ (see the
         // member comment at the end of the class).
    234  void SetSsrcListChangedCallback(
    235      absl::AnyInvocable<void(const std::set<uint32_t>&)> callback) override;
    236 
    237  // E2EE Frame API
    238  // Set a frame encryptor to a particular ssrc that will intercept all
    239  // outgoing audio payloads frames and attempt to encrypt them and forward the
    240  // result to the packetizer.
    241  void SetFrameEncryptor(
    242      uint32_t ssrc,
    243      scoped_refptr<FrameEncryptorInterface> frame_encryptor) override;
    244 
    245  bool CanInsertDtmf() override;
    246  bool InsertDtmf(uint32_t ssrc, int event, int duration) override;
    247 
    248  void OnPacketSent(const SentPacketInfo& sent_packet) override;
    249  void OnNetworkRouteChanged(absl::string_view transport_name,
    250                             const NetworkRoute& network_route) override;
    251  void OnReadyToSend(bool ready) override;
    252  bool GetStats(VoiceMediaSendInfo* info) override;
    253 
    254  // Sets a frame transformer between encoder and packetizer, to transform
    255  // encoded frames before sending them out the network.
    256  void SetEncoderToPacketizerFrameTransformer(
    257      uint32_t ssrc,
    258      scoped_refptr<FrameTransformerInterface> frame_transformer) override;
    259 
         // False while no send codec spec is set; otherwise mirrors the spec's
         // nack_enabled flag.
    260  bool SenderNackEnabled() const override {
    261    if (!send_codec_spec_) {
    262      return false;
    263    }
    264    return send_codec_spec_->nack_enabled;
    265  }
         // False while no send codec spec is set; otherwise mirrors the spec's
         // enable_non_sender_rtt flag.
    266  bool SenderNonSenderRttEnabled() const override {
    267    if (!send_codec_spec_) {
    268      return false;
    269    }
    270    return send_codec_spec_->enable_non_sender_rtt;
    271  }
         // Same answer as SenderNackEnabled().
    272  bool SendCodecHasNack() const override { return SenderNackEnabled(); }
    273 
         // Stores `callback` by move, replacing any previously stored callback.
    274  void SetSendCodecChangedCallback(
    275      absl::AnyInvocable<void()> callback) override {
    276    send_codec_changed_callback_ = std::move(callback);
    277  }
    278 
    279 private:
    280  bool SetOptions(const AudioOptions& options);
    281  bool SetSendCodecs(const std::vector<Codec>& codecs,
    282                     std::optional<Codec> preferred_codec);
    283  bool SetLocalSource(uint32_t ssrc, AudioSource* source);
    284  bool MuteStream(uint32_t ssrc, bool mute);
    285 
    286  WebRtcVoiceEngine* engine() { return engine_; }
    287  bool SetMaxSendBitrate(int bps);
    288  void SetupRecording();
    289 
    290  const Environment env_;
    291  TaskQueueBase* const worker_thread_;
    292  ScopedTaskSafety task_safety_;
         // Constructed detached, i.e. not bound to the constructing sequence.
    293  SequenceChecker network_thread_checker_{SequenceChecker::kDetached};
    294 
         // Raw pointer that cannot be reseated after construction.
    295  WebRtcVoiceEngine* const engine_ = nullptr;
    296  std::vector<Codec> send_codecs_;
    297 
    298  int max_send_bitrate_bps_ = 0;
    299  AudioOptions options_;
    300  std::optional<int> dtmf_payload_type_;
    301  int dtmf_payload_freq_ = -1;
         // NOTE(review): SenderNonSenderRttEnabled() reads send_codec_spec_, not
         // this flag - confirm how the two are kept in sync in the .cc.
    302  bool enable_non_sender_rtt_ = false;
    303  bool send_ = false;
         // Raw pointer that cannot be reseated after construction.
    304  Call* const call_ = nullptr;
    305 
    306  const MediaConfig::Audio audio_config_;
    307 
         // Nested type, only forward-declared here.
    308  class WebRtcAudioSendStream;
    309 
         // Streams held as raw pointers in a map with uint32_t keys (presumably the
         // SSRC used throughout the API). NOTE(review): ownership and deletion of
         // the pointees are not visible in this header - confirm in the .cc.
    310  std::map<uint32_t, WebRtcAudioSendStream*> send_streams_;
    311  std::vector<RtpExtension> send_rtp_extensions_;
    312  std::optional<RtcpFeedbackType> rtcp_cc_ack_type_;
    313  std::string mid_;
         // NOTE(review): no in-class initializer; this relies on the constructor
         // assigning a value before first use - confirm in the .cc.
    314  RtcpMode rtcp_mode_;
    315 
         // Empty until a send codec has been configured; read by the
         // SenderNackEnabled()/SenderNonSenderRttEnabled() accessors above.
    316  std::optional<AudioSendStream::Config::SendCodecSpec> send_codec_spec_;
    317 
    318  // TODO(kwiberg): Per-SSRC codec pair IDs?
    319  const AudioCodecPairId codec_pair_id_;
    320 
    321  // Per peer connection crypto options that last for the lifetime of the peer
    322  // connection.
    323  const CryptoOptions crypto_options_;
    324  scoped_refptr<FrameTransformerInterface> unsignaled_frame_transformer_;
    325 
    326  void FillSendCodecStats(VoiceMediaSendInfo* voice_media_info);
    327 
    328  // Callback invoked whenever the send codec changes.
    329  // TODO(bugs.webrtc.org/13931): Remove again when coupling isn't needed.
    330  absl::AnyInvocable<void()> send_codec_changed_callback_;
    331  // Callback invoked whenever the list of SSRCs changes.
    332  absl::AnyInvocable<void(const std::set<uint32_t>&)>
    333      ssrc_list_changed_callback_;
    334 };
    335 
    336 class WebRtcVoiceReceiveChannel final
    337    : public MediaChannelUtil,
    338      public VoiceMediaReceiveChannelInterface {
         // Receive-side voice channel. The class is final, combines MediaChannelUtil
         // with the VoiceMediaReceiveChannelInterface API, has no default
         // constructor and cannot be copied. It keeps const raw pointers to a
         // WebRtcVoiceEngine and a Call (engine_ / call_); ownership is not
         // expressed in this header - presumably both are the constructor arguments
         // and must outlive the channel (confirm against the .cc).
    339 public:
    340  WebRtcVoiceReceiveChannel(const Environment& env,
    341                            WebRtcVoiceEngine* engine,
    342                            const MediaConfig& config,
    343                            const AudioOptions& options,
    344                            const CryptoOptions& crypto_options,
    345                            Call* call,
    346                            AudioCodecPairId codec_pair_id);
    347 
         // Not default-constructible and not copyable.
    348  WebRtcVoiceReceiveChannel() = delete;
    349  WebRtcVoiceReceiveChannel(const WebRtcVoiceReceiveChannel&) = delete;
    350  WebRtcVoiceReceiveChannel& operator=(const WebRtcVoiceReceiveChannel&) =
    351      delete;
    352 
    353  ~WebRtcVoiceReceiveChannel() override;
    354 
         // This channel always reports itself as audio.
    355  MediaType media_type() const override { return MediaType::AUDIO; }
    356 
         // RTC_CHECK_NOTREACHED() marks this call as one that must never happen on
         // an audio channel; the trailing return presumably only keeps compilers
         // satisfied.
    357  VideoMediaReceiveChannelInterface* AsVideoReceiveChannel() override {
    358    RTC_CHECK_NOTREACHED();
    359    return nullptr;
    360  }
    361  VoiceMediaReceiveChannelInterface* AsVoiceReceiveChannel() override {
    362    return this;
    363  }
    364 
         // Read-only view of the AudioOptions stored in options_.
    365  const AudioOptions& options() const { return options_; }
    366 
         // Forwards to the MediaChannelUtil implementation.
    367  void SetInterface(MediaChannelNetworkInterface* iface) override {
    368    MediaChannelUtil::SetInterface(iface);
    369  }
    370  bool SetReceiverParameters(const AudioReceiverParameters& params) override;
    371  RtpParameters GetRtpReceiverParameters(uint32_t ssrc) const override;
    372  RtpParameters GetDefaultRtpReceiveParameters() const override;
    373 
    374  void SetPlayout(bool playout) override;
    375  bool AddRecvStream(const StreamParams& sp) override;
    376  bool RemoveRecvStream(uint32_t ssrc) override;
    377  void ResetUnsignaledRecvStream() override;
    378  std::optional<uint32_t> GetUnsignaledSsrc() const override;
    379 
    380  void ChooseReceiverReportSsrc(const std::set<uint32_t>& choices) override;
    381 
    382  void OnDemuxerCriteriaUpdatePending() override;
    383  void OnDemuxerCriteriaUpdateComplete() override;
    384 
    385  // E2EE Frame API
    386  // Set a frame decryptor to a particular ssrc that will intercept all
    387  // incoming audio payloads and attempt to decrypt them before forwarding the
    388  // result.
    389  void SetFrameDecryptor(
    390      uint32_t ssrc,
    391      scoped_refptr<FrameDecryptorInterface> frame_decryptor) override;
    392 
    393  bool SetOutputVolume(uint32_t ssrc, double volume) override;
    394  // Applies the new volume to current and future unsignaled streams.
    395  bool SetDefaultOutputVolume(double volume) override;
    396 
    397  bool SetBaseMinimumPlayoutDelayMs(uint32_t ssrc, int delay_ms) override;
    398  std::optional<int> GetBaseMinimumPlayoutDelayMs(uint32_t ssrc) const override;
    399 
    400  void OnPacketReceived(const RtpPacketReceived& packet) override;
    401  bool GetStats(VoiceMediaReceiveInfo* info,
    402                bool get_and_clear_legacy_stats) override;
    403 
    404  // Set the audio sink for an existing stream.
    405  void SetRawAudioSink(uint32_t ssrc,
    406                       std::unique_ptr<AudioSinkInterface> sink) override;
    407  // Will set the audio sink on the latest unsignaled stream, future or
    408  // current. Only one stream at a time will use the sink.
    409  void SetDefaultRawAudioSink(
    410      std::unique_ptr<AudioSinkInterface> sink) override;
    411 
    412  std::vector<RtpSource> GetSources(uint32_t ssrc) const override;
    413 
    414  void SetDepacketizerToDecoderFrameTransformer(
    415      uint32_t ssrc,
    416      scoped_refptr<FrameTransformerInterface> frame_transformer) override;
    417 
         // The accessor shares its name with the webrtc::RtcpMode type, which is
         // why the type is spelled with an explicit ::webrtc:: qualification here.
         // Returns the value currently held in recv_rtcp_mode_.
    418  ::webrtc::RtcpMode RtcpMode() const override { return recv_rtcp_mode_; }
    419  void SetRtcpMode(::webrtc::RtcpMode mode) override;
    420  void SetReceiveNackEnabled(bool enabled) override;
    421  void SetReceiveNonSenderRttEnabled(bool enabled) override;
    422 
    423 private:
    424  bool SetOptions(const AudioOptions& options);
    425  bool SetRecvCodecs(const std::vector<Codec>& codecs);
         // NOTE(review): SetLocalSource(), MuteStream() and SetupRecording() have
         // send-side sounding names and nothing in this header calls them - confirm
         // that they are still defined and used in the .cc.
    426  bool SetLocalSource(uint32_t ssrc, AudioSource* source);
    427  bool MuteStream(uint32_t ssrc, bool mute);
    428 
    429  WebRtcVoiceEngine* engine() { return engine_; }
    430  void SetupRecording();
    431 
    432  // Expected to be invoked once per packet that belongs to this channel that
    433  // can not be demuxed. Returns true if a default receive stream has been
    434  // created.
    435  bool MaybeCreateDefaultReceiveStream(const RtpPacketReceived& packet);
    436  // Check if 'ssrc' is an unsignaled stream, and if so mark it as not being
    437  // unsignaled anymore (i.e. it is now removed, or signaled), and return true.
    438  bool MaybeDeregisterUnsignaledRecvStream(uint32_t ssrc);
    439 
    440  const Environment env_;
    441  TaskQueueBase* const worker_thread_;
    442  ScopedTaskSafety task_safety_;
         // Constructed detached, i.e. not bound to the constructing sequence.
    443  SequenceChecker network_thread_checker_{SequenceChecker::kDetached};
    444 
         // Raw pointer that cannot be reseated after construction.
    445  WebRtcVoiceEngine* const engine_ = nullptr;
    446 
    447  // TODO(kwiberg): decoder_map_ and recv_codecs_ store the exact same
    448  // information, in slightly different formats. Eliminate recv_codecs_.
    449  std::map<int, SdpAudioFormat> decoder_map_;
    450  std::vector<Codec> recv_codecs_;
    451 
    452  AudioOptions options_;
    453  bool recv_nack_enabled_ = false;
         // Defaults to compound RTCP; exposed through the RtcpMode() accessor.
    454  ::webrtc::RtcpMode recv_rtcp_mode_ = RtcpMode::kCompound;
    455  bool enable_non_sender_rtt_ = false;
    456  bool playout_ = false;
         // Raw pointer that cannot be reseated after construction.
    457  Call* const call_ = nullptr;
    458 
    459  const MediaConfig::Audio audio_config_;
    460 
    461  // Queue of unsignaled SSRCs; oldest at the beginning.
    462  std::vector<uint32_t> unsignaled_recv_ssrcs_;
    463 
    464  // This is a stream param that comes from the remote description, but wasn't
    465  // signaled with any a=ssrc lines. It holds the information that was signaled
    466  // before the unsignaled receive stream is created when the first packet is
    467  // received.
    468  StreamParams unsignaled_stream_params_;
    469 
    470  // Volume for unsignaled streams, which may be set before the stream exists.
    471  double default_recv_volume_ = 1.0;
    472 
    473  // Delay for unsignaled streams, which may be set before the stream exists.
    474  int default_recv_base_minimum_delay_ms_ = 0;
    475 
    476  // Sink for latest unsignaled stream - may be set before the stream exists.
    477  std::unique_ptr<AudioSinkInterface> default_sink_;
    478  // Default SSRC to use for RTCP receiver reports in case of no signaled
    479  // send streams. See: https://code.google.com/p/webrtc/issues/detail?id=4740
    480  // and https://code.google.com/p/chromium/issues/detail?id=547661
    481  uint32_t receiver_reports_ssrc_ = 0xFA17FA17u;
    482 
    483  std::string mid_;
    484 
         // Nested type, only forward-declared here.
    485  class WebRtcAudioReceiveStream;
    486 
         // Streams held as raw pointers in a map with uint32_t keys (presumably the
         // SSRC used throughout the API). NOTE(review): ownership and deletion of
         // the pointees are not visible in this header - confirm in the .cc.
    487  std::map<uint32_t, WebRtcAudioReceiveStream*> recv_streams_;
    488  std::vector<RtpExtension> recv_rtp_extensions_;
    489  RtpHeaderExtensionMap recv_rtp_extension_map_;
    490 
         // NOTE(review): a send codec spec on the receive channel is not read by
         // anything in this header - confirm it is still needed.
    491  std::optional<AudioSendStream::Config::SendCodecSpec> send_codec_spec_;
    492 
    493  // TODO(kwiberg): Per-SSRC codec pair IDs?
    494  const AudioCodecPairId codec_pair_id_;
    495 
    496  // Per peer connection crypto options that last for the lifetime of the peer
    497  // connection.
    498  const CryptoOptions crypto_options_;
    499  // Unsignaled streams have an option to have a frame decryptor set on them.
    500  scoped_refptr<FrameDecryptorInterface> unsignaled_frame_decryptor_;
    501  scoped_refptr<FrameTransformerInterface> unsignaled_frame_transformer_;
    502 
    503  void FillReceiveCodecStats(VoiceMediaReceiveInfo* voice_media_info);
    504 };
    505 
    506 }  //  namespace webrtc
    507 
    508 
    509 #endif  // MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_