webrtc_voice_engine.h (19164B)
1 /* 2 * Copyright (c) 2004 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_ 12 #define MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_ 13 14 #include <stddef.h> 15 #include <stdint.h> 16 17 #include <map> 18 #include <memory> 19 #include <optional> 20 #include <set> 21 #include <string> 22 #include <utility> 23 #include <vector> 24 25 #include "absl/functional/any_invocable.h" 26 #include "absl/strings/string_view.h" 27 #include "api/audio/audio_device.h" 28 #include "api/audio/audio_frame_processor.h" 29 #include "api/audio/audio_mixer.h" 30 #include "api/audio/audio_processing.h" 31 #include "api/audio_codecs/audio_codec_pair_id.h" 32 #include "api/audio_codecs/audio_decoder_factory.h" 33 #include "api/audio_codecs/audio_encoder_factory.h" 34 #include "api/audio_codecs/audio_format.h" 35 #include "api/audio_options.h" 36 #include "api/call/audio_sink.h" 37 #include "api/crypto/crypto_options.h" 38 #include "api/crypto/frame_decryptor_interface.h" 39 #include "api/crypto/frame_encryptor_interface.h" 40 #include "api/environment/environment.h" 41 #include "api/field_trials_view.h" 42 #include "api/frame_transformer_interface.h" 43 #include "api/media_types.h" 44 #include "api/rtc_error.h" 45 #include "api/rtp_headers.h" 46 #include "api/rtp_parameters.h" 47 #include "api/rtp_sender_interface.h" 48 #include "api/scoped_refptr.h" 49 #include "api/sequence_checker.h" 50 #include "api/task_queue/pending_task_safety_flag.h" 51 #include "api/task_queue/task_queue_base.h" 52 #include "api/transport/rtp/rtp_source.h" 53 #include "call/audio_send_stream.h" 54 #include "call/audio_state.h" 55 #include "call/call.h" 56 #include "media/base/audio_source.h" 57 #include "media/base/codec.h" 58 #include "media/base/media_channel.h" 59 #include "media/base/media_channel_impl.h" 60 #include "media/base/media_config.h" 61 #include "media/base/media_engine.h" 62 #include "media/base/stream_params.h" 63 #include "modules/rtp_rtcp/include/rtp_header_extension_map.h" 64 #include "modules/rtp_rtcp/source/rtp_packet_received.h" 65 #include "rtc_base/checks.h" 66 #include "rtc_base/network/sent_packet.h" 67 #include "rtc_base/network_route.h" 68 #include "rtc_base/system/file_wrapper.h" 69 70 namespace webrtc { 71 72 class AudioFrameProcessor; 73 74 // WebRtcVoiceEngine is a class to be used with CompositeMediaEngine. 75 // It uses the WebRtc VoiceEngine library for audio handling. 76 class WebRtcVoiceEngine final : public VoiceEngineInterface { 77 friend class WebRtcVoiceSendChannel; 78 friend class WebRtcVoiceReceiveChannel; 79 80 public: 81 WebRtcVoiceEngine(const Environment& env, 82 scoped_refptr<AudioDeviceModule> adm, 83 scoped_refptr<AudioEncoderFactory> encoder_factory, 84 scoped_refptr<AudioDecoderFactory> decoder_factory, 85 scoped_refptr<AudioMixer> audio_mixer, 86 scoped_refptr<AudioProcessing> audio_processing, 87 std::unique_ptr<AudioFrameProcessor> audio_frame_processor); 88 89 WebRtcVoiceEngine() = delete; 90 WebRtcVoiceEngine(const WebRtcVoiceEngine&) = delete; 91 WebRtcVoiceEngine& operator=(const WebRtcVoiceEngine&) = delete; 92 93 ~WebRtcVoiceEngine() override; 94 95 // Does initialization that needs to occur on the worker thread. 96 void Init() override; 97 scoped_refptr<AudioState> GetAudioState() const override; 98 99 std::unique_ptr<VoiceMediaSendChannelInterface> CreateSendChannel( 100 const Environment& env, 101 Call* call, 102 const MediaConfig& config, 103 const AudioOptions& options, 104 const CryptoOptions& crypto_options, 105 AudioCodecPairId codec_pair_id) override; 106 107 std::unique_ptr<VoiceMediaReceiveChannelInterface> CreateReceiveChannel( 108 const Environment& env, 109 Call* call, 110 const MediaConfig& config, 111 const AudioOptions& options, 112 const CryptoOptions& crypto_options, 113 AudioCodecPairId codec_pair_id) override; 114 115 const std::vector<Codec>& LegacySendCodecs() const override; 116 const std::vector<Codec>& LegacyRecvCodecs() const override; 117 118 AudioEncoderFactory* encoder_factory() const override { 119 return encoder_factory_.get(); 120 } 121 AudioDecoderFactory* decoder_factory() const override { 122 return decoder_factory_.get(); 123 } 124 std::vector<RtpHeaderExtensionCapability> GetRtpHeaderExtensions( 125 const webrtc::FieldTrialsView* field_trials) const override; 126 127 // Starts AEC dump using an existing file. A maximum file size in bytes can be 128 // specified. When the maximum file size is reached, logging is stopped and 129 // the file is closed. If max_size_bytes is set to <= 0, no limit will be 130 // used. 131 bool StartAecDump(FileWrapper file, int64_t max_size_bytes) override; 132 133 // Stops AEC dump. 134 void StopAecDump() override; 135 136 std::optional<AudioDeviceModule::Stats> GetAudioDeviceStats() override; 137 138 private: 139 // Every option that is "set" will be applied. Every option not "set" will be 140 // ignored. This allows us to selectively turn on and off different options 141 // easily at any time. 142 void ApplyOptions(const AudioOptions& options); 143 144 const Environment env_; 145 std::unique_ptr<TaskQueueBase, TaskQueueDeleter> low_priority_worker_queue_; 146 147 AudioDeviceModule* adm(); 148 AudioProcessing* apm() const; 149 AudioState* audio_state(); 150 151 SequenceChecker signal_thread_checker_{SequenceChecker::kDetached}; 152 SequenceChecker worker_thread_checker_{SequenceChecker::kDetached}; 153 154 // Field trial flags. 155 const bool minimized_remsampling_on_mobile_trial_enabled_; 156 const bool payload_types_in_transport_trial_enabled_; 157 158 // The audio device module. 159 const scoped_refptr<AudioDeviceModule> adm_; 160 scoped_refptr<AudioEncoderFactory> encoder_factory_; 161 scoped_refptr<AudioDecoderFactory> decoder_factory_; 162 // The audio processing module. 163 scoped_refptr<AudioProcessing> apm_; 164 // The primary instance of WebRtc VoiceEngine. 165 scoped_refptr<AudioState> audio_state_; 166 const std::vector<Codec> legacy_send_codecs_; 167 const std::vector<Codec> legacy_recv_codecs_; 168 bool is_dumping_aec_ = false; 169 bool initialized_ = false; 170 171 // Jitter buffer settings for new streams. 172 size_t audio_jitter_buffer_max_packets_ = 200; 173 bool audio_jitter_buffer_fast_accelerate_ = false; 174 int audio_jitter_buffer_min_delay_ms_ = 0; 175 }; 176 177 class WebRtcVoiceSendChannel final : public MediaChannelUtil, 178 public VoiceMediaSendChannelInterface { 179 public: 180 WebRtcVoiceSendChannel(const Environment& env, 181 WebRtcVoiceEngine* engine, 182 const MediaConfig& config, 183 const AudioOptions& options, 184 const CryptoOptions& crypto_options, 185 Call* call, 186 AudioCodecPairId codec_pair_id); 187 188 WebRtcVoiceSendChannel() = delete; 189 WebRtcVoiceSendChannel(const WebRtcVoiceSendChannel&) = delete; 190 WebRtcVoiceSendChannel& operator=(const WebRtcVoiceSendChannel&) = delete; 191 192 ~WebRtcVoiceSendChannel() override; 193 194 MediaType media_type() const override { return MediaType::AUDIO; } 195 VideoMediaSendChannelInterface* AsVideoSendChannel() override { 196 RTC_CHECK_NOTREACHED(); 197 return nullptr; 198 } 199 VoiceMediaSendChannelInterface* AsVoiceSendChannel() override { return this; } 200 201 std::optional<Codec> GetSendCodec() const override; 202 203 // Functions imported from MediaChannelUtil 204 void SetInterface(MediaChannelNetworkInterface* iface) override { 205 MediaChannelUtil::SetInterface(iface); 206 } 207 208 bool HasNetworkInterface() const override { 209 return MediaChannelUtil::HasNetworkInterface(); 210 } 211 void SetExtmapAllowMixed(bool extmap_allow_mixed) override { 212 MediaChannelUtil::SetExtmapAllowMixed(extmap_allow_mixed); 213 } 214 bool ExtmapAllowMixed() const override { 215 return MediaChannelUtil::ExtmapAllowMixed(); 216 } 217 218 const AudioOptions& options() const { return options_; } 219 220 bool SetSenderParameters(const AudioSenderParameter& params) override; 221 RtpParameters GetRtpSendParameters(uint32_t ssrc) const override; 222 RTCError SetRtpSendParameters(uint32_t ssrc, 223 const RtpParameters& parameters, 224 SetParametersCallback callback) override; 225 226 void SetSend(bool send) override; 227 bool SetAudioSend(uint32_t ssrc, 228 bool enable, 229 const AudioOptions* options, 230 AudioSource* source) override; 231 bool AddSendStream(const StreamParams& sp) override; 232 bool RemoveSendStream(uint32_t ssrc) override; 233 234 void SetSsrcListChangedCallback( 235 absl::AnyInvocable<void(const std::set<uint32_t>&)> callback) override; 236 237 // E2EE Frame API 238 // Set a frame encryptor to a particular ssrc that will intercept all 239 // outgoing audio payloads frames and attempt to encrypt them and forward the 240 // result to the packetizer. 241 void SetFrameEncryptor( 242 uint32_t ssrc, 243 scoped_refptr<FrameEncryptorInterface> frame_encryptor) override; 244 245 bool CanInsertDtmf() override; 246 bool InsertDtmf(uint32_t ssrc, int event, int duration) override; 247 248 void OnPacketSent(const SentPacketInfo& sent_packet) override; 249 void OnNetworkRouteChanged(absl::string_view transport_name, 250 const NetworkRoute& network_route) override; 251 void OnReadyToSend(bool ready) override; 252 bool GetStats(VoiceMediaSendInfo* info) override; 253 254 // Sets a frame transformer between encoder and packetizer, to transform 255 // encoded frames before sending them out the network. 256 void SetEncoderToPacketizerFrameTransformer( 257 uint32_t ssrc, 258 scoped_refptr<FrameTransformerInterface> frame_transformer) override; 259 260 bool SenderNackEnabled() const override { 261 if (!send_codec_spec_) { 262 return false; 263 } 264 return send_codec_spec_->nack_enabled; 265 } 266 bool SenderNonSenderRttEnabled() const override { 267 if (!send_codec_spec_) { 268 return false; 269 } 270 return send_codec_spec_->enable_non_sender_rtt; 271 } 272 bool SendCodecHasNack() const override { return SenderNackEnabled(); } 273 274 void SetSendCodecChangedCallback( 275 absl::AnyInvocable<void()> callback) override { 276 send_codec_changed_callback_ = std::move(callback); 277 } 278 279 private: 280 bool SetOptions(const AudioOptions& options); 281 bool SetSendCodecs(const std::vector<Codec>& codecs, 282 std::optional<Codec> preferred_codec); 283 bool SetLocalSource(uint32_t ssrc, AudioSource* source); 284 bool MuteStream(uint32_t ssrc, bool mute); 285 286 WebRtcVoiceEngine* engine() { return engine_; } 287 bool SetMaxSendBitrate(int bps); 288 void SetupRecording(); 289 290 const Environment env_; 291 TaskQueueBase* const worker_thread_; 292 ScopedTaskSafety task_safety_; 293 SequenceChecker network_thread_checker_{SequenceChecker::kDetached}; 294 295 WebRtcVoiceEngine* const engine_ = nullptr; 296 std::vector<Codec> send_codecs_; 297 298 int max_send_bitrate_bps_ = 0; 299 AudioOptions options_; 300 std::optional<int> dtmf_payload_type_; 301 int dtmf_payload_freq_ = -1; 302 bool enable_non_sender_rtt_ = false; 303 bool send_ = false; 304 Call* const call_ = nullptr; 305 306 const MediaConfig::Audio audio_config_; 307 308 class WebRtcAudioSendStream; 309 310 std::map<uint32_t, WebRtcAudioSendStream*> send_streams_; 311 std::vector<RtpExtension> send_rtp_extensions_; 312 std::optional<RtcpFeedbackType> rtcp_cc_ack_type_; 313 std::string mid_; 314 RtcpMode rtcp_mode_; 315 316 std::optional<AudioSendStream::Config::SendCodecSpec> send_codec_spec_; 317 318 // TODO(kwiberg): Per-SSRC codec pair IDs? 319 const AudioCodecPairId codec_pair_id_; 320 321 // Per peer connection crypto options that last for the lifetime of the peer 322 // connection. 323 const CryptoOptions crypto_options_; 324 scoped_refptr<FrameTransformerInterface> unsignaled_frame_transformer_; 325 326 void FillSendCodecStats(VoiceMediaSendInfo* voice_media_info); 327 328 // Callback invoked whenever the send codec changes. 329 // TODO(bugs.webrtc.org/13931): Remove again when coupling isn't needed. 330 absl::AnyInvocable<void()> send_codec_changed_callback_; 331 // Callback invoked whenever the list of SSRCs changes. 332 absl::AnyInvocable<void(const std::set<uint32_t>&)> 333 ssrc_list_changed_callback_; 334 }; 335 336 class WebRtcVoiceReceiveChannel final 337 : public MediaChannelUtil, 338 public VoiceMediaReceiveChannelInterface { 339 public: 340 WebRtcVoiceReceiveChannel(const Environment& env, 341 WebRtcVoiceEngine* engine, 342 const MediaConfig& config, 343 const AudioOptions& options, 344 const CryptoOptions& crypto_options, 345 Call* call, 346 AudioCodecPairId codec_pair_id); 347 348 WebRtcVoiceReceiveChannel() = delete; 349 WebRtcVoiceReceiveChannel(const WebRtcVoiceReceiveChannel&) = delete; 350 WebRtcVoiceReceiveChannel& operator=(const WebRtcVoiceReceiveChannel&) = 351 delete; 352 353 ~WebRtcVoiceReceiveChannel() override; 354 355 MediaType media_type() const override { return MediaType::AUDIO; } 356 357 VideoMediaReceiveChannelInterface* AsVideoReceiveChannel() override { 358 RTC_CHECK_NOTREACHED(); 359 return nullptr; 360 } 361 VoiceMediaReceiveChannelInterface* AsVoiceReceiveChannel() override { 362 return this; 363 } 364 365 const AudioOptions& options() const { return options_; } 366 367 void SetInterface(MediaChannelNetworkInterface* iface) override { 368 MediaChannelUtil::SetInterface(iface); 369 } 370 bool SetReceiverParameters(const AudioReceiverParameters& params) override; 371 RtpParameters GetRtpReceiverParameters(uint32_t ssrc) const override; 372 RtpParameters GetDefaultRtpReceiveParameters() const override; 373 374 void SetPlayout(bool playout) override; 375 bool AddRecvStream(const StreamParams& sp) override; 376 bool RemoveRecvStream(uint32_t ssrc) override; 377 void ResetUnsignaledRecvStream() override; 378 std::optional<uint32_t> GetUnsignaledSsrc() const override; 379 380 void ChooseReceiverReportSsrc(const std::set<uint32_t>& choices) override; 381 382 void OnDemuxerCriteriaUpdatePending() override; 383 void OnDemuxerCriteriaUpdateComplete() override; 384 385 // E2EE Frame API 386 // Set a frame decryptor to a particular ssrc that will intercept all 387 // incoming audio payloads and attempt to decrypt them before forwarding the 388 // result. 389 void SetFrameDecryptor( 390 uint32_t ssrc, 391 scoped_refptr<FrameDecryptorInterface> frame_decryptor) override; 392 393 bool SetOutputVolume(uint32_t ssrc, double volume) override; 394 // Applies the new volume to current and future unsignaled streams. 395 bool SetDefaultOutputVolume(double volume) override; 396 397 bool SetBaseMinimumPlayoutDelayMs(uint32_t ssrc, int delay_ms) override; 398 std::optional<int> GetBaseMinimumPlayoutDelayMs(uint32_t ssrc) const override; 399 400 void OnPacketReceived(const RtpPacketReceived& packet) override; 401 bool GetStats(VoiceMediaReceiveInfo* info, 402 bool get_and_clear_legacy_stats) override; 403 404 // Set the audio sink for an existing stream. 405 void SetRawAudioSink(uint32_t ssrc, 406 std::unique_ptr<AudioSinkInterface> sink) override; 407 // Will set the audio sink on the latest unsignaled stream, future or 408 // current. Only one stream at a time will use the sink. 409 void SetDefaultRawAudioSink( 410 std::unique_ptr<AudioSinkInterface> sink) override; 411 412 std::vector<RtpSource> GetSources(uint32_t ssrc) const override; 413 414 void SetDepacketizerToDecoderFrameTransformer( 415 uint32_t ssrc, 416 scoped_refptr<FrameTransformerInterface> frame_transformer) override; 417 418 ::webrtc::RtcpMode RtcpMode() const override { return recv_rtcp_mode_; } 419 void SetRtcpMode(::webrtc::RtcpMode mode) override; 420 void SetReceiveNackEnabled(bool enabled) override; 421 void SetReceiveNonSenderRttEnabled(bool enabled) override; 422 423 private: 424 bool SetOptions(const AudioOptions& options); 425 bool SetRecvCodecs(const std::vector<Codec>& codecs); 426 bool SetLocalSource(uint32_t ssrc, AudioSource* source); 427 bool MuteStream(uint32_t ssrc, bool mute); 428 429 WebRtcVoiceEngine* engine() { return engine_; } 430 void SetupRecording(); 431 432 // Expected to be invoked once per packet that belongs to this channel that 433 // can not be demuxed. Returns true if a default receive stream has been 434 // created. 435 bool MaybeCreateDefaultReceiveStream(const RtpPacketReceived& packet); 436 // Check if 'ssrc' is an unsignaled stream, and if so mark it as not being 437 // unsignaled anymore (i.e. it is now removed, or signaled), and return true. 438 bool MaybeDeregisterUnsignaledRecvStream(uint32_t ssrc); 439 440 const Environment env_; 441 TaskQueueBase* const worker_thread_; 442 ScopedTaskSafety task_safety_; 443 SequenceChecker network_thread_checker_{SequenceChecker::kDetached}; 444 445 WebRtcVoiceEngine* const engine_ = nullptr; 446 447 // TODO(kwiberg): decoder_map_ and recv_codecs_ store the exact same 448 // information, in slightly different formats. Eliminate recv_codecs_. 449 std::map<int, SdpAudioFormat> decoder_map_; 450 std::vector<Codec> recv_codecs_; 451 452 AudioOptions options_; 453 bool recv_nack_enabled_ = false; 454 ::webrtc::RtcpMode recv_rtcp_mode_ = RtcpMode::kCompound; 455 bool enable_non_sender_rtt_ = false; 456 bool playout_ = false; 457 Call* const call_ = nullptr; 458 459 const MediaConfig::Audio audio_config_; 460 461 // Queue of unsignaled SSRCs; oldest at the beginning. 462 std::vector<uint32_t> unsignaled_recv_ssrcs_; 463 464 // This is a stream param that comes from the remote description, but wasn't 465 // signaled with any a=ssrc lines. It holds the information that was signaled 466 // before the unsignaled receive stream is created when the first packet is 467 // received. 468 StreamParams unsignaled_stream_params_; 469 470 // Volume for unsignaled streams, which may be set before the stream exists. 471 double default_recv_volume_ = 1.0; 472 473 // Delay for unsignaled streams, which may be set before the stream exists. 474 int default_recv_base_minimum_delay_ms_ = 0; 475 476 // Sink for latest unsignaled stream - may be set before the stream exists. 477 std::unique_ptr<AudioSinkInterface> default_sink_; 478 // Default SSRC to use for RTCP receiver reports in case of no signaled 479 // send streams. See: https://code.google.com/p/webrtc/issues/detail?id=4740 480 // and https://code.google.com/p/chromium/issues/detail?id=547661 481 uint32_t receiver_reports_ssrc_ = 0xFA17FA17u; 482 483 std::string mid_; 484 485 class WebRtcAudioReceiveStream; 486 487 std::map<uint32_t, WebRtcAudioReceiveStream*> recv_streams_; 488 std::vector<RtpExtension> recv_rtp_extensions_; 489 RtpHeaderExtensionMap recv_rtp_extension_map_; 490 491 std::optional<AudioSendStream::Config::SendCodecSpec> send_codec_spec_; 492 493 // TODO(kwiberg): Per-SSRC codec pair IDs? 494 const AudioCodecPairId codec_pair_id_; 495 496 // Per peer connection crypto options that last for the lifetime of the peer 497 // connection. 498 const CryptoOptions crypto_options_; 499 // Unsignaled streams have an option to have a frame decryptor set on them. 500 scoped_refptr<FrameDecryptorInterface> unsignaled_frame_decryptor_; 501 scoped_refptr<FrameTransformerInterface> unsignaled_frame_transformer_; 502 503 void FillReceiveCodecStats(VoiceMediaReceiveInfo* voice_media_info); 504 }; 505 506 } // namespace webrtc 507 508 509 #endif // MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_