MediaEngineWebRTCAudio.cpp (55722B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "MediaEngineWebRTCAudio.h" 7 8 #include <algorithm> 9 10 #include "AudioConverter.h" 11 #include "MediaManager.h" 12 #include "MediaTrackConstraints.h" 13 #include "MediaTrackGraph.h" 14 #include "Tracing.h" 15 #include "api/audio/builtin_audio_processing_builder.h" 16 #include "api/audio/echo_canceller3_factory.h" 17 #include "api/environment/environment_factory.h" 18 #include "common_audio/include/audio_util.h" 19 #include "libwebrtcglue/WebrtcEnvironmentWrapper.h" 20 #include "modules/audio_processing/include/audio_processing.h" 21 #include "mozilla/Assertions.h" 22 #include "mozilla/ErrorNames.h" 23 #include "mozilla/Logging.h" 24 #include "mozilla/Sprintf.h" 25 #include "nsGlobalWindowInner.h" 26 #include "nsIDUtils.h" 27 #include "transport/runnable_utils.h" 28 29 using namespace webrtc; 30 31 // These are restrictions from the webrtc.org code 32 #define MAX_CHANNELS 2 33 #define MONO 1 34 #define MAX_SAMPLING_FREQ 48000 // Hz - multiple of 100 35 36 namespace mozilla { 37 38 using dom::MediaSourceEnum; 39 40 extern LazyLogModule gMediaManagerLog; 41 #define LOG(...) MOZ_LOG(gMediaManagerLog, LogLevel::Debug, (__VA_ARGS__)) 42 #define LOG_FRAME(...) \ 43 MOZ_LOG(gMediaManagerLog, LogLevel::Verbose, (__VA_ARGS__)) 44 #define LOG_ERROR(...) MOZ_LOG(gMediaManagerLog, LogLevel::Error, (__VA_ARGS__)) 45 46 /** 47 * WebRTC Microphone MediaEngineSource. 
 */

/**
 * Constructs a microphone source for the given device.
 * Runs on the media (owning) thread; mSettings/mCapabilities are
 * main-thread-only holders (non-strict, see below), so capability data is
 * populated via a dispatch to the main thread.
 */
MediaEngineWebRTCMicrophoneSource::MediaEngineWebRTCMicrophoneSource(
    const MediaDevice* aMediaDevice)
    : mPrincipal(PRINCIPAL_HANDLE_NONE),
      mDeviceInfo(aMediaDevice->mAudioDeviceInfo),
      mDeviceMaxChannelCount(mDeviceInfo->MaxChannels()),
      mSettings(new nsMainThreadPtrHolder<
                media::Refcountable<dom::MediaTrackSettings>>(
          "MediaEngineWebRTCMicrophoneSource::mSettings",
          new media::Refcountable<dom::MediaTrackSettings>(),
          // Non-strict means it won't assert main thread for us.
          // It would be great if it did but we're already on the media thread.
          /* aStrict = */ false)),
      mCapabilities(new nsMainThreadPtrHolder<
                    media::Refcountable<dom::MediaTrackCapabilities>>(
          "MediaEngineWebRTCMicrophoneSource::mCapabilities",
          new media::Refcountable<dom::MediaTrackCapabilities>(),
          // Non-strict means it won't assert main thread for us.
          // It would be great if it did but we're already on the media thread.
          /* aStrict = */ false)) {
  MOZ_ASSERT(aMediaDevice->mMediaSource == MediaSourceEnum::Microphone);
#ifndef ANDROID
  MOZ_ASSERT(mDeviceInfo->DeviceID());
#endif

  // We'll init lazily as needed
  mSettings->mEchoCancellation.Construct(0);
  mSettings->mAutoGainControl.Construct(0);
  mSettings->mNoiseSuppression.Construct(0);
  mSettings->mChannelCount.Construct(0);

  mState = kReleased;

  // Populate mCapabilities (boolean capabilities and the channel-count range)
  // on the main thread, since it is a main-thread-only holder.
  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [capabilities = mCapabilities,
                 deviceMaxChannelCount = mDeviceMaxChannelCount] {
        nsTArray<bool> echoCancellation;
        echoCancellation.AppendElement(true);
        echoCancellation.AppendElement(false);
        capabilities->mEchoCancellation.Reset();
        capabilities->mEchoCancellation.Construct(std::move(echoCancellation));

        nsTArray<bool> autoGainControl;
        autoGainControl.AppendElement(true);
        autoGainControl.AppendElement(false);
        capabilities->mAutoGainControl.Reset();
        capabilities->mAutoGainControl.Construct(std::move(autoGainControl));

        nsTArray<bool> noiseSuppression;
        noiseSuppression.AppendElement(true);
        noiseSuppression.AppendElement(false);
        capabilities->mNoiseSuppression.Reset();
        capabilities->mNoiseSuppression.Construct(std::move(noiseSuppression));

        // Only expose a channelCount range when the device reported a
        // non-zero maximum.
        if (deviceMaxChannelCount) {
          dom::ULongRange channelCountRange;
          channelCountRange.mMax.Construct(deviceMaxChannelCount);
          channelCountRange.mMin.Construct(1);
          capabilities->mChannelCount.Reset();
          capabilities->mChannelCount.Construct(channelCountRange);
        }
      }));
}

/**
 * Creates a new source for aMediaDevice, copying the settings and
 * capabilities snapshots from aSource.
 */
/*static*/ already_AddRefed<MediaEngineWebRTCMicrophoneSource>
MediaEngineWebRTCMicrophoneSource::CreateFrom(
    const MediaEngineWebRTCMicrophoneSource* aSource,
    const MediaDevice* aMediaDevice) {
  auto src = MakeRefPtr<MediaEngineWebRTCMicrophoneSource>(aMediaDevice);
  *static_cast<dom::MediaTrackSettings*>(src->mSettings) = *aSource->mSettings;
  *static_cast<dom::MediaTrackCapabilities*>(src->mCapabilities) =
      *aSource->mCapabilities;
  return src.forget();
}

/**
 * Resolves constraints + prefs into the effective prefs for this source.
 * Returns NS_ERROR_FAILURE and sets *aOutBadConstraint if the channelCount
 * constraint cannot be satisfied by the device. Media-thread only.
 */
nsresult MediaEngineWebRTCMicrophoneSource::EvaluateSettings(
    const NormalizedConstraints& aConstraintsUpdate,
    const MediaEnginePrefs& aInPrefs, MediaEnginePrefs* aOutPrefs,
    const char** aOutBadConstraint) {
  AssertIsOnOwningThread();

  FlattenedConstraints c(aConstraintsUpdate);
  MediaEnginePrefs prefs = aInPrefs;

  // AGC and noise suppression default to on only when AEC ends up on.
  prefs.mAecOn = c.mEchoCancellation.Get(aInPrefs.mAecOn);
  prefs.mAgcOn = c.mAutoGainControl.Get(aInPrefs.mAgcOn && prefs.mAecOn);
  prefs.mNoiseOn = c.mNoiseSuppression.Get(aInPrefs.mNoiseOn && prefs.mAecOn);

  // Determine an actual channel count to use for this source. Three factors at
  // play here: the device capabilities, the constraints passed in by content,
  // and a pref that can force things (for testing)
  int32_t maxChannels = static_cast<int32_t>(mDeviceInfo->MaxChannels());

  // First, check channelCount violation wrt constraints. This fails in case of
  // error.
  if (c.mChannelCount.mMin > maxChannels) {
    *aOutBadConstraint = "channelCount";
    return NS_ERROR_FAILURE;
  }
  // A pref can force the channel count to use. If the pref has a value of zero
  // or lower, it has no effect.
  if (aInPrefs.mChannels <= 0) {
    prefs.mChannels = maxChannels;
  }

  // Get the number of channels asked for by content, and clamp it between the
  // pref and the maximum number of channels that the device supports.
  prefs.mChannels = c.mChannelCount.Get(std::min(prefs.mChannels, maxChannels));
  prefs.mChannels = std::clamp(prefs.mChannels, 1, maxChannels);

  LOG("Mic source %p Audio config: aec: %s, agc: %s, noise: %s, channels: %d",
      this, prefs.mAecOn ? "on" : "off", prefs.mAgcOn ? "on" : "off",
      prefs.mNoiseOn ?
      "on" : "off", prefs.mChannels);

  *aOutPrefs = prefs;

  return NS_OK;
}

/**
 * Re-evaluates constraints against prefs and, on success, applies the
 * resulting settings and caches them in mCurrentPrefs. On constraint failure
 * returns NS_ERROR_INVALID_ARG; on unexpected failure stops the source.
 * Media-thread only; requires SetTrack() to have been called.
 */
nsresult MediaEngineWebRTCMicrophoneSource::Reconfigure(
    const dom::MediaTrackConstraints& aConstraints,
    const MediaEnginePrefs& aPrefs, const char** aOutBadConstraint) {
  AssertIsOnOwningThread();
  MOZ_ASSERT(mTrack);

  LOG("Mic source %p Reconfigure ", this);

  NormalizedConstraints constraints(aConstraints);
  MediaEnginePrefs outputPrefs;
  nsresult rv =
      EvaluateSettings(constraints, aPrefs, &outputPrefs, aOutBadConstraint);
  if (NS_FAILED(rv)) {
    if (aOutBadConstraint) {
      return NS_ERROR_INVALID_ARG;
    }

    nsAutoCString name;
    GetErrorName(rv, name);
    LOG("Mic source %p Reconfigure() failed unexpectedly. rv=%s", this,
        name.Data());
    Stop();
    return NS_ERROR_UNEXPECTED;
  }

  ApplySettings(outputPrefs);

  mCurrentPrefs = outputPrefs;

  return NS_OK;
}

/**
 * Translates MediaEnginePrefs into a webrtc::AudioProcessing::Config,
 * then masks out stages already handled by platform (cubeb) processing,
 * as recorded in mPlatformProcessingSetParams.
 */
AudioProcessing::Config AudioInputProcessing::ConfigForPrefs(
    MediaTrackGraph* aGraph, const MediaEnginePrefs& aPrefs) const {
  AudioProcessing::Config config;

  config.pipeline.multi_channel_render = true;
  config.pipeline.multi_channel_capture = true;

  config.echo_canceller.enabled = aPrefs.mAecOn;
  config.echo_canceller.mobile_mode = aPrefs.mUseAecMobile;

  // AGC1 is used unless AGC2 is forced by pref.
  if ((config.gain_controller1.enabled =
           aPrefs.mAgcOn && !aPrefs.mAgc2Forced)) {
    auto mode = static_cast<AudioProcessing::Config::GainController1::Mode>(
        aPrefs.mAgc);
    // Reject out-of-range pref values and fall back to adaptive digital.
    if (mode != AudioProcessing::Config::GainController1::kAdaptiveAnalog &&
        mode != AudioProcessing::Config::GainController1::kAdaptiveDigital &&
        mode != AudioProcessing::Config::GainController1::kFixedDigital) {
      LOG_ERROR("AudioInputProcessing %p Attempt to set invalid AGC mode %d",
                this, static_cast<int>(mode));
      mode = AudioProcessing::Config::GainController1::kAdaptiveDigital;
    }
#if defined(WEBRTC_IOS) || defined(ATA) || defined(WEBRTC_ANDROID)
    // Adaptive analog requires an adjustable analog mic gain, which mobile
    // platforms don't provide.
    if (mode == AudioProcessing::Config::GainController1::kAdaptiveAnalog) {
      LOG_ERROR(
          "AudioInputProcessing %p Invalid AGC mode kAdaptiveAnalog on "
          "mobile",
          this);
      MOZ_ASSERT_UNREACHABLE(
          "Bad pref set in all.js or in about:config"
          " for the auto gain, on mobile.");
      mode = AudioProcessing::Config::GainController1::kFixedDigital;
    }
#endif
    config.gain_controller1.mode = mode;
  }
  config.gain_controller2.enabled =
      config.gain_controller2.adaptive_digital.enabled =
          aPrefs.mAgcOn && aPrefs.mAgc2Forced;

  if ((config.noise_suppression.enabled = aPrefs.mNoiseOn)) {
    auto level = static_cast<AudioProcessing::Config::NoiseSuppression::Level>(
        aPrefs.mNoise);
    // Reject out-of-range pref values and fall back to moderate suppression.
    if (level != AudioProcessing::Config::NoiseSuppression::kLow &&
        level != AudioProcessing::Config::NoiseSuppression::kModerate &&
        level != AudioProcessing::Config::NoiseSuppression::kHigh &&
        level != AudioProcessing::Config::NoiseSuppression::kVeryHigh) {
      LOG_ERROR(
          "AudioInputProcessing %p Attempt to set invalid noise suppression "
          "level %d",
          this, static_cast<int>(level));

      level = AudioProcessing::Config::NoiseSuppression::kModerate;
    }
    config.noise_suppression.level = level;
  }

  config.transient_suppression.enabled = aPrefs.mTransientOn;

  config.high_pass_filter.enabled = aPrefs.mHPFOn;

  // Disable libwebrtc stages that the platform is already performing.
  if ((mPlatformProcessingSetParams &
       CUBEB_INPUT_PROCESSING_PARAM_ECHO_CANCELLATION)) {
    // Platform processing (VPIO on macOS) will cancel echo from the output
    // device used as the output stream. Leave it on here when rendering audio
    // to another output device.
    config.echo_canceller.enabled = !aGraph->OutputForAECIsPrimary();
  }
  if (mPlatformProcessingSetParams &
      CUBEB_INPUT_PROCESSING_PARAM_AUTOMATIC_GAIN_CONTROL) {
    config.gain_controller1.enabled = config.gain_controller2.enabled = false;
  }
  if (mPlatformProcessingSetParams &
      CUBEB_INPUT_PROCESSING_PARAM_NOISE_SUPPRESSION) {
    config.noise_suppression.enabled = false;
  }

  return config;
}

/**
 * Publishes aPrefs to the main-thread mSettings holder and forwards them to
 * the AudioInputProcessing instance on the graph thread via a control
 * message. Media-thread only; requires SetTrack() to have been called.
 */
void MediaEngineWebRTCMicrophoneSource::ApplySettings(
    const MediaEnginePrefs& aPrefs) {
  AssertIsOnOwningThread();

  TRACE("ApplySettings");
  MOZ_ASSERT(
      mTrack,
      "ApplySetting is to be called only after SetTrack has been called");

  // `that` keeps this source alive across the main-thread dispatch.
  RefPtr<MediaEngineWebRTCMicrophoneSource> that = this;
  CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID();
  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [this, that, deviceID, track = mTrack, prefs = aPrefs] {
        mSettings->mEchoCancellation.Value() = prefs.mAecOn;
        mSettings->mAutoGainControl.Value() = prefs.mAgcOn;
        mSettings->mNoiseSuppression.Value() = prefs.mNoiseOn;
        mSettings->mChannelCount.Value() = prefs.mChannels;

        if (track->IsDestroyed()) {
          return;
        }
        track->QueueControlMessageWithNoShutdown(
            [track, deviceID, prefs, inputProcessing = mInputProcessing] {
              inputProcessing->ApplySettings(track->Graph(), deviceID, prefs);
            });
      }));
}

/**
 * Allocates this source: evaluates constraints, publishes the resulting
 * settings to the main thread, and caches them in mCurrentPrefs.
 * Media-thread only.
 */
nsresult MediaEngineWebRTCMicrophoneSource::Allocate(
    const dom::MediaTrackConstraints& aConstraints,
    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
    const char** aOutBadConstraint) {
  AssertIsOnOwningThread();

  mState = kAllocated;

  NormalizedConstraints normalized(aConstraints);
  MediaEnginePrefs outputPrefs;
  nsresult rv =
      EvaluateSettings(normalized, aPrefs, &outputPrefs, aOutBadConstraint);
  if (NS_FAILED(rv)) {
    return rv;
  }

  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [settings = mSettings, prefs = outputPrefs] {
        settings->mEchoCancellation.Value() = prefs.mAecOn;
        settings->mAutoGainControl.Value() = prefs.mAgcOn;
        settings->mNoiseSuppression.Value() = prefs.mNoiseOn;
        settings->mChannelCount.Value() = prefs.mChannels;
      }));

  mCurrentPrefs = outputPrefs;

  return rv;
}

/**
 * Releases this source: ends the input processing on the graph thread (via
 * the main thread) and resets local state to kReleased. Media-thread only.
 * Requires the source to be stopped or merely allocated.
 */
nsresult MediaEngineWebRTCMicrophoneSource::Deallocate() {
  AssertIsOnOwningThread();

  MOZ_ASSERT(mState == kStopped || mState == kAllocated);

  if (mTrack) {
    NS_DispatchToMainThread(NS_NewRunnableFunction(
        __func__,
        [track = std::move(mTrack), inputProcessing = mInputProcessing] {
          if (track->IsDestroyed()) {
            // This track has already been destroyed on main thread by its
            // DOMMediaStream. No cleanup left to do.
            return;
          }
          track->QueueControlMessageWithNoShutdown([inputProcessing] {
            TRACE("mInputProcessing::End");
            inputProcessing->End();
          });
        }));
  }

  // Reset all state. This is not strictly necessary, this instance will get
  // destroyed soon.
  mTrack = nullptr;
  mPrincipal = PRINCIPAL_HANDLE_NONE;

  // If empty, no callbacks to deliver data should be occurring
  MOZ_ASSERT(mState != kReleased, "Source not allocated");
  MOZ_ASSERT(mState != kStarted, "Source not stopped");

  mState = kReleased;
  LOG("Mic source %p Audio device %s deallocated", this,
      NS_ConvertUTF16toUTF8(mDeviceInfo->Name()).get());
  return NS_OK;
}

/**
 * Binds this source to its AudioProcessingTrack and principal, creates the
 * AudioInputProcessing instance, and hands it to the track on the main
 * thread. Media-thread only; may be called at most once (asserted).
 */
void MediaEngineWebRTCMicrophoneSource::SetTrack(
    const RefPtr<MediaTrack>& aTrack, const PrincipalHandle& aPrincipal) {
  AssertIsOnOwningThread();
  MOZ_ASSERT(aTrack);
  MOZ_ASSERT(aTrack->AsAudioProcessingTrack());

  MOZ_ASSERT(!mTrack);
  MOZ_ASSERT(mPrincipal == PRINCIPAL_HANDLE_NONE);
  mTrack = aTrack->AsAudioProcessingTrack();
  mPrincipal = aPrincipal;

  mInputProcessing =
      MakeAndAddRef<AudioInputProcessing>(mDeviceMaxChannelCount);

  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [track = mTrack, processing = mInputProcessing]() mutable {
        track->SetInputProcessing(std::move(processing));
        track->Resume();  // Suspended by MediaManager
      }));

  LOG("Mic source %p Track %p registered for microphone capture", this,
      aTrack.get());
}

/**
 * Starts capture: applies the cached prefs, then (on the main thread)
 * starts processing on the graph thread and connects the device input.
 * Idempotent when already started. Media-thread only.
 */
nsresult MediaEngineWebRTCMicrophoneSource::Start() {
  AssertIsOnOwningThread();

  // This spans setting both the enabled state and mState.
  if (mState == kStarted) {
    return NS_OK;
  }

  MOZ_ASSERT(mState == kAllocated || mState == kStopped);

  ApplySettings(mCurrentPrefs);

  CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID();
  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [inputProcessing = mInputProcessing, deviceID, track = mTrack,
                 principal = mPrincipal] {
        if (track->IsDestroyed()) {
          return;
        }

        track->QueueControlMessageWithNoShutdown([track, inputProcessing] {
          TRACE("mInputProcessing::Start");
          inputProcessing->Start(track->Graph());
        });
        track->ConnectDeviceInput(deviceID, inputProcessing.get(), principal);
      }));

  MOZ_ASSERT(mState != kReleased);
  mState = kStarted;

  return NS_OK;
}

/**
 * Stops capture: disconnects the device input and stops processing on the
 * graph thread (both via the main thread). Idempotent when already stopped.
 * Media-thread only; requires SetTrack() to have been called.
 */
nsresult MediaEngineWebRTCMicrophoneSource::Stop() {
  AssertIsOnOwningThread();

  LOG("Mic source %p Stop()", this);
  MOZ_ASSERT(mTrack, "SetTrack must have been called before ::Stop");

  if (mState == kStopped) {
    // Already stopped - this is allowed
    return NS_OK;
  }

  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [inputProcessing = mInputProcessing, deviceInfo = mDeviceInfo,
                 track = mTrack] {
        if (track->IsDestroyed()) {
          return;
        }

        MOZ_ASSERT(track->DeviceId().value() == deviceInfo->DeviceID());
        track->DisconnectDeviceInput();
        track->QueueControlMessageWithNoShutdown([track, inputProcessing] {
          TRACE("mInputProcessing::Stop");
          inputProcessing->Stop(track->Graph());
        });
      }));

  MOZ_ASSERT(mState == kStarted, "Should be started when stopping");
  mState = kStopped;

  return NS_OK;
}

// Returns the last settings snapshot published to the main thread.
void MediaEngineWebRTCMicrophoneSource::GetSettings(
    dom::MediaTrackSettings& aOutSettings) const {
  MOZ_ASSERT(NS_IsMainThread());
  aOutSettings = *mSettings;
}

// Returns the capabilities snapshot computed at construction time.
void MediaEngineWebRTCMicrophoneSource::GetCapabilities(
    dom::MediaTrackCapabilities& aOutCapabilities) const {
  MOZ_ASSERT(NS_IsMainThread());
  aOutCapabilities = *mCapabilities;
}

/**
 * Graph-thread audio-processing pipeline for a microphone track.
 * The downmix buffer is sized for one 10ms packet at the maximum supported
 * rate and channel count (MAX_SAMPLING_FREQ * MAX_CHANNELS / 100).
 */
AudioInputProcessing::AudioInputProcessing(uint32_t aMaxChannelCount)
    : mInputDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100),
      mEnabled(false),
      mEnded(false),
      mPacketCount(0) {
  // Clamp the device's channel count into int32_t range for mSettings.
  mSettings.mChannels = static_cast<int32_t>(std::min<uint32_t>(
      std::numeric_limits<int32_t>::max(), aMaxChannelCount));
}

/**
 * Called when the input device is disconnected. Forgets any platform
 * processing state and re-derives the libwebrtc config from mSettings.
 */
void AudioInputProcessing::Disconnect(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();
  mPlatformProcessingSetGeneration = 0;
  mPlatformProcessingSetParams = CUBEB_INPUT_PROCESSING_PARAM_NONE;
  ApplySettingsInternal(aGraph, mSettings);
}

/**
 * Notification that platform processing params aRequestedParams were
 * requested (generation aGeneration). Until the result arrives we
 * optimistically assume the intersection of the requested params with those
 * already applied, and reconfigure libwebrtc accordingly.
 */
void AudioInputProcessing::NotifySetRequestedInputProcessingParams(
    MediaTrackGraph* aGraph, int aGeneration,
    cubeb_input_processing_params aRequestedParams) {
  aGraph->AssertOnGraphThread();
  MOZ_ASSERT(aGeneration >= mPlatformProcessingSetGeneration);
  // Equal generation means this request was already observed; ignore it.
  if (aGeneration <= mPlatformProcessingSetGeneration) {
    return;
  }
  mPlatformProcessingSetGeneration = aGeneration;
  cubeb_input_processing_params intersection =
      mPlatformProcessingSetParams & aRequestedParams;
  LOG("AudioInputProcessing %p platform processing params being applied are "
      "now %s (Gen %d). Assuming %s while waiting for the result.",
      this, CubebUtils::ProcessingParamsToString(aRequestedParams).get(),
      aGeneration, CubebUtils::ProcessingParamsToString(intersection).get());
  if (mPlatformProcessingSetParams == intersection) {
    LOG("AudioInputProcessing %p intersection %s of platform processing params "
        "already applied. Doing nothing.",
        this, CubebUtils::ProcessingParamsToString(intersection).get());
    return;
  }
  mPlatformProcessingSetParams = intersection;
  ApplySettingsInternal(aGraph, mSettings);
}

/**
 * Result of the platform processing request for generation aGeneration.
 * On success, records the params the platform actually applied; on error,
 * falls back to doing all processing in libwebrtc. Stale-generation results
 * are ignored. Either way the libwebrtc config is re-derived.
 */
void AudioInputProcessing::NotifySetRequestedInputProcessingParamsResult(
    MediaTrackGraph* aGraph, int aGeneration,
    const Result<cubeb_input_processing_params, int>& aResult) {
  aGraph->AssertOnGraphThread();
  if (aGeneration != mPlatformProcessingSetGeneration) {
    // This is a result from an old request, wait for a more recent one.
    return;
  }
  if (aResult.isOk()) {
    if (mPlatformProcessingSetParams == aResult.inspect()) {
      // No change.
      return;
    }
    mPlatformProcessingSetError = Nothing();
    mPlatformProcessingSetParams = aResult.inspect();
    LOG("AudioInputProcessing %p platform processing params are now %s.", this,
        CubebUtils::ProcessingParamsToString(mPlatformProcessingSetParams)
            .get());
  } else {
    mPlatformProcessingSetError = Some(aResult.inspectErr());
    mPlatformProcessingSetParams = CUBEB_INPUT_PROCESSING_PARAM_NONE;
    LOG("AudioInputProcessing %p platform processing params failed to apply. "
        "Applying input processing config in libwebrtc.",
        this);
  }
  ApplySettingsInternal(aGraph, mSettings);
}

/**
 * True when no processing stage (AEC, AGC1/2, NS) is enabled in the applied
 * config, so input can be forwarded without packetizing.
 */
bool AudioInputProcessing::IsPassThrough(MediaTrackGraph* aGraph) const {
  aGraph->AssertOnGraphThread();
  // The high-pass filter is not taken into account when activating the
  // pass through, since it's not controllable from content.
  auto config = AppliedConfig(aGraph);
  auto aec = [](const auto& config) { return config.echo_canceller.enabled; };
  auto agc = [](const auto& config) {
    return config.gain_controller1.enabled || config.gain_controller2.enabled;
  };
  auto ns = [](const auto& config) { return config.noise_suppression.enabled; };
  return !(aec(config) || agc(config) || ns(config));
}

// Called when the pass-through state flips; clears processing state when
// entering pass-through so it cannot affect later processing.
void AudioInputProcessing::PassThroughChanged(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();

  if (!mEnabled) {
    MOZ_ASSERT(!mPacketizerInput);
    return;
  }

  if (IsPassThrough(aGraph)) {
    // Switching to pass-through. Clear state so that it doesn't affect any
    // future processing, if re-enabled.
    ResetAudioProcessing(aGraph);
  } else {
    // No longer pass-through. Processing will not use old state.
    // Packetizer setup is deferred until needed.
    MOZ_ASSERT(!mPacketizerInput);
  }
}

// The channel count requested from the input device.
uint32_t AudioInputProcessing::GetRequestedInputChannelCount() const {
  return mSettings.mChannels;
}

// Asks the graph to re-open the device with the new channel count.
void AudioInputProcessing::RequestedInputChannelCountChanged(
    MediaTrackGraph* aGraph, CubebUtils::AudioDeviceID aDeviceId) {
  aGraph->ReevaluateInputDevice(aDeviceId);
}

// Enables processing of input data. Idempotent. Graph-thread only.
void AudioInputProcessing::Start(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();

  if (mEnabled) {
    return;
  }
  mEnabled = true;

  MOZ_ASSERT(!mPacketizerInput);
}

// Disables processing and tears down the packetizer if one was active.
// Idempotent. Graph-thread only.
void AudioInputProcessing::Stop(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();

  if (!mEnabled) {
    return;
  }

  mEnabled = false;

  if (IsPassThrough(aGraph)) {
    return;
  }

  // Packetizer is active and we were just stopped. Stop the packetizer and
  // processing.
  ResetAudioProcessing(aGraph);
}

// The following is how Process() works in pass-through and non-pass-through
// mode.
// In both modes, Process() outputs the same number of frames as its
// input data.
//
// I. In non-pass-through mode:
//
// We will use webrtc::AudioProcessing to process the input audio data in this
// mode. The data input in webrtc::AudioProcessing needs to be a 10ms chunk,
// while the input data passed to Process() is not necessarily a multiple of
// the 10ms-chunk length. To divide the input data into 10ms chunks,
// mPacketizerInput is introduced.
//
// We will add one 10ms-chunk silence into the internal buffer before Process()
// works. Those extra frames are called pre-buffering. It aims to avoid glitches
// we may have when producing data in mPacketizerInput. Without pre-buffering,
// when the input data length is not a multiple of 10ms, we could end up not
// having enough output since mPacketizerInput would keep some input data, which
// is the remainder of the 10ms-chunk length. To force processing that data
// left in mPacketizerInput, we would need to add some extra frames to make
// mPacketizerInput produce a 10ms-chunk. For example, if the sample rate is
// 44100 Hz, then the packet-size is 441 frames. When we only have 384 input
// frames, we would need to put an additional 57 frames into mPacketizerInput to
// produce a packet. However, those extra 57 frames result in a glitch sound.
//
// By adding one 10ms-chunk silence in advance to the internal buffer, we won't
// need to add extra frames between the input data no matter what data length it
// is. The only drawback is the input data won't be processed and sent to output
// immediately. Process() will consume pre-buffering data for its output first.
// The below describes how it works:
//
//
//                            Process()
//               +-----------------------------+
//   input D(N)  |  +--------+    +--------+   |   output D(N)
// --------------|->|  P(N)  |--->|  S(N)  |---|-------------->
//               |  +--------+    +--------+   |
//               |  packetizer     mSegment    |
//               +-----------------------------+
//               <------ internal buffer ------>
//
//
// D(N): number of frames from the input and the output needs in the N round
// Z: number of frames of a 10ms chunk(packet) in mPacketizerInput, Z >= 1
//    (if Z = 1, packetizer has no effect)
// P(N): number of frames left in mPacketizerInput after the N round. Once the
//       frames in packetizer >= Z, packetizer will produce a packet to
//       mSegment, so P(N) = (P(N-1) + D(N)) % Z, 0 <= P(N) <= Z-1
// S(N): number of frames left in mSegment after the N round. The input D(N)
//       frames will be passed to mPacketizerInput first, and then
//       mPacketizerInput may append some packets to mSegment, so
//       S(N) = S(N-1) + Z * floor((P(N-1) + D(N)) / Z) - D(N)
//
// At first, we set P(0) = 0, S(0) = X, where X >= Z-1. X is the
// pre-buffering put in the internal buffer. With these settings,
// P(K) + S(K) = X always holds.
//
// Intuitively, this seems true: We put X frames in the internal buffer at
// first. If the data won't be blocked in packetizer, after the Process(), the
// internal buffer should still hold X frames since the number of frames coming
// from input is the same as the output needs. The key to having enough data for
// output needs, while the input data is piled up in packetizer, is putting
// at least Z-1 frames as pre-buffering, since the maximum number of frames
// stuck in the packetizer before it can emit a packet is packet-size - 1.
// Otherwise, we don't have enough data for output if the new input data plus
// the data left in packetizer produces a smaller-than-10ms chunk, which will be
// left in packetizer. Thus we must have some pre-buffering frames in the
// mSegment to make up the length of the left chunk we need for output. This can
// also be shown by induction:
// (1) This holds when K = 0
// (2) Assume this holds when K = N: so P(N) + S(N) = X
//     => P(N) + S(N) = X >= Z-1 => S(N) >= Z-1-P(N)
// (3) When K = N+1, we have D(N+1) input frames come
//     a. if P(N) + D(N+1) < Z, then packetizer does not have enough data for
//        one packet. No data is produced by the packetizer, so the mSegment now
//        has S(N) >= Z-1-P(N) frames. Output needs D(N+1) < Z-P(N) frames. So
//        it needs at most Z-P(N)-1 frames, and mSegment has enough frames for
//        output. Then, P(N+1) = P(N) + D(N+1) and S(N+1) = S(N) - D(N+1)
//        => P(N+1) + S(N+1) = P(N) + S(N) = X
//     b. if P(N) + D(N+1) = Z, then packetizer will produce one packet for
//        mSegment, so mSegment now has S(N) + Z frames. Output needs D(N+1)
//        = Z-P(N) frames. S(N) has at least Z-1-P(N)+Z >= Z-P(N) frames, since
//        Z >= 1. So mSegment has enough frames for output. Then, P(N+1) = 0 and
//        S(N+1) = S(N) + Z - D(N+1) = S(N) + P(N)
//        => P(N+1) + S(N+1) = P(N) + S(N) = X
//     c. if P(N) + D(N+1) > Z, and let P(N) + D(N+1) = q * Z + r, where q >= 1
//        and 0 <= r <= Z-1, then the packetizer can produce q packets
//        for mSegment. Output needs D(N+1) = q * Z - P(N) + r frames and
//        mSegment has S(N) + q * z >= q * z - P(N) + Z-1 >= q*z -P(N) + r,
//        since r <= Z-1. So mSegment has enough frames for output. Then,
//        P(N+1) = r and S(N+1) = S(N) + q * Z - D(N+1)
//        => P(N+1) + S(N+1) = S(N) + (q * Z + r - D(N+1)) = S(N) + P(N) = X
// => P(K) + S(K) = X always holds
//
// Since P(K) + S(K) = X and P(K) is in [0, Z-1], the S(K) is in [X-Z+1, X]
// range. In our implementation, X is set to Z so S(K) is in [1, Z].
// By the above workflow, we always have enough data for output and no extra
// frames put into packetizer. It means we don't have any glitch!
//
// II. In pass-through mode:
//
//                Process()
//               +--------+
//   input D(N)  |        |  output D(N)
// -------------->-------->--------------->
//               |        |
//               +--------+
//
// The D(N) frames of data are just forwarded from input to output without any
// processing
void AudioInputProcessing::Process(AudioProcessingTrack* aTrack,
                                   GraphTime aFrom, GraphTime aTo,
                                   AudioSegment* aInput,
                                   AudioSegment* aOutput) {
  aTrack->AssertOnGraphThread();
  MOZ_ASSERT(aFrom <= aTo);
  MOZ_ASSERT(!mEnded);

  TrackTime need = aTo - aFrom;
  if (need == 0) {
    return;
  }

  MediaTrackGraph* graph = aTrack->Graph();
  // When disabled, output silence of the requested duration.
  if (!mEnabled) {
    LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Filling %" PRId64
              " frames of silence to output (disabled)",
              graph, graph->CurrentDriver(), this, need);
    aOutput->AppendNullData(need);
    return;
  }

  MOZ_ASSERT(aInput->GetDuration() == need,
             "Wrong data length from input port source");

  // Platform AEC depends on which output device is primary; re-derive the
  // config when the applied AEC state disagrees with the current routing.
  if (mSettings.mAecOn &&
      (mPlatformProcessingSetParams &
       CUBEB_INPUT_PROCESSING_PARAM_ECHO_CANCELLATION) &&
      mAppliedConfig.echo_canceller.enabled ==
          aTrack->Graph()->OutputForAECIsPrimary()) {
    ApplySettingsInternal(aTrack->Graph(), mSettings);
  }

  if (IsPassThrough(graph)) {
    LOG_FRAME(
        "(Graph %p, Driver %p) AudioInputProcessing %p Forwarding %" PRId64
        " frames of input data to output directly (PassThrough)",
        graph, graph->CurrentDriver(), this, aInput->GetDuration());
    aOutput->AppendSegment(aInput);
    return;
  }

  // If the requested input channel count is updated, create a new
  // packetizer. No need to change the pre-buffering since the rate is always
  // the same. The frames left in the packetizer would be replaced by null
  // data and then transferred to mSegment.
  EnsurePacketizer(aTrack);

  // Preconditions of the audio-processing logic.
  MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
                 mPacketizerInput->FramesAvailable() ==
             mPacketizerInput->mPacketSize);
  // We pre-buffer mPacketSize frames, but the maximum number of frames stuck in
  // the packetizer before it can emit a packet is mPacketSize-1. Thus that
  // remaining 1 frame will always be present in mSegment.
  MOZ_ASSERT(mSegment.GetDuration() >= 1);
  MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);

  PacketizeAndProcess(aTrack, *aInput);
  LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Buffer has %" PRId64
            " frames of data now, after packetizing and processing",
            graph, graph->CurrentDriver(), this, mSegment.GetDuration());

  // By setting pre-buffering to the number of frames of one packet, and
  // because the maximum number of frames stuck in the packetizer before
  // it can emit a packet is the mPacketSize-1, we always have at least
  // one more frame than output needs.
  MOZ_ASSERT(mSegment.GetDuration() > need);
  aOutput->AppendSlice(mSegment, 0, need);
  mSegment.RemoveLeading(need);
  LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p moving %" PRId64
            " frames of data to output, leaving %" PRId64 " frames in buffer",
            graph, graph->CurrentDriver(), this, need, mSegment.GetDuration());

  // Postconditions of the audio-processing logic.
  MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
                 mPacketizerInput->FramesAvailable() ==
             mPacketizerInput->mPacketSize);
  MOZ_ASSERT(mSegment.GetDuration() >= 1);
  MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);
}

/**
 * Feeds rendered (speaker) audio to the AEC as the reverse stream.
 * Buffers aChunk into 10ms packets (downmixed to at most MAX_CHANNELS) and
 * calls AnalyzeReverseStream() on each complete packet. No-op when disabled,
 * in pass-through mode, or for empty chunks. Graph-thread only.
 */
void AudioInputProcessing::ProcessOutputData(AudioProcessingTrack* aTrack,
                                             const AudioChunk& aChunk) {
  MOZ_ASSERT(aChunk.ChannelCount() > 0);
  aTrack->AssertOnGraphThread();

  if (!mEnabled) {
    return;
  }

  if (IsPassThrough(aTrack->Graph())) {
    return;
  }

  if (aChunk.mDuration == 0) {
    return;
  }

  TrackRate sampleRate = aTrack->mSampleRate;
  uint32_t framesPerPacket = GetPacketSize(sampleRate);  // in frames
  // Downmix from aChannels to MAX_CHANNELS if needed.
  uint32_t channelCount =
      std::min<uint32_t>(aChunk.ChannelCount(), MAX_CHANNELS);
  // Reallocate the planar staging buffer when the channel count or packet
  // size changed.
  if (channelCount != mOutputBufferChannelCount ||
      channelCount * framesPerPacket != mOutputBuffer.Length()) {
    mOutputBuffer.SetLength(channelCount * framesPerPacket);
    mOutputBufferChannelCount = channelCount;
    // It's ok to drop the audio still in the packetizer here: if this changes,
    // we changed devices or something.
    mOutputBufferFrameCount = 0;
  }

  TrackTime chunkOffset = 0;
  AutoTArray<float*, MAX_CHANNELS> channelPtrs;
  channelPtrs.SetLength(channelCount);
  // Slice the chunk into at-most-packet-sized pieces, accumulating into
  // mOutputBuffer until a full 10ms packet is available.
  do {
    MOZ_ASSERT(mOutputBufferFrameCount < framesPerPacket);
    uint32_t packetRemainder = framesPerPacket - mOutputBufferFrameCount;
    mSubChunk = aChunk;
    mSubChunk.SliceTo(
        chunkOffset, std::min(chunkOffset + packetRemainder, aChunk.mDuration));
    MOZ_ASSERT(mSubChunk.mDuration <= packetRemainder);

    for (uint32_t channel = 0; channel < channelCount; channel++) {
      channelPtrs[channel] =
          &mOutputBuffer[channel * framesPerPacket + mOutputBufferFrameCount];
    }
    mSubChunk.DownMixTo(channelPtrs);

    chunkOffset += mSubChunk.mDuration;
    MOZ_ASSERT(chunkOffset <= aChunk.mDuration);
    mOutputBufferFrameCount += mSubChunk.mDuration;
    MOZ_ASSERT(mOutputBufferFrameCount <= framesPerPacket);

    if (mOutputBufferFrameCount == framesPerPacket) {
      // Have a complete packet. Analyze it.
      EnsureAudioProcessing(aTrack);
      for (uint32_t channel = 0; channel < channelCount; channel++) {
        channelPtrs[channel] = &mOutputBuffer[channel * framesPerPacket];
      }
      StreamConfig reverseConfig(sampleRate, channelCount);
      DebugOnly<int> err = mAudioProcessing->AnalyzeReverseStream(
          channelPtrs.Elements(), reverseConfig);
      MOZ_ASSERT(!err, "Could not process the reverse stream.");

      mOutputBufferFrameCount = 0;
    }
  } while (chunkOffset < aChunk.mDuration);

  mSubChunk.SetNull(0);
}

// Only called if we're not in passthrough mode
void AudioInputProcessing::PacketizeAndProcess(AudioProcessingTrack* aTrack,
                                               const AudioSegment& aSegment) {
  MediaTrackGraph* graph = aTrack->Graph();
  MOZ_ASSERT(!IsPassThrough(graph),
             "This should be bypassed when in PassThrough mode.");
  MOZ_ASSERT(mEnabled);
  MOZ_ASSERT(mPacketizerInput);
  MOZ_ASSERT(mPacketizerInput->mPacketSize ==
             GetPacketSize(aTrack->mSampleRate));

  // Calculate number of the pending frames in mChunksInPacketizer.
  auto pendingFrames = [&]() {
    TrackTime frames = 0;
    for (const auto& p : mChunksInPacketizer) {
      frames += p.first;
    }
    return frames;
  };

  // Precondition of the Principal-labelling logic below.
  MOZ_ASSERT(mPacketizerInput->FramesAvailable() ==
             static_cast<uint32_t>(pendingFrames()));

  // The WriteToInterleavedBuffer will do upmix or downmix if the channel-count
  // in aSegment's chunks is different from mPacketizerInput->mChannels
  // WriteToInterleavedBuffer could be avoided once Bug 1729041 is done.
  size_t sampleCount = aSegment.WriteToInterleavedBuffer(
      mInterleavedBuffer, mPacketizerInput->mChannels);
  size_t frameCount =
      sampleCount / static_cast<size_t>(mPacketizerInput->mChannels);

  // Packetize our input data into 10ms chunks, deinterleave into planar channel
  // buffers, process, and append to the right MediaStreamTrack.
909 mPacketizerInput->Input(mInterleavedBuffer.Elements(), 910 static_cast<uint32_t>(frameCount)); 911 912 // Update mChunksInPacketizer and make sure the precondition for the 913 // Principal-labelling logic still holds. 914 for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded(); 915 iter.Next()) { 916 MOZ_ASSERT(iter->mDuration > 0); 917 mChunksInPacketizer.emplace_back( 918 std::make_pair(iter->mDuration, iter->mPrincipalHandle)); 919 } 920 MOZ_ASSERT(mPacketizerInput->FramesAvailable() == 921 static_cast<uint32_t>(pendingFrames())); 922 923 LOG_FRAME( 924 "(Graph %p, Driver %p) AudioInputProcessing %p Packetizing %zu frames. " 925 "Packetizer has %u frames (enough for %u packets) now", 926 graph, graph->CurrentDriver(), this, frameCount, 927 mPacketizerInput->FramesAvailable(), 928 mPacketizerInput->PacketsAvailable()); 929 930 size_t offset = 0; 931 932 while (mPacketizerInput->PacketsAvailable()) { 933 mPacketCount++; 934 uint32_t samplesPerPacket = 935 mPacketizerInput->mPacketSize * mPacketizerInput->mChannels; 936 if (mInputBuffer.Length() < samplesPerPacket) { 937 mInputBuffer.SetLength(samplesPerPacket); 938 } 939 if (mDeinterleavedBuffer.Length() < samplesPerPacket) { 940 mDeinterleavedBuffer.SetLength(samplesPerPacket); 941 } 942 float* packet = mInputBuffer.Data(); 943 mPacketizerInput->Output(packet); 944 945 // Downmix from mPacketizerInput->mChannels to mono if needed. We always 946 // have floats here, the packetizer performed the conversion. 
947 AutoTArray<float*, 8> deinterleavedPacketizedInputDataChannelPointers; 948 uint32_t channelCountInput = 0; 949 if (mPacketizerInput->mChannels > MAX_CHANNELS) { 950 channelCountInput = MONO; 951 deinterleavedPacketizedInputDataChannelPointers.SetLength( 952 channelCountInput); 953 deinterleavedPacketizedInputDataChannelPointers[0] = 954 mDeinterleavedBuffer.Data(); 955 // Downmix to mono (and effectively have a planar buffer) by summing all 956 // channels in the first channel, and scaling by the number of channels to 957 // avoid clipping. 958 float gain = 1.f / mPacketizerInput->mChannels; 959 size_t readIndex = 0; 960 for (size_t i = 0; i < mPacketizerInput->mPacketSize; i++) { 961 mDeinterleavedBuffer.Data()[i] = 0.; 962 for (size_t j = 0; j < mPacketizerInput->mChannels; j++) { 963 mDeinterleavedBuffer.Data()[i] += gain * packet[readIndex++]; 964 } 965 } 966 } else { 967 channelCountInput = mPacketizerInput->mChannels; 968 webrtc::InterleavedView<const float> interleaved( 969 packet, mPacketizerInput->mPacketSize, channelCountInput); 970 webrtc::DeinterleavedView<float> deinterleaved( 971 mDeinterleavedBuffer.Data(), mPacketizerInput->mPacketSize, 972 channelCountInput); 973 974 Deinterleave(interleaved, deinterleaved); 975 976 // Set up pointers into the deinterleaved data for code below 977 deinterleavedPacketizedInputDataChannelPointers.SetLength( 978 channelCountInput); 979 for (size_t i = 0; 980 i < deinterleavedPacketizedInputDataChannelPointers.Length(); ++i) { 981 deinterleavedPacketizedInputDataChannelPointers[i] = 982 deinterleaved[i].data(); 983 } 984 } 985 986 StreamConfig inputConfig(aTrack->mSampleRate, channelCountInput); 987 StreamConfig outputConfig = inputConfig; 988 989 EnsureAudioProcessing(aTrack); 990 // Bug 1404965: Get the right delay here, it saves some work down the line. 991 mAudioProcessing->set_stream_delay_ms(0); 992 993 // Bug 1414837: find a way to not allocate here. 
994 CheckedInt<size_t> bufferSize(sizeof(float)); 995 bufferSize *= mPacketizerInput->mPacketSize; 996 bufferSize *= channelCountInput; 997 RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize); 998 999 // Prepare channel pointers to the SharedBuffer created above. 1000 AutoTArray<float*, 8> processedOutputChannelPointers; 1001 AutoTArray<const float*, 8> processedOutputChannelPointersConst; 1002 processedOutputChannelPointers.SetLength(channelCountInput); 1003 processedOutputChannelPointersConst.SetLength(channelCountInput); 1004 1005 offset = 0; 1006 for (size_t i = 0; i < processedOutputChannelPointers.Length(); ++i) { 1007 processedOutputChannelPointers[i] = 1008 static_cast<float*>(buffer->Data()) + offset; 1009 processedOutputChannelPointersConst[i] = 1010 static_cast<float*>(buffer->Data()) + offset; 1011 offset += mPacketizerInput->mPacketSize; 1012 } 1013 1014 mAudioProcessing->ProcessStream( 1015 deinterleavedPacketizedInputDataChannelPointers.Elements(), inputConfig, 1016 outputConfig, processedOutputChannelPointers.Elements()); 1017 1018 // If logging is enabled, dump the audio processing stats twice a second 1019 if (MOZ_LOG_TEST(gMediaManagerLog, LogLevel::Debug) && 1020 !(mPacketCount % 50)) { 1021 AudioProcessingStats stats = mAudioProcessing->GetStatistics(); 1022 char msg[1024]; 1023 msg[0] = '\0'; 1024 size_t offset = 0; 1025 #define AddIfValue(format, member) \ 1026 if (stats.member.has_value()) { \ 1027 offset += SprintfBuf(msg + offset, sizeof(msg) - offset, \ 1028 #member ":" format ", ", stats.member.value()); \ 1029 } 1030 AddIfValue("%d", voice_detected); 1031 AddIfValue("%lf", echo_return_loss); 1032 AddIfValue("%lf", echo_return_loss_enhancement); 1033 AddIfValue("%lf", divergent_filter_fraction); 1034 AddIfValue("%d", delay_median_ms); 1035 AddIfValue("%d", delay_standard_deviation_ms); 1036 AddIfValue("%d", delay_ms); 1037 #undef AddIfValue 1038 LOG("AudioProcessing statistics: %s", msg); 1039 } 1040 1041 if (mEnded) { 1042 
continue; 1043 } 1044 1045 // We already have planar audio data of the right format. Insert into the 1046 // MTG. 1047 MOZ_ASSERT(processedOutputChannelPointers.Length() == channelCountInput); 1048 1049 // Insert the processed data chunk by chunk to mSegment with the paired 1050 // PrincipalHandle value. The chunks are tracked in mChunksInPacketizer. 1051 1052 auto getAudioChunk = [&](TrackTime aStart, TrackTime aEnd, 1053 const PrincipalHandle& aPrincipalHandle) { 1054 if (aStart == aEnd) { 1055 return AudioChunk(); 1056 } 1057 RefPtr<SharedBuffer> other = buffer; 1058 AudioChunk c = 1059 AudioChunk(other.forget(), processedOutputChannelPointersConst, 1060 static_cast<TrackTime>(mPacketizerInput->mPacketSize), 1061 aPrincipalHandle); 1062 c.SliceTo(aStart, aEnd); 1063 return c; 1064 }; 1065 1066 // The number of frames of data that needs to be labelled with Principal 1067 // values. 1068 TrackTime len = static_cast<TrackTime>(mPacketizerInput->mPacketSize); 1069 // The start offset of the unlabelled chunk. 1070 TrackTime start = 0; 1071 // By mChunksInPacketizer's information, we can keep labelling the 1072 // unlabelled frames chunk by chunk. 1073 while (!mChunksInPacketizer.empty()) { 1074 auto& [frames, principal] = mChunksInPacketizer.front(); 1075 const TrackTime end = start + frames; 1076 if (end > len) { 1077 // If the left unlabelled frames are part of this chunk, then we need to 1078 // adjust the number of frames in the chunk. 1079 if (len > start) { 1080 mSegment.AppendAndConsumeChunk(getAudioChunk(start, len, principal)); 1081 frames -= len - start; 1082 } 1083 break; 1084 } 1085 // Otherwise, the number of unlabelled frames is larger than or equal to 1086 // this chunk. We can label the whole chunk directly. 
1087 mSegment.AppendAndConsumeChunk(getAudioChunk(start, end, principal)); 1088 start = end; 1089 mChunksInPacketizer.pop_front(); 1090 } 1091 1092 LOG_FRAME( 1093 "(Graph %p, Driver %p) AudioInputProcessing %p Appending %u frames of " 1094 "packetized audio, leaving %u frames in packetizer (%" PRId64 1095 " frames in mChunksInPacketizer)", 1096 graph, graph->CurrentDriver(), this, mPacketizerInput->mPacketSize, 1097 mPacketizerInput->FramesAvailable(), pendingFrames()); 1098 1099 // Postcondition of the Principal-labelling logic. 1100 MOZ_ASSERT(mPacketizerInput->FramesAvailable() == 1101 static_cast<uint32_t>(pendingFrames())); 1102 } 1103 } 1104 1105 void AudioInputProcessing::DeviceChanged(MediaTrackGraph* aGraph) { 1106 aGraph->AssertOnGraphThread(); 1107 1108 // Reset some processing 1109 if (mAudioProcessing) { 1110 mAudioProcessing->Initialize(); 1111 } 1112 LOG_FRAME( 1113 "(Graph %p, Driver %p) AudioInputProcessing %p Reinitializing audio " 1114 "processing", 1115 aGraph, aGraph->CurrentDriver(), this); 1116 } 1117 1118 cubeb_input_processing_params 1119 AudioInputProcessing::RequestedInputProcessingParams( 1120 MediaTrackGraph* aGraph) const { 1121 aGraph->AssertOnGraphThread(); 1122 if (!mPlatformProcessingEnabled) { 1123 return CUBEB_INPUT_PROCESSING_PARAM_NONE; 1124 } 1125 if (mPlatformProcessingSetError) { 1126 return CUBEB_INPUT_PROCESSING_PARAM_NONE; 1127 } 1128 cubeb_input_processing_params params = CUBEB_INPUT_PROCESSING_PARAM_NONE; 1129 if (mSettings.mAecOn) { 1130 params |= CUBEB_INPUT_PROCESSING_PARAM_ECHO_CANCELLATION; 1131 } 1132 if (mSettings.mAgcOn) { 1133 params |= CUBEB_INPUT_PROCESSING_PARAM_AUTOMATIC_GAIN_CONTROL; 1134 } 1135 if (mSettings.mNoiseOn) { 1136 params |= CUBEB_INPUT_PROCESSING_PARAM_NOISE_SUPPRESSION; 1137 } 1138 return params; 1139 } 1140 1141 void AudioInputProcessing::ApplySettings(MediaTrackGraph* aGraph, 1142 CubebUtils::AudioDeviceID aDeviceID, 1143 const MediaEnginePrefs& aSettings) { 1144 
TRACE("AudioInputProcessing::ApplySettings"); 1145 aGraph->AssertOnGraphThread(); 1146 1147 // CUBEB_ERROR_NOT_SUPPORTED means the backend does not support platform 1148 // processing. In that case, leave the error in place so we don't request 1149 // processing anew. 1150 if (mPlatformProcessingSetError.valueOr(CUBEB_OK) != 1151 CUBEB_ERROR_NOT_SUPPORTED) { 1152 mPlatformProcessingSetError = Nothing(); 1153 } 1154 1155 // Read previous state from mSettings. 1156 uint32_t oldChannelCount = GetRequestedInputChannelCount(); 1157 1158 ApplySettingsInternal(aGraph, aSettings); 1159 1160 if (oldChannelCount != GetRequestedInputChannelCount()) { 1161 RequestedInputChannelCountChanged(aGraph, aDeviceID); 1162 } 1163 } 1164 1165 void AudioInputProcessing::ApplySettingsInternal( 1166 MediaTrackGraph* aGraph, const MediaEnginePrefs& aSettings) { 1167 TRACE("AudioInputProcessing::ApplySettingsInternal"); 1168 aGraph->AssertOnGraphThread(); 1169 1170 mPlatformProcessingEnabled = aSettings.mUsePlatformProcessing; 1171 1172 // Read previous state from the applied config. 
1173 bool wasPassThrough = IsPassThrough(aGraph); 1174 1175 mSettings = aSettings; 1176 mAppliedConfig = ConfigForPrefs(aGraph, aSettings); 1177 if (mAudioProcessing) { 1178 mAudioProcessing->ApplyConfig(mAppliedConfig); 1179 } 1180 1181 if (wasPassThrough != IsPassThrough(aGraph)) { 1182 PassThroughChanged(aGraph); 1183 } 1184 } 1185 1186 const webrtc::AudioProcessing::Config& AudioInputProcessing::AppliedConfig( 1187 MediaTrackGraph* aGraph) const { 1188 aGraph->AssertOnGraphThread(); 1189 return mAppliedConfig; 1190 } 1191 1192 void AudioInputProcessing::End() { 1193 mEnded = true; 1194 mSegment.Clear(); 1195 } 1196 1197 TrackTime AudioInputProcessing::NumBufferedFrames( 1198 MediaTrackGraph* aGraph) const { 1199 aGraph->AssertOnGraphThread(); 1200 return mSegment.GetDuration(); 1201 } 1202 1203 void AudioInputProcessing::SetEnvironmentWrapper( 1204 AudioProcessingTrack* aTrack, 1205 RefPtr<WebrtcEnvironmentWrapper> aEnvWrapper) { 1206 aTrack->AssertOnGraphThread(); 1207 mEnvWrapper = std::move(aEnvWrapper); 1208 } 1209 1210 void AudioInputProcessing::EnsurePacketizer(AudioProcessingTrack* aTrack) { 1211 aTrack->AssertOnGraphThread(); 1212 MOZ_ASSERT(mEnabled); 1213 MediaTrackGraph* graph = aTrack->Graph(); 1214 MOZ_ASSERT(!IsPassThrough(graph)); 1215 1216 uint32_t channelCount = GetRequestedInputChannelCount(); 1217 MOZ_ASSERT(channelCount > 0); 1218 if (mPacketizerInput && mPacketizerInput->mChannels == channelCount) { 1219 return; 1220 } 1221 1222 // If mPacketizerInput exists but with different channel-count, there is no 1223 // need to change pre-buffering since the packet size is the same as the old 1224 // one, since the rate is a constant. 
1225 MOZ_ASSERT_IF(mPacketizerInput, mPacketizerInput->mPacketSize == 1226 GetPacketSize(aTrack->mSampleRate)); 1227 bool needPreBuffering = !mPacketizerInput; 1228 if (mPacketizerInput) { 1229 const TrackTime numBufferedFrames = 1230 static_cast<TrackTime>(mPacketizerInput->FramesAvailable()); 1231 mSegment.AppendNullData(numBufferedFrames); 1232 mPacketizerInput = Nothing(); 1233 mChunksInPacketizer.clear(); 1234 } 1235 1236 mPacketizerInput.emplace(GetPacketSize(aTrack->mSampleRate), channelCount); 1237 1238 if (needPreBuffering) { 1239 LOG_FRAME( 1240 "(Graph %p, Driver %p) AudioInputProcessing %p: Adding %u frames of " 1241 "silence as pre-buffering", 1242 graph, graph->CurrentDriver(), this, mPacketizerInput->mPacketSize); 1243 1244 AudioSegment buffering; 1245 buffering.AppendNullData( 1246 static_cast<TrackTime>(mPacketizerInput->mPacketSize)); 1247 PacketizeAndProcess(aTrack, buffering); 1248 } 1249 } 1250 1251 void AudioInputProcessing::EnsureAudioProcessing(AudioProcessingTrack* aTrack) { 1252 aTrack->AssertOnGraphThread(); 1253 1254 MediaTrackGraph* graph = aTrack->Graph(); 1255 // If the AEC might need to deal with drift then inform it of this and it 1256 // will be less conservative about echo suppression. This can lead to some 1257 // suppression of non-echo signal, so do this only when drift is expected. 1258 // https://bugs.chromium.org/p/webrtc/issues/detail?id=11985#c2 1259 bool haveAECAndDrift = mSettings.mAecOn; 1260 if (haveAECAndDrift) { 1261 if (mSettings.mExpectDrift < 0) { 1262 haveAECAndDrift = 1263 graph->OutputForAECMightDrift() || 1264 aTrack->GetDeviceInputTrackGraphThread()->AsNonNativeInputTrack(); 1265 } else { 1266 haveAECAndDrift = mSettings.mExpectDrift > 0; 1267 } 1268 } 1269 if (!mAudioProcessing || haveAECAndDrift != mHadAECAndDrift) { 1270 TRACE("AudioProcessing creation"); 1271 LOG("Track %p AudioInputProcessing %p creating AudioProcessing. " 1272 "aec+drift: %s", 1273 aTrack, this, haveAECAndDrift ? 
"Y" : "N"); 1274 MOZ_ASSERT(mEnvWrapper); 1275 mHadAECAndDrift = haveAECAndDrift; 1276 BuiltinAudioProcessingBuilder builder; 1277 builder.SetConfig(AppliedConfig(graph)); 1278 if (haveAECAndDrift) { 1279 // Setting an EchoControlFactory always enables AEC, overriding 1280 // Config::echo_canceller.enabled, so do this only when AEC is enabled. 1281 EchoCanceller3Config aec3Config; 1282 aec3Config.echo_removal_control.has_clock_drift = true; 1283 builder.SetEchoControlFactory( 1284 std::make_unique<EchoCanceller3Factory>(aec3Config)); 1285 } 1286 mAudioProcessing.reset(builder.Build(mEnvWrapper->Environment()).release()); 1287 } 1288 } 1289 1290 void AudioInputProcessing::ResetAudioProcessing(MediaTrackGraph* aGraph) { 1291 aGraph->AssertOnGraphThread(); 1292 MOZ_ASSERT(IsPassThrough(aGraph) || !mEnabled); 1293 1294 LOG_FRAME( 1295 "(Graph %p, Driver %p) AudioInputProcessing %p Resetting audio " 1296 "processing", 1297 aGraph, aGraph->CurrentDriver(), this); 1298 1299 // Reset AudioProcessing so that if we resume processing in the future it 1300 // doesn't depend on old state. 1301 if (mAudioProcessing) { 1302 mAudioProcessing->Initialize(); 1303 } 1304 1305 MOZ_ASSERT_IF(mPacketizerInput, 1306 static_cast<uint32_t>(mSegment.GetDuration()) + 1307 mPacketizerInput->FramesAvailable() == 1308 mPacketizerInput->mPacketSize); 1309 1310 // It's ok to clear all the internal buffer here since we won't use mSegment 1311 // in pass-through mode or when audio processing is disabled. 
1312 LOG_FRAME( 1313 "(Graph %p, Driver %p) AudioInputProcessing %p Emptying out %" PRId64 1314 " frames of data", 1315 aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration()); 1316 mSegment.Clear(); 1317 1318 mPacketizerInput = Nothing(); 1319 mChunksInPacketizer.clear(); 1320 } 1321 1322 void AudioProcessingTrack::Destroy() { 1323 MOZ_ASSERT(NS_IsMainThread()); 1324 DisconnectDeviceInput(); 1325 1326 MediaTrack::Destroy(); 1327 } 1328 1329 void AudioProcessingTrack::SetInputProcessing( 1330 RefPtr<AudioInputProcessing> aInputProcessing) { 1331 if (IsDestroyed()) { 1332 return; 1333 } 1334 1335 RefPtr<WebrtcEnvironmentWrapper> envWrapper = 1336 WebrtcEnvironmentWrapper::Create(dom::RTCStatsTimestampMaker::Create( 1337 nsGlobalWindowInner::GetInnerWindowWithId(GetWindowId()))); 1338 1339 QueueControlMessageWithNoShutdown( 1340 [self = RefPtr{this}, this, inputProcessing = std::move(aInputProcessing), 1341 envWrapper = std::move(envWrapper)]() mutable { 1342 TRACE("AudioProcessingTrack::SetInputProcessingImpl"); 1343 inputProcessing->SetEnvironmentWrapper(self, std::move(envWrapper)); 1344 SetInputProcessingImpl(std::move(inputProcessing)); 1345 }); 1346 } 1347 1348 AudioProcessingTrack* AudioProcessingTrack::Create(MediaTrackGraph* aGraph) { 1349 MOZ_ASSERT(NS_IsMainThread()); 1350 AudioProcessingTrack* track = new AudioProcessingTrack(aGraph->GraphRate()); 1351 aGraph->AddTrack(track); 1352 return track; 1353 } 1354 1355 void AudioProcessingTrack::DestroyImpl() { 1356 ProcessedMediaTrack::DestroyImpl(); 1357 if (mInputProcessing) { 1358 mInputProcessing->End(); 1359 } 1360 } 1361 1362 void AudioProcessingTrack::ProcessInput(GraphTime aFrom, GraphTime aTo, 1363 uint32_t aFlags) { 1364 TRACE_COMMENT("AudioProcessingTrack::ProcessInput", "AudioProcessingTrack %p", 1365 this); 1366 MOZ_ASSERT(mInputProcessing); 1367 MOZ_ASSERT(aFrom < aTo); 1368 1369 LOG_FRAME( 1370 "(Graph %p, Driver %p) AudioProcessingTrack %p ProcessInput from %" PRId64 1371 " to %" PRId64 
", needs %" PRId64 " frames", 1372 mGraph, mGraph->CurrentDriver(), this, aFrom, aTo, aTo - aFrom); 1373 1374 if (!mInputProcessing->IsEnded()) { 1375 MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aFrom); 1376 if (mInputs.IsEmpty()) { 1377 GetData<AudioSegment>()->AppendNullData(aTo - aFrom); 1378 LOG_FRAME("(Graph %p, Driver %p) AudioProcessingTrack %p Filling %" PRId64 1379 " frames of null data (no input source)", 1380 mGraph, mGraph->CurrentDriver(), this, aTo - aFrom); 1381 } else { 1382 MOZ_ASSERT(mInputs.Length() == 1); 1383 AudioSegment data; 1384 DeviceInputConsumerTrack::GetInputSourceData(data, aFrom, aTo); 1385 mInputProcessing->Process(this, aFrom, aTo, &data, 1386 GetData<AudioSegment>()); 1387 } 1388 MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aTo); 1389 1390 ApplyTrackDisabling(mSegment.get()); 1391 } else if (aFlags & ALLOW_END) { 1392 mEnded = true; 1393 } 1394 } 1395 1396 void AudioProcessingTrack::NotifyOutputData(MediaTrackGraph* aGraph, 1397 const AudioChunk& aChunk) { 1398 MOZ_ASSERT(mGraph == aGraph, "Cannot feed audio output to another graph"); 1399 AssertOnGraphThread(); 1400 if (mInputProcessing) { 1401 mInputProcessing->ProcessOutputData(this, aChunk); 1402 } 1403 } 1404 1405 void AudioProcessingTrack::SetInputProcessingImpl( 1406 RefPtr<AudioInputProcessing> aInputProcessing) { 1407 AssertOnGraphThread(); 1408 mInputProcessing = std::move(aInputProcessing); 1409 } 1410 1411 MediaEngineWebRTCAudioCaptureSource::MediaEngineWebRTCAudioCaptureSource( 1412 const MediaDevice* aMediaDevice) { 1413 MOZ_ASSERT(aMediaDevice->mMediaSource == MediaSourceEnum::AudioCapture); 1414 } 1415 1416 /* static */ 1417 nsString MediaEngineWebRTCAudioCaptureSource::GetUUID() { 1418 nsID uuid{}; 1419 char uuidBuffer[NSID_LENGTH]; 1420 nsCString asciiString; 1421 ErrorResult rv; 1422 1423 rv = nsID::GenerateUUIDInPlace(uuid); 1424 if (rv.Failed()) { 1425 return u""_ns; 1426 } 1427 1428 uuid.ToProvidedString(uuidBuffer); 1429 asciiString.AssignASCII(uuidBuffer); 
1430 1431 // Remove {} and the null terminator 1432 return NS_ConvertASCIItoUTF16(Substring(asciiString, 1, NSID_LENGTH - 3)); 1433 } 1434 1435 /* static */ 1436 nsString MediaEngineWebRTCAudioCaptureSource::GetGroupId() { 1437 return u"AudioCaptureGroup"_ns; 1438 } 1439 1440 void MediaEngineWebRTCAudioCaptureSource::SetTrack( 1441 const RefPtr<MediaTrack>& aTrack, const PrincipalHandle& aPrincipalHandle) { 1442 AssertIsOnOwningThread(); 1443 // Nothing to do here. aTrack is a placeholder dummy and not exposed. 1444 } 1445 1446 nsresult MediaEngineWebRTCAudioCaptureSource::Start() { 1447 AssertIsOnOwningThread(); 1448 return NS_OK; 1449 } 1450 1451 nsresult MediaEngineWebRTCAudioCaptureSource::Stop() { 1452 AssertIsOnOwningThread(); 1453 return NS_OK; 1454 } 1455 1456 nsresult MediaEngineWebRTCAudioCaptureSource::Reconfigure( 1457 const dom::MediaTrackConstraints& aConstraints, 1458 const MediaEnginePrefs& aPrefs, const char** aOutBadConstraint) { 1459 return NS_OK; 1460 } 1461 1462 void MediaEngineWebRTCAudioCaptureSource::GetSettings( 1463 dom::MediaTrackSettings& aOutSettings) const { 1464 aOutSettings.mAutoGainControl.Construct(false); 1465 aOutSettings.mEchoCancellation.Construct(false); 1466 aOutSettings.mNoiseSuppression.Construct(false); 1467 aOutSettings.mChannelCount.Construct(1); 1468 } 1469 1470 } // namespace mozilla 1471 1472 // Don't allow our macros to leak into other cpps in our unified build unit. 1473 #undef MAX_CHANNELS 1474 #undef MONO 1475 #undef MAX_SAMPLING_FREQ