tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MediaEngineWebRTCAudio.cpp (55722B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "MediaEngineWebRTCAudio.h"
      7 
      8 #include <algorithm>
      9 
     10 #include "AudioConverter.h"
     11 #include "MediaManager.h"
     12 #include "MediaTrackConstraints.h"
     13 #include "MediaTrackGraph.h"
     14 #include "Tracing.h"
     15 #include "api/audio/builtin_audio_processing_builder.h"
     16 #include "api/audio/echo_canceller3_factory.h"
     17 #include "api/environment/environment_factory.h"
     18 #include "common_audio/include/audio_util.h"
     19 #include "libwebrtcglue/WebrtcEnvironmentWrapper.h"
     20 #include "modules/audio_processing/include/audio_processing.h"
     21 #include "mozilla/Assertions.h"
     22 #include "mozilla/ErrorNames.h"
     23 #include "mozilla/Logging.h"
     24 #include "mozilla/Sprintf.h"
     25 #include "nsGlobalWindowInner.h"
     26 #include "nsIDUtils.h"
     27 #include "transport/runnable_utils.h"
     28 
     29 using namespace webrtc;
     30 
     31 // These are restrictions from the webrtc.org code
     32 #define MAX_CHANNELS 2
     33 #define MONO 1
     34 #define MAX_SAMPLING_FREQ 48000  // Hz - multiple of 100
     35 
     36 namespace mozilla {
     37 
     38 using dom::MediaSourceEnum;
     39 
     40 extern LazyLogModule gMediaManagerLog;
     41 #define LOG(...) MOZ_LOG(gMediaManagerLog, LogLevel::Debug, (__VA_ARGS__))
     42 #define LOG_FRAME(...) \
     43  MOZ_LOG(gMediaManagerLog, LogLevel::Verbose, (__VA_ARGS__))
     44 #define LOG_ERROR(...) MOZ_LOG(gMediaManagerLog, LogLevel::Error, (__VA_ARGS__))
     45 
     46 /**
     47 * WebRTC Microphone MediaEngineSource.
     48 */
     49 
// Construct a microphone source for aMediaDevice. Runs on the media thread;
// mSettings/mCapabilities are main-thread-read holders, and the capabilities
// are populated asynchronously on the main thread below.
MediaEngineWebRTCMicrophoneSource::MediaEngineWebRTCMicrophoneSource(
    const MediaDevice* aMediaDevice)
    : mPrincipal(PRINCIPAL_HANDLE_NONE),
      mDeviceInfo(aMediaDevice->mAudioDeviceInfo),
      mDeviceMaxChannelCount(mDeviceInfo->MaxChannels()),
      mSettings(new nsMainThreadPtrHolder<
                media::Refcountable<dom::MediaTrackSettings>>(
          "MediaEngineWebRTCMicrophoneSource::mSettings",
          new media::Refcountable<dom::MediaTrackSettings>(),
          // Non-strict means it won't assert main thread for us.
          // It would be great if it did but we're already on the media thread.
          /* aStrict = */ false)),
      mCapabilities(new nsMainThreadPtrHolder<
                    media::Refcountable<dom::MediaTrackCapabilities>>(
          "MediaEngineWebRTCMicrophoneSource::mCapabilities",
          new media::Refcountable<dom::MediaTrackCapabilities>(),
          // Non-strict means it won't assert main thread for us.
          // It would be great if it did but we're already on the media thread.
          /* aStrict = */ false)) {
  MOZ_ASSERT(aMediaDevice->mMediaSource == MediaSourceEnum::Microphone);
#ifndef ANDROID
  MOZ_ASSERT(mDeviceInfo->DeviceID());
#endif

  // We'll init lazily as needed
  mSettings->mEchoCancellation.Construct(0);
  mSettings->mAutoGainControl.Construct(0);
  mSettings->mNoiseSuppression.Construct(0);
  mSettings->mChannelCount.Construct(0);

  mState = kReleased;

  // Populate mCapabilities on the main thread: boolean on/off choices for the
  // processing stages, and the channel-count range the device supports.
  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [capabilities = mCapabilities,
                 deviceMaxChannelCount = mDeviceMaxChannelCount] {
        nsTArray<bool> echoCancellation;
        echoCancellation.AppendElement(true);
        echoCancellation.AppendElement(false);
        capabilities->mEchoCancellation.Reset();
        capabilities->mEchoCancellation.Construct(std::move(echoCancellation));

        nsTArray<bool> autoGainControl;
        autoGainControl.AppendElement(true);
        autoGainControl.AppendElement(false);
        capabilities->mAutoGainControl.Reset();
        capabilities->mAutoGainControl.Construct(std::move(autoGainControl));

        nsTArray<bool> noiseSuppression;
        noiseSuppression.AppendElement(true);
        noiseSuppression.AppendElement(false);
        capabilities->mNoiseSuppression.Reset();
        capabilities->mNoiseSuppression.Construct(std::move(noiseSuppression));

        // Only expose a channelCount range when the device reports a nonzero
        // maximum.
        if (deviceMaxChannelCount) {
          dom::ULongRange channelCountRange;
          channelCountRange.mMax.Construct(deviceMaxChannelCount);
          channelCountRange.mMin.Construct(1);
          capabilities->mChannelCount.Reset();
          capabilities->mChannelCount.Construct(channelCountRange);
        }
      }));
}
    113 
    114 /*static*/ already_AddRefed<MediaEngineWebRTCMicrophoneSource>
    115 MediaEngineWebRTCMicrophoneSource::CreateFrom(
    116    const MediaEngineWebRTCMicrophoneSource* aSource,
    117    const MediaDevice* aMediaDevice) {
    118  auto src = MakeRefPtr<MediaEngineWebRTCMicrophoneSource>(aMediaDevice);
    119  *static_cast<dom::MediaTrackSettings*>(src->mSettings) = *aSource->mSettings;
    120  *static_cast<dom::MediaTrackCapabilities*>(src->mCapabilities) =
    121      *aSource->mCapabilities;
    122  return src.forget();
    123 }
    124 
// Combine content constraints (aConstraintsUpdate) with application prefs
// (aInPrefs) into the effective settings written to *aOutPrefs.
// Returns NS_ERROR_FAILURE and sets *aOutBadConstraint to "channelCount" if
// the channelCount constraint cannot be satisfied by the device.
nsresult MediaEngineWebRTCMicrophoneSource::EvaluateSettings(
    const NormalizedConstraints& aConstraintsUpdate,
    const MediaEnginePrefs& aInPrefs, MediaEnginePrefs* aOutPrefs,
    const char** aOutBadConstraint) {
  AssertIsOnOwningThread();

  FlattenedConstraints c(aConstraintsUpdate);
  MediaEnginePrefs prefs = aInPrefs;

  // AGC and noise suppression only default to on when AEC is also on;
  // explicit constraints override that default.
  prefs.mAecOn = c.mEchoCancellation.Get(aInPrefs.mAecOn);
  prefs.mAgcOn = c.mAutoGainControl.Get(aInPrefs.mAgcOn && prefs.mAecOn);
  prefs.mNoiseOn = c.mNoiseSuppression.Get(aInPrefs.mNoiseOn && prefs.mAecOn);

  // Determine an actual channel count to use for this source. Three factors at
  // play here: the device capabilities, the constraints passed in by content,
  // and a pref that can force things (for testing)
  int32_t maxChannels = static_cast<int32_t>(mDeviceInfo->MaxChannels());

  // First, check channelCount violation wrt constraints. This fails in case of
  // error.
  if (c.mChannelCount.mMin > maxChannels) {
    *aOutBadConstraint = "channelCount";
    return NS_ERROR_FAILURE;
  }
  // A pref can force the channel count to use. If the pref has a value of zero
  // or lower, it has no effect.
  if (aInPrefs.mChannels <= 0) {
    prefs.mChannels = maxChannels;
  }

  // Get the number of channels asked for by content, and clamp it between the
  // pref and the maximum number of channels that the device supports.
  prefs.mChannels = c.mChannelCount.Get(std::min(prefs.mChannels, maxChannels));
  prefs.mChannels = std::clamp(prefs.mChannels, 1, maxChannels);

  LOG("Mic source %p Audio config: aec: %s, agc: %s, noise: %s, channels: %d",
      this, prefs.mAecOn ? "on" : "off", prefs.mAgcOn ? "on" : "off",
      prefs.mNoiseOn ? "on" : "off", prefs.mChannels);

  *aOutPrefs = prefs;

  return NS_OK;
}
    168 
    169 nsresult MediaEngineWebRTCMicrophoneSource::Reconfigure(
    170    const dom::MediaTrackConstraints& aConstraints,
    171    const MediaEnginePrefs& aPrefs, const char** aOutBadConstraint) {
    172  AssertIsOnOwningThread();
    173  MOZ_ASSERT(mTrack);
    174 
    175  LOG("Mic source %p Reconfigure ", this);
    176 
    177  NormalizedConstraints constraints(aConstraints);
    178  MediaEnginePrefs outputPrefs;
    179  nsresult rv =
    180      EvaluateSettings(constraints, aPrefs, &outputPrefs, aOutBadConstraint);
    181  if (NS_FAILED(rv)) {
    182    if (aOutBadConstraint) {
    183      return NS_ERROR_INVALID_ARG;
    184    }
    185 
    186    nsAutoCString name;
    187    GetErrorName(rv, name);
    188    LOG("Mic source %p Reconfigure() failed unexpectedly. rv=%s", this,
    189        name.Data());
    190    Stop();
    191    return NS_ERROR_UNEXPECTED;
    192  }
    193 
    194  ApplySettings(outputPrefs);
    195 
    196  mCurrentPrefs = outputPrefs;
    197 
    198  return NS_OK;
    199 }
    200 
// Translate MediaEnginePrefs into a webrtc::AudioProcessing::Config,
// validating the AGC mode and noise-suppression level prefs, and disabling
// stages here that the platform already applies (per
// mPlatformProcessingSetParams).
AudioProcessing::Config AudioInputProcessing::ConfigForPrefs(
    MediaTrackGraph* aGraph, const MediaEnginePrefs& aPrefs) const {
  AudioProcessing::Config config;

  config.pipeline.multi_channel_render = true;
  config.pipeline.multi_channel_capture = true;

  config.echo_canceller.enabled = aPrefs.mAecOn;
  config.echo_canceller.mobile_mode = aPrefs.mUseAecMobile;

  // AGC1 is used unless the pref forces AGC2 (see gain_controller2 below).
  if ((config.gain_controller1.enabled =
           aPrefs.mAgcOn && !aPrefs.mAgc2Forced)) {
    auto mode = static_cast<AudioProcessing::Config::GainController1::Mode>(
        aPrefs.mAgc);
    // Unknown pref values fall back to kAdaptiveDigital.
    if (mode != AudioProcessing::Config::GainController1::kAdaptiveAnalog &&
        mode != AudioProcessing::Config::GainController1::kAdaptiveDigital &&
        mode != AudioProcessing::Config::GainController1::kFixedDigital) {
      LOG_ERROR("AudioInputProcessing %p Attempt to set invalid AGC mode %d",
                this, static_cast<int>(mode));
      mode = AudioProcessing::Config::GainController1::kAdaptiveDigital;
    }
#if defined(WEBRTC_IOS) || defined(ATA) || defined(WEBRTC_ANDROID)
    // Adaptive-analog AGC is not supported on mobile platforms.
    if (mode == AudioProcessing::Config::GainController1::kAdaptiveAnalog) {
      LOG_ERROR(
          "AudioInputProcessing %p Invalid AGC mode kAdaptiveAnalog on "
          "mobile",
          this);
      MOZ_ASSERT_UNREACHABLE(
          "Bad pref set in all.js or in about:config"
          " for the auto gain, on mobile.");
      mode = AudioProcessing::Config::GainController1::kFixedDigital;
    }
#endif
    config.gain_controller1.mode = mode;
  }
  config.gain_controller2.enabled =
      config.gain_controller2.adaptive_digital.enabled =
          aPrefs.mAgcOn && aPrefs.mAgc2Forced;

  if ((config.noise_suppression.enabled = aPrefs.mNoiseOn)) {
    auto level = static_cast<AudioProcessing::Config::NoiseSuppression::Level>(
        aPrefs.mNoise);
    // Unknown pref values fall back to kModerate.
    if (level != AudioProcessing::Config::NoiseSuppression::kLow &&
        level != AudioProcessing::Config::NoiseSuppression::kModerate &&
        level != AudioProcessing::Config::NoiseSuppression::kHigh &&
        level != AudioProcessing::Config::NoiseSuppression::kVeryHigh) {
      LOG_ERROR(
          "AudioInputProcessing %p Attempt to set invalid noise suppression "
          "level %d",
          this, static_cast<int>(level));

      level = AudioProcessing::Config::NoiseSuppression::kModerate;
    }
    config.noise_suppression.level = level;
  }

  config.transient_suppression.enabled = aPrefs.mTransientOn;

  config.high_pass_filter.enabled = aPrefs.mHPFOn;

  // Avoid double-processing: turn off stages in libwebrtc that the platform
  // has reported as applied.
  if ((mPlatformProcessingSetParams &
       CUBEB_INPUT_PROCESSING_PARAM_ECHO_CANCELLATION)) {
    // Platform processing (VPIO on macOS) will cancel echo from the output
    // device used as the output stream. Leave it on here when rendering audio
    // to another output device.
    config.echo_canceller.enabled = !aGraph->OutputForAECIsPrimary();
  }
  if (mPlatformProcessingSetParams &
      CUBEB_INPUT_PROCESSING_PARAM_AUTOMATIC_GAIN_CONTROL) {
    config.gain_controller1.enabled = config.gain_controller2.enabled = false;
  }
  if (mPlatformProcessingSetParams &
      CUBEB_INPUT_PROCESSING_PARAM_NOISE_SUPPRESSION) {
    config.noise_suppression.enabled = false;
  }

  return config;
}
    279 
// Publish aPrefs to the main-thread-readable mSettings and forward them to
// the graph-thread AudioInputProcessing instance via a control message.
void MediaEngineWebRTCMicrophoneSource::ApplySettings(
    const MediaEnginePrefs& aPrefs) {
  AssertIsOnOwningThread();

  TRACE("ApplySettings");
  MOZ_ASSERT(
      mTrack,
      "ApplySetting is to be called only after SetTrack has been called");

  // |that| keeps this source alive for the duration of the runnable.
  RefPtr<MediaEngineWebRTCMicrophoneSource> that = this;
  CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID();
  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [this, that, deviceID, track = mTrack, prefs = aPrefs] {
        mSettings->mEchoCancellation.Value() = prefs.mAecOn;
        mSettings->mAutoGainControl.Value() = prefs.mAgcOn;
        mSettings->mNoiseSuppression.Value() = prefs.mNoiseOn;
        mSettings->mChannelCount.Value() = prefs.mChannels;

        if (track->IsDestroyed()) {
          // The track is gone; there is nothing to apply the settings to.
          return;
        }
        // Hop to the graph thread to update the processing configuration.
        track->QueueControlMessageWithNoShutdown(
            [track, deviceID, prefs, inputProcessing = mInputProcessing] {
              inputProcessing->ApplySettings(track->Graph(), deviceID, prefs);
            });
      }));
}
    307 
    308 nsresult MediaEngineWebRTCMicrophoneSource::Allocate(
    309    const dom::MediaTrackConstraints& aConstraints,
    310    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
    311    const char** aOutBadConstraint) {
    312  AssertIsOnOwningThread();
    313 
    314  mState = kAllocated;
    315 
    316  NormalizedConstraints normalized(aConstraints);
    317  MediaEnginePrefs outputPrefs;
    318  nsresult rv =
    319      EvaluateSettings(normalized, aPrefs, &outputPrefs, aOutBadConstraint);
    320  if (NS_FAILED(rv)) {
    321    return rv;
    322  }
    323 
    324  NS_DispatchToMainThread(NS_NewRunnableFunction(
    325      __func__, [settings = mSettings, prefs = outputPrefs] {
    326        settings->mEchoCancellation.Value() = prefs.mAecOn;
    327        settings->mAutoGainControl.Value() = prefs.mAgcOn;
    328        settings->mNoiseSuppression.Value() = prefs.mNoiseOn;
    329        settings->mChannelCount.Value() = prefs.mChannels;
    330      }));
    331 
    332  mCurrentPrefs = outputPrefs;
    333 
    334  return rv;
    335 }
    336 
// Release the source: end graph-side processing (if a track was set) and
// reset local state. Must be called in the kStopped or kAllocated state.
nsresult MediaEngineWebRTCMicrophoneSource::Deallocate() {
  AssertIsOnOwningThread();

  MOZ_ASSERT(mState == kStopped || mState == kAllocated);

  if (mTrack) {
    NS_DispatchToMainThread(NS_NewRunnableFunction(
        __func__,
        [track = std::move(mTrack), inputProcessing = mInputProcessing] {
          if (track->IsDestroyed()) {
            // This track has already been destroyed on main thread by its
            // DOMMediaStream. No cleanup left to do.
            return;
          }
          track->QueueControlMessageWithNoShutdown([inputProcessing] {
            TRACE("mInputProcessing::End");
            inputProcessing->End();
          });
        }));
  }

  // Reset all state. This is not strictly necessary, this instance will get
  // destroyed soon.
  mTrack = nullptr;
  mPrincipal = PRINCIPAL_HANDLE_NONE;

  // If empty, no callbacks to deliver data should be occurring
  MOZ_ASSERT(mState != kReleased, "Source not allocated");
  MOZ_ASSERT(mState != kStarted, "Source not stopped");

  mState = kReleased;
  LOG("Mic source %p Audio device %s deallocated", this,
      NS_ConvertUTF16toUTF8(mDeviceInfo->Name()).get());
  return NS_OK;
}
    372 
// Associate this source with aTrack (must be an AudioProcessingTrack) and
// aPrincipal, create the graph-side AudioInputProcessing instance, and hand
// it to the track on the main thread. May only be called once per source
// (asserted via !mTrack).
void MediaEngineWebRTCMicrophoneSource::SetTrack(
    const RefPtr<MediaTrack>& aTrack, const PrincipalHandle& aPrincipal) {
  AssertIsOnOwningThread();
  MOZ_ASSERT(aTrack);
  MOZ_ASSERT(aTrack->AsAudioProcessingTrack());

  MOZ_ASSERT(!mTrack);
  MOZ_ASSERT(mPrincipal == PRINCIPAL_HANDLE_NONE);
  mTrack = aTrack->AsAudioProcessingTrack();
  mPrincipal = aPrincipal;

  // Size the processing object for the device's maximum channel count.
  mInputProcessing =
      MakeAndAddRef<AudioInputProcessing>(mDeviceMaxChannelCount);

  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [track = mTrack, processing = mInputProcessing]() mutable {
        track->SetInputProcessing(std::move(processing));
        track->Resume();  // Suspended by MediaManager
      }));

  LOG("Mic source %p Track %p registered for microphone capture", this,
      aTrack.get());
}
    396 
// Start capture: apply the current prefs, then (on the main thread) enable
// graph-side processing and connect the device input to the track.
// Idempotent if already started.
nsresult MediaEngineWebRTCMicrophoneSource::Start() {
  AssertIsOnOwningThread();

  // This spans setting both the enabled state and mState.
  if (mState == kStarted) {
    return NS_OK;
  }

  MOZ_ASSERT(mState == kAllocated || mState == kStopped);

  ApplySettings(mCurrentPrefs);

  CubebUtils::AudioDeviceID deviceID = mDeviceInfo->DeviceID();
  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [inputProcessing = mInputProcessing, deviceID, track = mTrack,
                 principal = mPrincipal] {
        if (track->IsDestroyed()) {
          return;
        }

        // Enable processing on the graph thread before connecting the input.
        track->QueueControlMessageWithNoShutdown([track, inputProcessing] {
          TRACE("mInputProcessing::Start");
          inputProcessing->Start(track->Graph());
        });
        track->ConnectDeviceInput(deviceID, inputProcessing.get(), principal);
      }));

  MOZ_ASSERT(mState != kReleased);
  mState = kStarted;

  return NS_OK;
}
    429 
// Stop capture: (on the main thread) disconnect the device input from the
// track, then disable graph-side processing. Idempotent if already stopped.
nsresult MediaEngineWebRTCMicrophoneSource::Stop() {
  AssertIsOnOwningThread();

  LOG("Mic source %p Stop()", this);
  MOZ_ASSERT(mTrack, "SetTrack must have been called before ::Stop");

  if (mState == kStopped) {
    // Already stopped - this is allowed
    return NS_OK;
  }

  NS_DispatchToMainThread(NS_NewRunnableFunction(
      __func__, [inputProcessing = mInputProcessing, deviceInfo = mDeviceInfo,
                 track = mTrack] {
        if (track->IsDestroyed()) {
          return;
        }

        MOZ_ASSERT(track->DeviceId().value() == deviceInfo->DeviceID());
        track->DisconnectDeviceInput();
        // Disable processing on the graph thread after disconnecting.
        track->QueueControlMessageWithNoShutdown([track, inputProcessing] {
          TRACE("mInputProcessing::Stop");
          inputProcessing->Stop(track->Graph());
        });
      }));

  MOZ_ASSERT(mState == kStarted, "Should be started when stopping");
  mState = kStopped;

  return NS_OK;
}
    461 
// Main-thread snapshot of the currently published track settings.
void MediaEngineWebRTCMicrophoneSource::GetSettings(
    dom::MediaTrackSettings& aOutSettings) const {
  MOZ_ASSERT(NS_IsMainThread());
  aOutSettings = *mSettings;
}
    467 
// Main-thread snapshot of the capabilities populated by the constructor.
void MediaEngineWebRTCMicrophoneSource::GetCapabilities(
    dom::MediaTrackCapabilities& aOutCapabilities) const {
  MOZ_ASSERT(NS_IsMainThread());
  aOutCapabilities = *mCapabilities;
}
    473 
    474 AudioInputProcessing::AudioInputProcessing(uint32_t aMaxChannelCount)
    475    : mInputDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100),
    476      mEnabled(false),
    477      mEnded(false),
    478      mPacketCount(0) {
    479  mSettings.mChannels = static_cast<int32_t>(std::min<uint32_t>(
    480      std::numeric_limits<int32_t>::max(), aMaxChannelCount));
    481 }
    482 
// The input device is disconnecting from the graph: forget any
// platform-applied processing params (and their generation) and re-apply the
// full configuration in libwebrtc.
void AudioInputProcessing::Disconnect(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();
  mPlatformProcessingSetGeneration = 0;
  mPlatformProcessingSetParams = CUBEB_INPUT_PROCESSING_PARAM_NONE;
  ApplySettingsInternal(aGraph, mSettings);
}
    489 
// A new set of platform processing params (aRequestedParams, generation
// aGeneration) is being requested from the platform. Until the result
// arrives, assume the intersection of the currently applied params and the
// request, and reconfigure libwebrtc processing accordingly.
void AudioInputProcessing::NotifySetRequestedInputProcessingParams(
    MediaTrackGraph* aGraph, int aGeneration,
    cubeb_input_processing_params aRequestedParams) {
  aGraph->AssertOnGraphThread();
  MOZ_ASSERT(aGeneration >= mPlatformProcessingSetGeneration);
  if (aGeneration <= mPlatformProcessingSetGeneration) {
    // Stale notification; we are already at (or past) this generation.
    return;
  }
  mPlatformProcessingSetGeneration = aGeneration;
  cubeb_input_processing_params intersection =
      mPlatformProcessingSetParams & aRequestedParams;
  LOG("AudioInputProcessing %p platform processing params being applied are "
      "now %s (Gen %d). Assuming %s while waiting for the result.",
      this, CubebUtils::ProcessingParamsToString(aRequestedParams).get(),
      aGeneration, CubebUtils::ProcessingParamsToString(intersection).get());
  if (mPlatformProcessingSetParams == intersection) {
    LOG("AudioInputProcessing %p intersection %s of platform processing params "
        "already applied. Doing nothing.",
        this, CubebUtils::ProcessingParamsToString(intersection).get());
    return;
  }
  mPlatformProcessingSetParams = intersection;
  ApplySettingsInternal(aGraph, mSettings);
}
    514 
// Result callback for the platform processing params request of generation
// aGeneration. On success, record the params the platform now applies; on
// error, record the error and assume no platform processing. Either way,
// reconfigure libwebrtc to complement what the platform applies.
void AudioInputProcessing::NotifySetRequestedInputProcessingParamsResult(
    MediaTrackGraph* aGraph, int aGeneration,
    const Result<cubeb_input_processing_params, int>& aResult) {
  aGraph->AssertOnGraphThread();
  if (aGeneration != mPlatformProcessingSetGeneration) {
    // This is a result from an old request, wait for a more recent one.
    return;
  }
  if (aResult.isOk()) {
    if (mPlatformProcessingSetParams == aResult.inspect()) {
      // No change.
      return;
    }
    mPlatformProcessingSetError = Nothing();
    mPlatformProcessingSetParams = aResult.inspect();
    LOG("AudioInputProcessing %p platform processing params are now %s.", this,
        CubebUtils::ProcessingParamsToString(mPlatformProcessingSetParams)
            .get());
  } else {
    mPlatformProcessingSetError = Some(aResult.inspectErr());
    mPlatformProcessingSetParams = CUBEB_INPUT_PROCESSING_PARAM_NONE;
    LOG("AudioInputProcessing %p platform processing params failed to apply. "
        "Applying input processing config in libwebrtc.",
        this);
  }
  ApplySettingsInternal(aGraph, mSettings);
}
    542 
    543 bool AudioInputProcessing::IsPassThrough(MediaTrackGraph* aGraph) const {
    544  aGraph->AssertOnGraphThread();
    545  // The high-pass filter is not taken into account when activating the
    546  // pass through, since it's not controllable from content.
    547  auto config = AppliedConfig(aGraph);
    548  auto aec = [](const auto& config) { return config.echo_canceller.enabled; };
    549  auto agc = [](const auto& config) {
    550    return config.gain_controller1.enabled || config.gain_controller2.enabled;
    551  };
    552  auto ns = [](const auto& config) { return config.noise_suppression.enabled; };
    553  return !(aec(config) || agc(config) || ns(config));
    554 }
    555 
    556 void AudioInputProcessing::PassThroughChanged(MediaTrackGraph* aGraph) {
    557  aGraph->AssertOnGraphThread();
    558 
    559  if (!mEnabled) {
    560    MOZ_ASSERT(!mPacketizerInput);
    561    return;
    562  }
    563 
    564  if (IsPassThrough(aGraph)) {
    565    // Switching to pass-through.  Clear state so that it doesn't affect any
    566    // future processing, if re-enabled.
    567    ResetAudioProcessing(aGraph);
    568  } else {
    569    // No longer pass-through.  Processing will not use old state.
    570    // Packetizer setup is deferred until needed.
    571    MOZ_ASSERT(!mPacketizerInput);
    572  }
    573 }
    574 
// Number of input channels requested from the device, per the current
// settings (mSettings.mChannels).
uint32_t AudioInputProcessing::GetRequestedInputChannelCount() const {
  return mSettings.mChannels;
}
    578 
// The requested channel count changed: ask the graph to re-evaluate the
// input device so the new count can take effect.
void AudioInputProcessing::RequestedInputChannelCountChanged(
    MediaTrackGraph* aGraph, CubebUtils::AudioDeviceID aDeviceId) {
  aGraph->ReevaluateInputDevice(aDeviceId);
}
    583 
// Enable processing of captured audio (graph thread). No-op when already
// enabled.
void AudioInputProcessing::Start(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();

  if (mEnabled) {
    return;
  }
  mEnabled = true;

  // The packetizer is set up lazily when processing first needs it.
  MOZ_ASSERT(!mPacketizerInput);
}
    594 
    595 void AudioInputProcessing::Stop(MediaTrackGraph* aGraph) {
    596  aGraph->AssertOnGraphThread();
    597 
    598  if (!mEnabled) {
    599    return;
    600  }
    601 
    602  mEnabled = false;
    603 
    604  if (IsPassThrough(aGraph)) {
    605    return;
    606  }
    607 
    608  // Packetizer is active and we were just stopped. Stop the packetizer and
    609  // processing.
    610  ResetAudioProcessing(aGraph);
    611 }
    612 
     613 // The following is how Process() works in pass-through and non-pass-through
     614 // modes. In both modes, Process() outputs the same number of frames as its
    615 // input data.
    616 //
    617 // I. In non-pass-through mode:
    618 //
    619 // We will use webrtc::AudioProcessing to process the input audio data in this
    620 // mode. The data input in webrtc::AudioProcessing needs to be a 10ms chunk,
    621 // while the input data passed to Process() is not necessary to have times of
    622 // 10ms-chunk length. To divide the input data into 10ms chunks,
    623 // mPacketizerInput is introduced.
    624 //
    625 // We will add one 10ms-chunk silence into the internal buffer before Process()
     626 // works. Those extra frames are called pre-buffering. It aims to avoid glitches
     627 // we may have when producing data in mPacketizerInput. Without pre-buffering,
     628 // when the input data length is not a multiple of 10ms, we could end up not
     629 // having enough output since mPacketizerInput would keep some input data, which
    630 // is the remainder of the 10ms-chunk length. To force processing those data
    631 // left in mPacketizerInput, we would need to add some extra frames to make
    632 // mPacketizerInput produce a 10ms-chunk. For example, if the sample rate is
    633 // 44100 Hz, then the packet-size is 441 frames. When we only have 384 input
    634 // frames, we would need to put additional 57 frames to mPacketizerInput to
    635 // produce a packet. However, those extra 57 frames result in a glitch sound.
    636 //
    637 // By adding one 10ms-chunk silence in advance to the internal buffer, we won't
    638 // need to add extra frames between the input data no matter what data length it
    639 // is. The only drawback is the input data won't be processed and send to output
    640 // immediately. Process() will consume pre-buffering data for its output first.
    641 // The below describes how it works:
    642 //
    643 //
    644 //                          Process()
    645 //               +-----------------------------+
    646 //   input D(N)  |   +--------+   +--------+   |  output D(N)
    647 // --------------|-->|  P(N)  |-->|  S(N)  |---|-------------->
    648 //               |   +--------+   +--------+   |
    649 //               |   packetizer    mSegment    |
    650 //               +-----------------------------+
    651 //               <------ internal buffer ------>
    652 //
    653 //
    654 //   D(N): number of frames from the input and the output needs in the N round
    655 //      Z: number of frames of a 10ms chunk(packet) in mPacketizerInput, Z >= 1
    656 //         (if Z = 1, packetizer has no effect)
    657 //   P(N): number of frames left in mPacketizerInput after the N round. Once the
    658 //         frames in packetizer >= Z, packetizer will produce a packet to
    659 //         mSegment, so P(N) = (P(N-1) + D(N)) % Z, 0 <= P(N) <= Z-1
    660 //   S(N): number of frames left in mSegment after the N round. The input D(N)
    661 //         frames will be passed to mPacketizerInput first, and then
    662 //         mPacketizerInput may append some packets to mSegment, so
    663 //         S(N) = S(N-1) + Z * floor((P(N-1) + D(N)) / Z) - D(N)
    664 //
    665 // At the first, we set P(0) = 0, S(0) = X, where X >= Z-1. X is the
    666 // pre-buffering put in the internal buffer. With this settings, P(K) + S(K) = X
    667 // always holds.
    668 //
    669 // Intuitively, this seems true: We put X frames in the internal buffer at
    670 // first. If the data won't be blocked in packetizer, after the Process(), the
    671 // internal buffer should still hold X frames since the number of frames coming
    672 // from input is the same as the output needs. The key of having enough data for
    673 // output needs, while the input data is piled up in packetizer, is by putting
    674 // at least Z-1 frames as pre-buffering, since the maximum number of frames
    675 // stuck in the packetizer before it can emit a packet is packet-size - 1.
    676 // Otherwise, we don't have enough data for output if the new input data plus
    677 // the data left in packetizer produces a smaller-than-10ms chunk, which will be
    678 // left in packetizer. Thus we must have some pre-buffering frames in the
    679 // mSegment to make up the length of the left chunk we need for output. This can
     680 // also be shown by induction:
    681 //   (1) This holds when K = 0
    682 //   (2) Assume this holds when K = N: so P(N) + S(N) = X
    683 //       => P(N) + S(N) = X >= Z-1 => S(N) >= Z-1-P(N)
    684 //   (3) When K = N+1, we have D(N+1) input frames comes
    685 //     a. if P(N) + D(N+1) < Z, then packetizer has no enough data for one
     686 //        packet. No data produced by packetizer, so the mSegment now has
    687 //        S(N) >= Z-1-P(N) frames. Output needs D(N+1) < Z-P(N) frames. So it
    688 //        needs at most Z-P(N)-1 frames, and mSegment has enough frames for
    689 //        output, Then, P(N+1) = P(N) + D(N+1) and S(N+1) = S(N) - D(N+1)
    690 //        => P(N+1) + S(N+1) = P(N) + S(N) = X
    691 //     b. if P(N) + D(N+1) = Z, then packetizer will produce one packet for
    692 //        mSegment, so mSegment now has S(N) + Z frames. Output needs D(N+1)
    693 //        = Z-P(N) frames. S(N) has at least Z-1-P(N)+Z >= Z-P(N) frames, since
    694 //        Z >= 1. So mSegment has enough frames for output. Then, P(N+1) = 0 and
    695 //        S(N+1) = S(N) + Z - D(N+1) = S(N) + P(N)
    696 //        => P(N+1) + S(N+1) = P(N) + S(N) = X
    697 //     c. if P(N) + D(N+1) > Z, and let P(N) + D(N+1) = q * Z + r, where q >= 1
     698 //        and 0 <= r <= Z-1, then packetizer can produce q packets
    699 //        for mSegment. Output needs D(N+1) = q * Z - P(N) + r frames and
//        mSegment has S(N) + q * Z >= q * Z - P(N) + Z-1 >= q*Z - P(N) + r,
    701 //        since r <= Z-1. So mSegment has enough frames for output. Then,
    702 //        P(N+1) = r and S(N+1) = S(N) + q * Z - D(N+1)
    703 //         => P(N+1) + S(N+1) = S(N) + (q * Z + r - D(N+1)) =  S(N) + P(N) = X
    704 //   => P(K) + S(K) = X always holds
    705 //
    706 // Since P(K) + S(K) = X and P(K) is in [0, Z-1], the S(K) is in [X-Z+1, X]
    707 // range. In our implementation, X is set to Z so S(K) is in [1, Z].
    708 // By the above workflow, we always have enough data for output and no extra
    709 // frames put into packetizer. It means we don't have any glitch!
    710 //
    711 // II. In pass-through mode:
    712 //
    713 //                Process()
    714 //               +--------+
    715 //   input D(N)  |        |  output D(N)
    716 // -------------->-------->--------------->
    717 //               |        |
    718 //               +--------+
    719 //
    720 // The D(N) frames of data are just forwarded from input to output without any
    721 // processing
void AudioInputProcessing::Process(AudioProcessingTrack* aTrack,
                                   GraphTime aFrom, GraphTime aTo,
                                   AudioSegment* aInput,
                                   AudioSegment* aOutput) {
  aTrack->AssertOnGraphThread();
  MOZ_ASSERT(aFrom <= aTo);
  MOZ_ASSERT(!mEnded);

  // Number of output frames the graph needs for this iteration.
  TrackTime need = aTo - aFrom;
  if (need == 0) {
    return;
  }

  MediaTrackGraph* graph = aTrack->Graph();
  // When the processing is disabled we emit silence rather than touching the
  // input at all.
  if (!mEnabled) {
    LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Filling %" PRId64
              " frames of silence to output (disabled)",
              graph, graph->CurrentDriver(), this, need);
    aOutput->AppendNullData(need);
    return;
  }

  MOZ_ASSERT(aInput->GetDuration() == need,
             "Wrong data length from input port source");

  // NOTE(review): when AEC is requested, platform (cubeb) echo cancellation is
  // active, and the applied webrtc AEC state matches whether the graph's
  // AEC-reference output is primary, re-derive the applied config from the
  // current settings. Presumably this toggles the software AEC on/off as the
  // primary-output state changes — confirm the intended condition against
  // ConfigForPrefs().
  if (mSettings.mAecOn &&
      (mPlatformProcessingSetParams &
       CUBEB_INPUT_PROCESSING_PARAM_ECHO_CANCELLATION) &&
      mAppliedConfig.echo_canceller.enabled ==
          aTrack->Graph()->OutputForAECIsPrimary()) {
    ApplySettingsInternal(aTrack->Graph(), mSettings);
  }

  // Pass-through: forward the input unmodified (see diagram II above).
  if (IsPassThrough(graph)) {
    LOG_FRAME(
        "(Graph %p, Driver %p) AudioInputProcessing %p Forwarding %" PRId64
        " frames of input data to output directly (PassThrough)",
        graph, graph->CurrentDriver(), this, aInput->GetDuration());
    aOutput->AppendSegment(aInput);
    return;
  }

  // If the requested input channel count is updated, create a new
  // packetizer. No need to change the pre-buffering since the rate is always
  // the same. The frames left in the packetizer would be replaced by null
  // data and then transferred to mSegment.
  EnsurePacketizer(aTrack);

  // Preconditions of the audio-processing logic: buffered frames plus frames
  // pending in the packetizer always equal one packet (X == Z, see comment
  // block above).
  MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
                 mPacketizerInput->FramesAvailable() ==
             mPacketizerInput->mPacketSize);
  // We pre-buffer mPacketSize frames, but the maximum number of frames stuck in
  // the packetizer before it can emit a packet is mPacketSize-1. Thus that
  // remaining 1 frame will always be present in mSegment.
  MOZ_ASSERT(mSegment.GetDuration() >= 1);
  MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);

  // Feed the input through the webrtc processing pipeline; processed audio
  // accumulates in mSegment.
  PacketizeAndProcess(aTrack, *aInput);
  LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Buffer has %" PRId64
            " frames of data now, after packetizing and processing",
            graph, graph->CurrentDriver(), this, mSegment.GetDuration());

  // By setting pre-buffering to the number of frames of one packet, and
  // because the maximum number of frames stuck in the packetizer before
  // it can emit a packet is the mPacketSize-1, we always have at least
  // one more frame than output needs.
  MOZ_ASSERT(mSegment.GetDuration() > need);
  aOutput->AppendSlice(mSegment, 0, need);
  mSegment.RemoveLeading(need);
  LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p moving %" PRId64
            " frames of data to output, leaving %" PRId64 " frames in buffer",
            graph, graph->CurrentDriver(), this, need, mSegment.GetDuration());

  // Postconditions of the audio-processing logic: the invariant above is
  // restored for the next iteration.
  MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
                 mPacketizerInput->FramesAvailable() ==
             mPacketizerInput->mPacketSize);
  MOZ_ASSERT(mSegment.GetDuration() >= 1);
  MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);
}
    803 
// Feeds audio that was played out (the "reverse stream") to the echo
// canceller for analysis, repacketized into whole 10ms packets. Partial
// packets are carried across calls in mOutputBuffer/mOutputBufferFrameCount.
void AudioInputProcessing::ProcessOutputData(AudioProcessingTrack* aTrack,
                                             const AudioChunk& aChunk) {
  MOZ_ASSERT(aChunk.ChannelCount() > 0);
  aTrack->AssertOnGraphThread();

  if (!mEnabled) {
    return;
  }

  // No processing module is running in pass-through mode, so there is nothing
  // to feed the reverse stream to.
  if (IsPassThrough(aTrack->Graph())) {
    return;
  }

  if (aChunk.mDuration == 0) {
    return;
  }

  TrackRate sampleRate = aTrack->mSampleRate;
  uint32_t framesPerPacket = GetPacketSize(sampleRate);  // in frames
  // Downmix from aChannels to MAX_CHANNELS if needed.
  uint32_t channelCount =
      std::min<uint32_t>(aChunk.ChannelCount(), MAX_CHANNELS);
  // (Re)allocate the planar staging buffer when the channel layout changes.
  if (channelCount != mOutputBufferChannelCount ||
      channelCount * framesPerPacket != mOutputBuffer.Length()) {
    mOutputBuffer.SetLength(channelCount * framesPerPacket);
    mOutputBufferChannelCount = channelCount;
    // It's ok to drop the audio still in the packetizer here: if this changes,
    // we changed devices or something.
    mOutputBufferFrameCount = 0;
  }

  TrackTime chunkOffset = 0;
  AutoTArray<float*, MAX_CHANNELS> channelPtrs;
  channelPtrs.SetLength(channelCount);
  // Slice the incoming chunk into pieces that top up the staging buffer to
  // exactly one packet at a time.
  do {
    MOZ_ASSERT(mOutputBufferFrameCount < framesPerPacket);
    uint32_t packetRemainder = framesPerPacket - mOutputBufferFrameCount;
    mSubChunk = aChunk;
    mSubChunk.SliceTo(
        chunkOffset, std::min(chunkOffset + packetRemainder, aChunk.mDuration));
    MOZ_ASSERT(mSubChunk.mDuration <= packetRemainder);

    // Point each channel pointer at the write position inside the planar
    // staging buffer.
    for (uint32_t channel = 0; channel < channelCount; channel++) {
      channelPtrs[channel] =
          &mOutputBuffer[channel * framesPerPacket + mOutputBufferFrameCount];
    }
    mSubChunk.DownMixTo(channelPtrs);

    chunkOffset += mSubChunk.mDuration;
    MOZ_ASSERT(chunkOffset <= aChunk.mDuration);
    mOutputBufferFrameCount += mSubChunk.mDuration;
    MOZ_ASSERT(mOutputBufferFrameCount <= framesPerPacket);

    if (mOutputBufferFrameCount == framesPerPacket) {
      // Have a complete packet.  Analyze it.
      EnsureAudioProcessing(aTrack);
      for (uint32_t channel = 0; channel < channelCount; channel++) {
        channelPtrs[channel] = &mOutputBuffer[channel * framesPerPacket];
      }
      StreamConfig reverseConfig(sampleRate, channelCount);
      DebugOnly<int> err = mAudioProcessing->AnalyzeReverseStream(
          channelPtrs.Elements(), reverseConfig);
      MOZ_ASSERT(!err, "Could not process the reverse stream.");

      mOutputBufferFrameCount = 0;
    }
  } while (chunkOffset < aChunk.mDuration);

  // Release the reference the slice holds on aChunk's buffer.
  mSubChunk.SetNull(0);
}
    874 
// Packetizes aSegment into 10ms packets, runs each packet through the webrtc
// AudioProcessing module, and appends the processed audio to mSegment with the
// correct PrincipalHandle per input chunk (tracked via mChunksInPacketizer).
// Only called if we're not in passthrough mode.
void AudioInputProcessing::PacketizeAndProcess(AudioProcessingTrack* aTrack,
                                               const AudioSegment& aSegment) {
  MediaTrackGraph* graph = aTrack->Graph();
  MOZ_ASSERT(!IsPassThrough(graph),
             "This should be bypassed when in PassThrough mode.");
  MOZ_ASSERT(mEnabled);
  MOZ_ASSERT(mPacketizerInput);
  MOZ_ASSERT(mPacketizerInput->mPacketSize ==
             GetPacketSize(aTrack->mSampleRate));

  // Calculate number of the pending frames in mChunksInPacketizer.
  auto pendingFrames = [&]() {
    TrackTime frames = 0;
    for (const auto& p : mChunksInPacketizer) {
      frames += p.first;
    }
    return frames;
  };

  // Precondition of the Principal-labelling logic below.
  MOZ_ASSERT(mPacketizerInput->FramesAvailable() ==
             static_cast<uint32_t>(pendingFrames()));

  // The WriteToInterleavedBuffer will do upmix or downmix if the channel-count
  // in aSegment's chunks is different from mPacketizerInput->mChannels
  // WriteToInterleavedBuffer could be avoided once Bug 1729041 is done.
  size_t sampleCount = aSegment.WriteToInterleavedBuffer(
      mInterleavedBuffer, mPacketizerInput->mChannels);
  size_t frameCount =
      sampleCount / static_cast<size_t>(mPacketizerInput->mChannels);

  // Packetize our input data into 10ms chunks, deinterleave into planar channel
  // buffers, process, and append to the right MediaStreamTrack.
  mPacketizerInput->Input(mInterleavedBuffer.Elements(),
                          static_cast<uint32_t>(frameCount));

  // Update mChunksInPacketizer and make sure the precondition for the
  // Principal-labelling logic still holds.
  for (AudioSegment::ConstChunkIterator iter(aSegment); !iter.IsEnded();
       iter.Next()) {
    MOZ_ASSERT(iter->mDuration > 0);
    mChunksInPacketizer.emplace_back(
        std::make_pair(iter->mDuration, iter->mPrincipalHandle));
  }
  MOZ_ASSERT(mPacketizerInput->FramesAvailable() ==
             static_cast<uint32_t>(pendingFrames()));

  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Packetizing %zu frames. "
      "Packetizer has %u frames (enough for %u packets) now",
      graph, graph->CurrentDriver(), this, frameCount,
      mPacketizerInput->FramesAvailable(),
      mPacketizerInput->PacketsAvailable());

  size_t offset = 0;

  // Drain the packetizer one whole packet at a time.
  while (mPacketizerInput->PacketsAvailable()) {
    mPacketCount++;
    uint32_t samplesPerPacket =
        mPacketizerInput->mPacketSize * mPacketizerInput->mChannels;
    if (mInputBuffer.Length() < samplesPerPacket) {
      mInputBuffer.SetLength(samplesPerPacket);
    }
    if (mDeinterleavedBuffer.Length() < samplesPerPacket) {
      mDeinterleavedBuffer.SetLength(samplesPerPacket);
    }
    float* packet = mInputBuffer.Data();
    mPacketizerInput->Output(packet);

    // Downmix from mPacketizerInput->mChannels to mono if needed. We always
    // have floats here, the packetizer performed the conversion.
    AutoTArray<float*, 8> deinterleavedPacketizedInputDataChannelPointers;
    uint32_t channelCountInput = 0;
    if (mPacketizerInput->mChannels > MAX_CHANNELS) {
      channelCountInput = MONO;
      deinterleavedPacketizedInputDataChannelPointers.SetLength(
          channelCountInput);
      deinterleavedPacketizedInputDataChannelPointers[0] =
          mDeinterleavedBuffer.Data();
      // Downmix to mono (and effectively have a planar buffer) by summing all
      // channels in the first channel, and scaling by the number of channels to
      // avoid clipping.
      float gain = 1.f / mPacketizerInput->mChannels;
      // readIndex walks the interleaved packet sample by sample.
      size_t readIndex = 0;
      for (size_t i = 0; i < mPacketizerInput->mPacketSize; i++) {
        mDeinterleavedBuffer.Data()[i] = 0.;
        for (size_t j = 0; j < mPacketizerInput->mChannels; j++) {
          mDeinterleavedBuffer.Data()[i] += gain * packet[readIndex++];
        }
      }
    } else {
      channelCountInput = mPacketizerInput->mChannels;
      webrtc::InterleavedView<const float> interleaved(
          packet, mPacketizerInput->mPacketSize, channelCountInput);
      webrtc::DeinterleavedView<float> deinterleaved(
          mDeinterleavedBuffer.Data(), mPacketizerInput->mPacketSize,
          channelCountInput);

      Deinterleave(interleaved, deinterleaved);

      // Set up pointers into the deinterleaved data for code below
      deinterleavedPacketizedInputDataChannelPointers.SetLength(
          channelCountInput);
      for (size_t i = 0;
           i < deinterleavedPacketizedInputDataChannelPointers.Length(); ++i) {
        deinterleavedPacketizedInputDataChannelPointers[i] =
            deinterleaved[i].data();
      }
    }

    StreamConfig inputConfig(aTrack->mSampleRate, channelCountInput);
    StreamConfig outputConfig = inputConfig;

    EnsureAudioProcessing(aTrack);
    // Bug 1404965: Get the right delay here, it saves some work down the line.
    mAudioProcessing->set_stream_delay_ms(0);

    // Bug 1414837: find a way to not allocate here.
    CheckedInt<size_t> bufferSize(sizeof(float));
    bufferSize *= mPacketizerInput->mPacketSize;
    bufferSize *= channelCountInput;
    RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);

    // Prepare channel pointers to the SharedBuffer created above.
    AutoTArray<float*, 8> processedOutputChannelPointers;
    AutoTArray<const float*, 8> processedOutputChannelPointersConst;
    processedOutputChannelPointers.SetLength(channelCountInput);
    processedOutputChannelPointersConst.SetLength(channelCountInput);

    offset = 0;
    for (size_t i = 0; i < processedOutputChannelPointers.Length(); ++i) {
      processedOutputChannelPointers[i] =
          static_cast<float*>(buffer->Data()) + offset;
      processedOutputChannelPointersConst[i] =
          static_cast<float*>(buffer->Data()) + offset;
      offset += mPacketizerInput->mPacketSize;
    }

    mAudioProcessing->ProcessStream(
        deinterleavedPacketizedInputDataChannelPointers.Elements(), inputConfig,
        outputConfig, processedOutputChannelPointers.Elements());

    // If logging is enabled, dump the audio processing stats twice a second
    if (MOZ_LOG_TEST(gMediaManagerLog, LogLevel::Debug) &&
        !(mPacketCount % 50)) {
      AudioProcessingStats stats = mAudioProcessing->GetStatistics();
      char msg[1024];
      msg[0] = '\0';
      // NOTE(review): this `offset` shadows the function-scope `offset` above;
      // it tracks the write position into msg only.
      size_t offset = 0;
#define AddIfValue(format, member)                                       \
  if (stats.member.has_value()) {                                        \
    offset += SprintfBuf(msg + offset, sizeof(msg) - offset,             \
                         #member ":" format ", ", stats.member.value()); \
  }
      AddIfValue("%d", voice_detected);
      AddIfValue("%lf", echo_return_loss);
      AddIfValue("%lf", echo_return_loss_enhancement);
      AddIfValue("%lf", divergent_filter_fraction);
      AddIfValue("%d", delay_median_ms);
      AddIfValue("%d", delay_standard_deviation_ms);
      AddIfValue("%d", delay_ms);
#undef AddIfValue
      LOG("AudioProcessing statistics: %s", msg);
    }

    // If the track has ended, keep draining the packetizer but discard the
    // processed output.
    if (mEnded) {
      continue;
    }

    // We already have planar audio data of the right format. Insert into the
    // MTG.
    MOZ_ASSERT(processedOutputChannelPointers.Length() == channelCountInput);

    // Insert the processed data chunk by chunk to mSegment with the paired
    // PrincipalHandle value. The chunks are tracked in mChunksInPacketizer.

    auto getAudioChunk = [&](TrackTime aStart, TrackTime aEnd,
                             const PrincipalHandle& aPrincipalHandle) {
      if (aStart == aEnd) {
        return AudioChunk();
      }
      RefPtr<SharedBuffer> other = buffer;
      AudioChunk c =
          AudioChunk(other.forget(), processedOutputChannelPointersConst,
                     static_cast<TrackTime>(mPacketizerInput->mPacketSize),
                     aPrincipalHandle);
      c.SliceTo(aStart, aEnd);
      return c;
    };

    // The number of frames of data that needs to be labelled with Principal
    // values.
    TrackTime len = static_cast<TrackTime>(mPacketizerInput->mPacketSize);
    // The start offset of the unlabelled chunk.
    TrackTime start = 0;
    // By mChunksInPacketizer's information, we can keep labelling the
    // unlabelled frames chunk by chunk.
    while (!mChunksInPacketizer.empty()) {
      auto& [frames, principal] = mChunksInPacketizer.front();
      const TrackTime end = start + frames;
      if (end > len) {
        // If the left unlabelled frames are part of this chunk, then we need to
        // adjust the number of frames in the chunk.
        if (len > start) {
          mSegment.AppendAndConsumeChunk(getAudioChunk(start, len, principal));
          frames -= len - start;
        }
        break;
      }
      // Otherwise, the number of unlabelled frames is larger than or equal to
      // this chunk. We can label the whole chunk directly.
      mSegment.AppendAndConsumeChunk(getAudioChunk(start, end, principal));
      start = end;
      mChunksInPacketizer.pop_front();
    }

    LOG_FRAME(
        "(Graph %p, Driver %p) AudioInputProcessing %p Appending %u frames of "
        "packetized audio, leaving %u frames in packetizer (%" PRId64
        " frames in mChunksInPacketizer)",
        graph, graph->CurrentDriver(), this, mPacketizerInput->mPacketSize,
        mPacketizerInput->FramesAvailable(), pendingFrames());

    // Postcondition of the Principal-labelling logic.
    MOZ_ASSERT(mPacketizerInput->FramesAvailable() ==
               static_cast<uint32_t>(pendingFrames()));
  }
}
   1104 
// Called when the underlying input device changes; reinitializes the webrtc
// AudioProcessing module so echo/noise state from the old device is dropped.
void AudioInputProcessing::DeviceChanged(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();

  // Reset some processing
  if (mAudioProcessing) {
    mAudioProcessing->Initialize();
  }
  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Reinitializing audio "
      "processing",
      aGraph, aGraph->CurrentDriver(), this);
}
   1117 
   1118 cubeb_input_processing_params
   1119 AudioInputProcessing::RequestedInputProcessingParams(
   1120    MediaTrackGraph* aGraph) const {
   1121  aGraph->AssertOnGraphThread();
   1122  if (!mPlatformProcessingEnabled) {
   1123    return CUBEB_INPUT_PROCESSING_PARAM_NONE;
   1124  }
   1125  if (mPlatformProcessingSetError) {
   1126    return CUBEB_INPUT_PROCESSING_PARAM_NONE;
   1127  }
   1128  cubeb_input_processing_params params = CUBEB_INPUT_PROCESSING_PARAM_NONE;
   1129  if (mSettings.mAecOn) {
   1130    params |= CUBEB_INPUT_PROCESSING_PARAM_ECHO_CANCELLATION;
   1131  }
   1132  if (mSettings.mAgcOn) {
   1133    params |= CUBEB_INPUT_PROCESSING_PARAM_AUTOMATIC_GAIN_CONTROL;
   1134  }
   1135  if (mSettings.mNoiseOn) {
   1136    params |= CUBEB_INPUT_PROCESSING_PARAM_NOISE_SUPPRESSION;
   1137  }
   1138  return params;
   1139 }
   1140 
// Applies new MediaEnginePrefs on the graph thread. Clears any transient
// platform-processing error (so params can be retried), applies the config,
// and notifies listeners if the requested input channel count changed.
void AudioInputProcessing::ApplySettings(MediaTrackGraph* aGraph,
                                         CubebUtils::AudioDeviceID aDeviceID,
                                         const MediaEnginePrefs& aSettings) {
  TRACE("AudioInputProcessing::ApplySettings");
  aGraph->AssertOnGraphThread();

  // CUBEB_ERROR_NOT_SUPPORTED means the backend does not support platform
  // processing. In that case, leave the error in place so we don't request
  // processing anew.
  if (mPlatformProcessingSetError.valueOr(CUBEB_OK) !=
      CUBEB_ERROR_NOT_SUPPORTED) {
    mPlatformProcessingSetError = Nothing();
  }

  // Read previous state from mSettings.
  uint32_t oldChannelCount = GetRequestedInputChannelCount();

  ApplySettingsInternal(aGraph, aSettings);

  // If the settings change altered the requested channel count, the device
  // input needs to be reconfigured.
  if (oldChannelCount != GetRequestedInputChannelCount()) {
    RequestedInputChannelCountChanged(aGraph, aDeviceID);
  }
}
   1164 
// Stores aSettings, derives and applies the webrtc AudioProcessing config,
// and fires PassThroughChanged if the pass-through state flipped.
void AudioInputProcessing::ApplySettingsInternal(
    MediaTrackGraph* aGraph, const MediaEnginePrefs& aSettings) {
  TRACE("AudioInputProcessing::ApplySettingsInternal");
  aGraph->AssertOnGraphThread();

  mPlatformProcessingEnabled = aSettings.mUsePlatformProcessing;

  // Read previous state from the applied config (must happen before mSettings
  // and mAppliedConfig are overwritten below).
  bool wasPassThrough = IsPassThrough(aGraph);

  mSettings = aSettings;
  mAppliedConfig = ConfigForPrefs(aGraph, aSettings);
  if (mAudioProcessing) {
    mAudioProcessing->ApplyConfig(mAppliedConfig);
  }

  if (wasPassThrough != IsPassThrough(aGraph)) {
    PassThroughChanged(aGraph);
  }
}
   1185 
   1186 const webrtc::AudioProcessing::Config& AudioInputProcessing::AppliedConfig(
   1187    MediaTrackGraph* aGraph) const {
   1188  aGraph->AssertOnGraphThread();
   1189  return mAppliedConfig;
   1190 }
   1191 
   1192 void AudioInputProcessing::End() {
   1193  mEnded = true;
   1194  mSegment.Clear();
   1195 }
   1196 
   1197 TrackTime AudioInputProcessing::NumBufferedFrames(
   1198    MediaTrackGraph* aGraph) const {
   1199  aGraph->AssertOnGraphThread();
   1200  return mSegment.GetDuration();
   1201 }
   1202 
   1203 void AudioInputProcessing::SetEnvironmentWrapper(
   1204    AudioProcessingTrack* aTrack,
   1205    RefPtr<WebrtcEnvironmentWrapper> aEnvWrapper) {
   1206  aTrack->AssertOnGraphThread();
   1207  mEnvWrapper = std::move(aEnvWrapper);
   1208 }
   1209 
// Ensures mPacketizerInput exists with the currently requested channel count,
// recreating it (and preserving the buffering invariant) when the channel
// count changes. On first creation, pre-buffers one packet of silence so that
// Process() always has enough output data (see the comment block above).
void AudioInputProcessing::EnsurePacketizer(AudioProcessingTrack* aTrack) {
  aTrack->AssertOnGraphThread();
  MOZ_ASSERT(mEnabled);
  MediaTrackGraph* graph = aTrack->Graph();
  MOZ_ASSERT(!IsPassThrough(graph));

  uint32_t channelCount = GetRequestedInputChannelCount();
  MOZ_ASSERT(channelCount > 0);
  // Fast path: the existing packetizer already matches.
  if (mPacketizerInput && mPacketizerInput->mChannels == channelCount) {
    return;
  }

  // If mPacketizerInput exists but with different channel-count, there is no
  // need to change pre-buffering since the packet size is the same as the old
  // one, since the rate is a constant.
  MOZ_ASSERT_IF(mPacketizerInput, mPacketizerInput->mPacketSize ==
                                      GetPacketSize(aTrack->mSampleRate));
  bool needPreBuffering = !mPacketizerInput;
  if (mPacketizerInput) {
    // Replace the frames stuck in the old packetizer with silence in mSegment
    // so the P(K) + S(K) invariant keeps holding.
    const TrackTime numBufferedFrames =
        static_cast<TrackTime>(mPacketizerInput->FramesAvailable());
    mSegment.AppendNullData(numBufferedFrames);
    mPacketizerInput = Nothing();
    mChunksInPacketizer.clear();
  }

  mPacketizerInput.emplace(GetPacketSize(aTrack->mSampleRate), channelCount);

  if (needPreBuffering) {
    LOG_FRAME(
        "(Graph %p, Driver %p) AudioInputProcessing %p: Adding %u frames of "
        "silence as pre-buffering",
        graph, graph->CurrentDriver(), this, mPacketizerInput->mPacketSize);

    AudioSegment buffering;
    buffering.AppendNullData(
        static_cast<TrackTime>(mPacketizerInput->mPacketSize));
    PacketizeAndProcess(aTrack, buffering);
  }
}
   1250 
// Lazily creates (or recreates) the webrtc AudioProcessing module. Recreation
// happens when the "AEC with clock drift" decision changes, because the drift
// setting can only be supplied via the EchoControlFactory at build time.
void AudioInputProcessing::EnsureAudioProcessing(AudioProcessingTrack* aTrack) {
  aTrack->AssertOnGraphThread();

  MediaTrackGraph* graph = aTrack->Graph();
  // If the AEC might need to deal with drift then inform it of this and it
  // will be less conservative about echo suppression.  This can lead to some
  // suppression of non-echo signal, so do this only when drift is expected.
  // https://bugs.chromium.org/p/webrtc/issues/detail?id=11985#c2
  bool haveAECAndDrift = mSettings.mAecOn;
  if (haveAECAndDrift) {
    if (mSettings.mExpectDrift < 0) {
      // Pref is "auto": drift is expected when the AEC reference output may
      // drift, or when input comes via a non-native (resampled) input track.
      haveAECAndDrift =
          graph->OutputForAECMightDrift() ||
          aTrack->GetDeviceInputTrackGraphThread()->AsNonNativeInputTrack();
    } else {
      // Pref forces the decision: nonzero means drift expected.
      haveAECAndDrift = mSettings.mExpectDrift > 0;
    }
  }
  if (!mAudioProcessing || haveAECAndDrift != mHadAECAndDrift) {
    TRACE("AudioProcessing creation");
    LOG("Track %p AudioInputProcessing %p creating AudioProcessing. "
        "aec+drift: %s",
        aTrack, this, haveAECAndDrift ? "Y" : "N");
    MOZ_ASSERT(mEnvWrapper);
    mHadAECAndDrift = haveAECAndDrift;
    BuiltinAudioProcessingBuilder builder;
    builder.SetConfig(AppliedConfig(graph));
    if (haveAECAndDrift) {
      // Setting an EchoControlFactory always enables AEC, overriding
      // Config::echo_canceller.enabled, so do this only when AEC is enabled.
      EchoCanceller3Config aec3Config;
      aec3Config.echo_removal_control.has_clock_drift = true;
      builder.SetEchoControlFactory(
          std::make_unique<EchoCanceller3Factory>(aec3Config));
    }
    mAudioProcessing.reset(builder.Build(mEnvWrapper->Environment()).release());
  }
}
   1289 
// Tears down the packetizer/buffer state and reinitializes the processing
// module. Only valid when entering pass-through mode or when disabled, since
// mSegment is unused in those states.
void AudioInputProcessing::ResetAudioProcessing(MediaTrackGraph* aGraph) {
  aGraph->AssertOnGraphThread();
  MOZ_ASSERT(IsPassThrough(aGraph) || !mEnabled);

  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Resetting audio "
      "processing",
      aGraph, aGraph->CurrentDriver(), this);

  // Reset AudioProcessing so that if we resume processing in the future it
  // doesn't depend on old state.
  if (mAudioProcessing) {
    mAudioProcessing->Initialize();
  }

  // The buffering invariant (buffered + pending == one packet) must still
  // hold right up until teardown.
  MOZ_ASSERT_IF(mPacketizerInput,
                static_cast<uint32_t>(mSegment.GetDuration()) +
                        mPacketizerInput->FramesAvailable() ==
                    mPacketizerInput->mPacketSize);

  // It's ok to clear all the internal buffer here since we won't use mSegment
  // in pass-through mode or when audio processing is disabled.
  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Emptying out %" PRId64
      " frames of data",
      aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration());
  mSegment.Clear();

  mPacketizerInput = Nothing();
  mChunksInPacketizer.clear();
}
   1321 
   1322 void AudioProcessingTrack::Destroy() {
   1323  MOZ_ASSERT(NS_IsMainThread());
   1324  DisconnectDeviceInput();
   1325 
   1326  MediaTrack::Destroy();
   1327 }
   1328 
// Main-thread entry point: builds the webrtc environment wrapper for this
// window and forwards both it and aInputProcessing to the graph thread via a
// control message.
void AudioProcessingTrack::SetInputProcessing(
    RefPtr<AudioInputProcessing> aInputProcessing) {
  if (IsDestroyed()) {
    return;
  }

  RefPtr<WebrtcEnvironmentWrapper> envWrapper =
      WebrtcEnvironmentWrapper::Create(dom::RTCStatsTimestampMaker::Create(
          nsGlobalWindowInner::GetInnerWindowWithId(GetWindowId())));

  // `self` keeps this track alive until the message runs on the graph thread.
  QueueControlMessageWithNoShutdown(
      [self = RefPtr{this}, this, inputProcessing = std::move(aInputProcessing),
       envWrapper = std::move(envWrapper)]() mutable {
        TRACE("AudioProcessingTrack::SetInputProcessingImpl");
        inputProcessing->SetEnvironmentWrapper(self, std::move(envWrapper));
        SetInputProcessingImpl(std::move(inputProcessing));
      });
}
   1347 
   1348 AudioProcessingTrack* AudioProcessingTrack::Create(MediaTrackGraph* aGraph) {
   1349  MOZ_ASSERT(NS_IsMainThread());
   1350  AudioProcessingTrack* track = new AudioProcessingTrack(aGraph->GraphRate());
   1351  aGraph->AddTrack(track);
   1352  return track;
   1353 }
   1354 
   1355 void AudioProcessingTrack::DestroyImpl() {
   1356  ProcessedMediaTrack::DestroyImpl();
   1357  if (mInputProcessing) {
   1358    mInputProcessing->End();
   1359  }
   1360 }
   1361 
// Graph-thread processing callback: pulls input from the (at most one) device
// input, runs it through mInputProcessing, and appends the result to this
// track's segment. Fills null data when there is no input source.
void AudioProcessingTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
                                        uint32_t aFlags) {
  TRACE_COMMENT("AudioProcessingTrack::ProcessInput", "AudioProcessingTrack %p",
                this);
  MOZ_ASSERT(mInputProcessing);
  MOZ_ASSERT(aFrom < aTo);

  LOG_FRAME(
      "(Graph %p, Driver %p) AudioProcessingTrack %p ProcessInput from %" PRId64
      " to %" PRId64 ", needs %" PRId64 " frames",
      mGraph, mGraph->CurrentDriver(), this, aFrom, aTo, aTo - aFrom);

  if (!mInputProcessing->IsEnded()) {
    // The track must be exactly caught up to aFrom before this iteration.
    MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aFrom);
    if (mInputs.IsEmpty()) {
      GetData<AudioSegment>()->AppendNullData(aTo - aFrom);
      LOG_FRAME("(Graph %p, Driver %p) AudioProcessingTrack %p Filling %" PRId64
                " frames of null data (no input source)",
                mGraph, mGraph->CurrentDriver(), this, aTo - aFrom);
    } else {
      MOZ_ASSERT(mInputs.Length() == 1);
      AudioSegment data;
      DeviceInputConsumerTrack::GetInputSourceData(data, aFrom, aTo);
      mInputProcessing->Process(this, aFrom, aTo, &data,
                                GetData<AudioSegment>());
    }
    // Processing must have produced exactly aTo - aFrom frames.
    MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aTo);

    ApplyTrackDisabling(mSegment.get());
  } else if (aFlags & ALLOW_END) {
    mEnded = true;
  }
}
   1395 
   1396 void AudioProcessingTrack::NotifyOutputData(MediaTrackGraph* aGraph,
   1397                                            const AudioChunk& aChunk) {
   1398  MOZ_ASSERT(mGraph == aGraph, "Cannot feed audio output to another graph");
   1399  AssertOnGraphThread();
   1400  if (mInputProcessing) {
   1401    mInputProcessing->ProcessOutputData(this, aChunk);
   1402  }
   1403 }
   1404 
void AudioProcessingTrack::SetInputProcessingImpl(
    RefPtr<AudioInputProcessing> aInputProcessing) {
  AssertOnGraphThread();
  // Take over the reference; the previous processor (if any) is released.
  mInputProcessing = std::move(aInputProcessing);
}
   1410 
MediaEngineWebRTCAudioCaptureSource::MediaEngineWebRTCAudioCaptureSource(
    const MediaDevice* aMediaDevice) {
  // This source only handles AudioCapture devices. The device is not stored;
  // no per-device state is kept by this class.
  MOZ_ASSERT(aMediaDevice->mMediaSource == MediaSourceEnum::AudioCapture);
}
   1415 
   1416 /* static */
   1417 nsString MediaEngineWebRTCAudioCaptureSource::GetUUID() {
   1418  nsID uuid{};
   1419  char uuidBuffer[NSID_LENGTH];
   1420  nsCString asciiString;
   1421  ErrorResult rv;
   1422 
   1423  rv = nsID::GenerateUUIDInPlace(uuid);
   1424  if (rv.Failed()) {
   1425    return u""_ns;
   1426  }
   1427 
   1428  uuid.ToProvidedString(uuidBuffer);
   1429  asciiString.AssignASCII(uuidBuffer);
   1430 
   1431  // Remove {} and the null terminator
   1432  return NS_ConvertASCIItoUTF16(Substring(asciiString, 1, NSID_LENGTH - 3));
   1433 }
   1434 
/* static */
nsString MediaEngineWebRTCAudioCaptureSource::GetGroupId() {
  // All audio-capture sources report the same fixed group id.
  return u"AudioCaptureGroup"_ns;
}
   1439 
void MediaEngineWebRTCAudioCaptureSource::SetTrack(
    const RefPtr<MediaTrack>& aTrack, const PrincipalHandle& aPrincipalHandle) {
  AssertIsOnOwningThread();
  // Nothing to do here. aTrack is a placeholder dummy and not exposed.
}
   1445 
nsresult MediaEngineWebRTCAudioCaptureSource::Start() {
  AssertIsOnOwningThread();
  // No device to start for audio capture; always succeeds.
  return NS_OK;
}
   1450 
nsresult MediaEngineWebRTCAudioCaptureSource::Stop() {
  AssertIsOnOwningThread();
  // No device to stop for audio capture; always succeeds.
  return NS_OK;
}
   1455 
nsresult MediaEngineWebRTCAudioCaptureSource::Reconfigure(
    const dom::MediaTrackConstraints& aConstraints,
    const MediaEnginePrefs& aPrefs, const char** aOutBadConstraint) {
  // Audio capture ignores constraints/prefs changes; nothing to reconfigure.
  return NS_OK;
}
   1461 
   1462 void MediaEngineWebRTCAudioCaptureSource::GetSettings(
   1463    dom::MediaTrackSettings& aOutSettings) const {
   1464  aOutSettings.mAutoGainControl.Construct(false);
   1465  aOutSettings.mEchoCancellation.Construct(false);
   1466  aOutSettings.mNoiseSuppression.Construct(false);
   1467  aOutSettings.mChannelCount.Construct(1);
   1468 }
   1469 
   1470 }  // namespace mozilla
   1471 
   1472 // Don't allow our macros to leak into other cpps in our unified build unit.
   1473 #undef MAX_CHANNELS
   1474 #undef MONO
   1475 #undef MAX_SAMPLING_FREQ