tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MediaEngineWebRTCAudio.h (16439B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef MediaEngineWebRTCAudio_h
      7 #define MediaEngineWebRTCAudio_h
      8 
      9 #include "AudioDeviceInfo.h"
     10 #include "AudioPacketizer.h"
     11 #include "AudioSegment.h"
     12 #include "DeviceInputTrack.h"
     13 #include "MediaEnginePrefs.h"
     14 #include "MediaEngineWebRTC.h"
     15 #include "MediaTrackListener.h"
     16 #include "modules/audio_processing/include/audio_processing.h"
     17 
     18 namespace mozilla {
     19 
     20 class AudioInputProcessing;
     21 class AudioProcessingTrack;
     22 class WebrtcEnvironmentWrapper;
     23 
      24 // This class is created and used exclusively on the Media Manager thread, with
      25 // exactly two exceptions:
      26 // - Pull is always called on the MTG thread. It only ever uses
      27 //   mInputProcessing. mInputProcessing is set, then a message is sent first to
      28 //   the main thread and then the MTG thread so that it can be used as part of
      29 //   the graph processing. On destruction, similarly, a message is sent to the
      30 //   graph so that it stops using it, and then it is deleted.
      31 // - mSettings is created on the MediaManager thread and is only ever accessed
      32 //   on the Main Thread. It is const.
      33 class MediaEngineWebRTCMicrophoneSource : public MediaEngineSource {
      34 public:
      35  explicit MediaEngineWebRTCMicrophoneSource(const MediaDevice* aMediaDevice);
      36 
         // Creates a new source for aMediaDevice, based on an existing aSource.
      37  static already_AddRefed<MediaEngineWebRTCMicrophoneSource> CreateFrom(
      38      const MediaEngineWebRTCMicrophoneSource* aSource,
      39      const MediaDevice* aMediaDevice);
      40 
      41  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
      42                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
      43                    const char** aOutBadConstraint) override;
      44  nsresult Deallocate() override;
      45  void SetTrack(const RefPtr<MediaTrack>& aTrack,
      46                const PrincipalHandle& aPrincipal) override;
      47  nsresult Start() override;
      48  nsresult Stop() override;
      49  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
      50                       const MediaEnginePrefs& aPrefs,
      51                       const char** aOutBadConstraint) override;
      52 
      53  /**
      54   * Assigns the current settings of the capture to aOutSettings.
      55   * Main thread only.
      56   */
      57  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;
      58 
      59  void GetCapabilities(
      60      dom::MediaTrackCapabilities& aOutCapabilities) const override;
      61 
         // Still-photo capture is not supported for microphone sources.
      62  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
      63    return NS_ERROR_NOT_IMPLEMENTED;
      64  }
      65 
      66 protected:
      67  ~MediaEngineWebRTCMicrophoneSource() = default;
      68 
      69 private:
      70  /**
      71   * From a set of constraints and about:config preferences, output the correct
      72   * set of preferences that can be sent to AudioInputProcessing.
      73   *
      74   * This can fail if the number of channels requested is zero, negative, or
      75   * more than the device supports.
      76   */
      77  nsresult EvaluateSettings(const NormalizedConstraints& aConstraintsUpdate,
      78                            const MediaEnginePrefs& aInPrefs,
      79                            MediaEnginePrefs* aOutPrefs,
      80                            const char** aOutBadConstraint);
      81  /**
      82   * From settings output by EvaluateSettings, send those settings to the
      83   * AudioInputProcessing instance and the main thread (for use in GetSettings).
      84   */
      85  void ApplySettings(const MediaEnginePrefs& aPrefs);
      86 
         // The principal handle this source's captured data is attributed to.
      87  PrincipalHandle mPrincipal = PRINCIPAL_HANDLE_NONE;
      88 
         // Device information for the underlying audio input device.
      89  const RefPtr<AudioDeviceInfo> mDeviceInfo;
      90 
      91  // The maximum number of channels that this device supports.
      92  const uint32_t mDeviceMaxChannelCount;
      93  // The current settings for the underlying device.
      94  // Constructed on the MediaManager thread, and then only ever accessed on the
      95  // main thread.
      96  const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackSettings>>
      97      mSettings;
      98 
      99  // The media capabilities for the underlying device.
     100  // Constructed on the MediaManager thread, and then only ever accessed on the
     101  // main thread.
     102  const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackCapabilities>>
     103      mCapabilities;
     104 
     105  // Current state of the resource for this source.
     106  MediaEngineSourceState mState;
     107 
     108  // The current preferences that will be forwarded to mInputProcessing below.
     109  MediaEnginePrefs mCurrentPrefs;
     110 
     111  // The AudioProcessingTrack used to interface with the MediaTrackGraph. Set in
     112  // SetTrack as part of the initialization, and nulled in ::Deallocate.
     113  RefPtr<AudioProcessingTrack> mTrack;
     114 
     115  // See note at the top of this class.
     116  RefPtr<AudioInputProcessing> mInputProcessing;
     117 };
    118 
     119 // This class is created on the MediaManager thread, and then exclusively used
     120 // on the MTG thread.
     121 // All communication is done via message passing using MTG ControlMessages
     122 class AudioInputProcessing : public AudioDataListener {
     123 public:
     124  explicit AudioInputProcessing(uint32_t aMaxChannelCount);
         // Processes the input audio in aInput for the span [aFrom, aTo) and appends
         // the resulting (processed or passed-through) audio to aOutput.
     125  void Process(AudioProcessingTrack* aTrack, GraphTime aFrom, GraphTime aTo,
     126               AudioSegment* aInput, AudioSegment* aOutput);
     127 
         // Feeds the graph's mixed output (speaker data) in aChunk to the
         // reverse stream of mAudioProcessing, for use by AEC.
     128  void ProcessOutputData(AudioProcessingTrack* aTrack,
     129                         const AudioChunk& aChunk);
     130  bool IsVoiceInput(MediaTrackGraph* aGraph) const override {
     131    // If we're passing data directly without AEC or any other process, this
     132    // means that all voice-processing has been disabled intentionally. In this
     133    // case, consider that the device is not used for voice input.
     134    return !IsPassThrough(aGraph) ||
     135           mPlatformProcessingSetParams != CUBEB_INPUT_PROCESSING_PARAM_NONE;
     136  }
     137 
     138  // Start processing data. Note that ApplySettings must be called prior to
     139  // Start().
     140  void Start(MediaTrackGraph* aGraph);
     141  // Stop processing data and reset mAudioProcessing state.
     142  void Stop(MediaTrackGraph* aGraph);
     143 
     144  void DeviceChanged(MediaTrackGraph* aGraph) override;
     145 
     146  uint32_t RequestedInputChannelCount(MediaTrackGraph*) const override {
     147    return GetRequestedInputChannelCount();
     148  }
     149 
     150  cubeb_input_processing_params RequestedInputProcessingParams(
     151      MediaTrackGraph* aGraph) const override;
     152 
     153  void Disconnect(MediaTrackGraph* aGraph) override;
     154 
     155  // Prepare for a change to platform processing params by assuming the platform
     156  // applies the intersection of the already applied params and
     157  // aRequestedParams, and set the software config accordingly.
     158  void NotifySetRequestedInputProcessingParams(
     159      MediaTrackGraph* aGraph, int aGeneration,
     160      cubeb_input_processing_params aRequestedParams) override;
     161 
     162  // Handle the result of an async operation to set processing params on a cubeb
     163  // stream. If the operation succeeded, disable the applied processing params
     164  // from the software processing config. If the operation failed, request
     165  // platform processing to be disabled so as to not prevent a cubeb stream from
     166  // being created.
     167  void NotifySetRequestedInputProcessingParamsResult(
     168      MediaTrackGraph* aGraph, int aGeneration,
     169      const Result<cubeb_input_processing_params, int>& aResult) override;
     170 
     171  void PacketizeAndProcess(AudioProcessingTrack* aTrack,
     172                           const AudioSegment& aSegment);
     173 
         // The input channel count requested through the applied settings.
     174  uint32_t GetRequestedInputChannelCount() const;
     175 
     176  // This is true when all processing is disabled, in which case we can skip
     177  // packetization, resampling and other processing passes. Processing may still
     178  // be applied by the platform on the underlying input track.
     179  bool IsPassThrough(MediaTrackGraph* aGraph) const;
     180 
     181  // This allows changing the APM options, enabling or disabling processing
     182  // steps. The settings get applied the next time we're about to process input
     183  // data.
     184  void ApplySettings(MediaTrackGraph* aGraph,
     185                     CubebUtils::AudioDeviceID aDeviceID,
     186                     const MediaEnginePrefs& aSettings);
     187 
     188  // The config currently applied to the audio processing module.
     189  const webrtc::AudioProcessing::Config& AppliedConfig(
     190      MediaTrackGraph* aGraph) const;
     191 
         // Marks this instance as ended. See mEnded and IsEnded().
     192  void End();
     193 
         // Number of input frames currently buffered awaiting processing (see
         // mChunksInPacketizer).
     194  TrackTime NumBufferedFrames(MediaTrackGraph* aGraph) const;
     195 
     196  // The packet size contains samples in 10ms. The unit of aRate is hz.
     197  static uint32_t GetPacketSize(TrackRate aRate) {
     198    return webrtc::AudioProcessing::GetFrameSize(aRate);
     199  }
     200 
     201  bool IsEnded() const { return mEnded; }
     202 
     203  // For testing:
     204  bool HadAECAndDrift() const { return mHadAECAndDrift; }
     205 
     206  void SetEnvironmentWrapper(AudioProcessingTrack* aTrack,
     207                             RefPtr<WebrtcEnvironmentWrapper> aEnvWrapper);
     208 
     209 private:
     210  ~AudioInputProcessing() = default;
     211  webrtc::AudioProcessing::Config ConfigForPrefs(
     212      MediaTrackGraph* aGraph, const MediaEnginePrefs& aPrefs) const;
     213  void PassThroughChanged(MediaTrackGraph* aGraph);
     214  void RequestedInputChannelCountChanged(MediaTrackGraph* aGraph,
     215                                         CubebUtils::AudioDeviceID aDeviceId);
     216  void EnsurePacketizer(AudioProcessingTrack* aTrack);
     217  void EnsureAudioProcessing(AudioProcessingTrack* aTrack);
     218  void ResetAudioProcessing(MediaTrackGraph* aGraph);
     219  void ApplySettingsInternal(MediaTrackGraph* aGraph,
     220                             const MediaEnginePrefs& aSettings);
     221  PrincipalHandle GetCheckedPrincipal(const AudioSegment& aSegment);
     222  // This implements the processing algorithm to apply to the input (e.g. a
     223  // microphone). If all algorithms are disabled, this class is not used. This
     224  // class only accepts audio chunks of 10ms. It has two inputs and one output:
     225  // it is fed the speaker data and the microphone data. It outputs processed
     226  // input data.
     227  UniquePtr<webrtc::AudioProcessing> mAudioProcessing;
     228  // Whether mAudioProcessing was created for AEC with clock drift.
     229  // Meaningful only when mAudioProcessing is non-null.
     230  bool mHadAECAndDrift = false;
     231  // Packetizer to be able to feed 10ms packets to the input side of
     232  // mAudioProcessing. Not used if the processing is bypassed.
     233  Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
     234  // The current settings from about:config preferences and content-provided
     235  // constraints.
     236  MediaEnginePrefs mSettings;
     237  // The currently applied audio processing config. Set even if mAudioProcessing
     238  // is not. This is needed because ConfigForPrefs(mSettings) is not static --
     239  // it relies on mPlatformProcessingSetParams and MTG::OutputForAECIsPrimary(),
     240  // which can change between calls to ApplySettingsInternal -- and an
     241  // AudioProcessing::Config is available from mAudioProcessing only when that
     242  // exists. Initialized as needed -- it is up to the owner to call
     243  // ApplySettings prior to Start.
     244  webrtc::AudioProcessing::Config mAppliedConfig;
     245  // When false, RequestedInputProcessingParams() returns no params, resulting
     246  // in platform processing getting disabled in the platform.
     247  bool mPlatformProcessingEnabled = false;
     248  // The generation tracking the latest requested set of platform processing
     249  // params.
     250  int mPlatformProcessingSetGeneration = -1;
     251  // The latest error notified to us through
     252  // NotifySetRequestedInputProcessingParamsResult, or Nothing if the latest
     253  // request was successful, or if a request is pending a result.
     254  Maybe<int> mPlatformProcessingSetError;
     255  // The processing params currently applied, or about to be applied, in the
     256  // platform. This allows adapting the AudioProcessingConfig accordingly.
     257  cubeb_input_processing_params mPlatformProcessingSetParams =
     258      CUBEB_INPUT_PROCESSING_PARAM_NONE;
     259  // Buffer for up to one 10ms packet of planar mixed audio output for the
     260  // reverse-stream (speaker data) of mAudioProcessing AEC.
     261  // Length is packet size * channel count, regardless of how many frames are
     262  // buffered.  Not used if the processing is bypassed.
     263  AlignedFloatBuffer mOutputBuffer;
     264  // Number of channels into which mOutputBuffer is divided.
     265  uint32_t mOutputBufferChannelCount = 0;
     266  // Number of frames buffered in mOutputBuffer for the reverse stream.
     267  uint32_t mOutputBufferFrameCount = 0;
     268  // Stores the input audio, to be processed by the APM.
     269  AlignedFloatBuffer mInputBuffer;
     270  // Stores the deinterleaved microphone audio
     271  AlignedFloatBuffer mDeinterleavedBuffer;
     272  // Stores the mixed down input audio
     273  AlignedFloatBuffer mInputDownmixBuffer;
     274  // Stores data waiting to be pulled.
     275  AudioSegment mSegment;
     276  // Whether or not this MediaEngine is enabled. If it's not enabled, it
     277  // operates in "pull" mode, and we append silence only, releasing the audio
     278  // input track.
     279  bool mEnabled;
     280  // Whether or not we've ended and removed the AudioProcessingTrack.
     281  bool mEnded;
     282  // When processing is enabled, the number of packets received by this
     283  // instance, to implement periodic logging.
     284  uint64_t mPacketCount;
     285  // Temporary descriptor for a slice of an AudioChunk parameter passed to
     286  // ProcessOutputData().  This is a member rather than on the stack so that
     287  // any memory allocated for its mChannelData pointer array is not
     288  // reallocated on each iteration.
     289  AudioChunk mSubChunk;
     290  // A storage holding the interleaved audio data converted from the
     291  // AudioSegment. This will be used as an input parameter for
     292  // PacketizeAndProcess. This should be removed once bug 1729041 is done.
     293  AutoTArray<AudioDataValue,
     294             SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
     295      mInterleavedBuffer;
     296  // Tracks the pending frames with paired principals piled up in packetizer.
     297  std::deque<std::pair<TrackTime, PrincipalHandle>> mChunksInPacketizer;
     298  RefPtr<WebrtcEnvironmentWrapper> mEnvWrapper;
     299 };
    300 
     301 // MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.
     302 class AudioProcessingTrack : public DeviceInputConsumerTrack {
     303  // Only accessed on the graph thread.
     304  RefPtr<AudioInputProcessing> mInputProcessing;
     305 
     306  explicit AudioProcessingTrack(TrackRate aSampleRate)
     307      : DeviceInputConsumerTrack(aSampleRate) {}
     308 
     309  ~AudioProcessingTrack() = default;
     310 
     311 public:
     312  // Main Thread API
     313  void Destroy() override;
         // Sets the AudioInputProcessing instance used by this track (see
         // SetInputProcessingImpl for the graph-thread counterpart).
     314  void SetInputProcessing(RefPtr<AudioInputProcessing> aInputProcessing);
         // Creates an AudioProcessingTrack and adds it to aGraph.
     315  static AudioProcessingTrack* Create(MediaTrackGraph* aGraph);
     316 
     317  // Graph Thread API
     318  void DestroyImpl() override;
     319  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
     320  uint32_t NumberOfChannels() const override {
     321    if (!mInputProcessing) {
     322      // There's an async gap between adding the track to the graph
     323      // (AudioProcessingTrack::Create) and setting mInputProcessing
     324      // (SetInputProcessing on the media manager thread).
     325      // Return 0 to indicate the default within this gap.
     326      return 0;
     327    }
     328    return mInputProcessing->GetRequestedInputChannelCount();
     329  }
     330  // Pass the graph's mixed audio output to mInputProcessing for processing as
     331  // the reverse stream.
     332  void NotifyOutputData(MediaTrackGraph* aGraph, const AudioChunk& aChunk);
     333 
     334  // Any thread
     335  AudioProcessingTrack* AsAudioProcessingTrack() override { return this; }
     336 
     337 private:
     338  // Graph thread API
     339  void SetInputProcessingImpl(RefPtr<AudioInputProcessing> aInputProcessing);
     340 };
    341 
         // MediaEngineSource for capturing audio. Allocation and deallocation are
         // no-ops here: that lifecycle is managed in MediaManager.cpp (see the
         // Allocate/Deallocate overrides below).
     342 class MediaEngineWebRTCAudioCaptureSource : public MediaEngineSource {
     343 public:
     344  explicit MediaEngineWebRTCAudioCaptureSource(const MediaDevice* aMediaDevice);
     345  static nsString GetUUID();
     346  static nsString GetGroupId();
     347  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
     348                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
     349                    const char** aOutBadConstraint) override {
     350    // Nothing to do here, everything is managed in MediaManager.cpp
     351    return NS_OK;
     352  }
     353  nsresult Deallocate() override {
     354    // Nothing to do here, everything is managed in MediaManager.cpp
     355    return NS_OK;
     356  }
     357  void SetTrack(const RefPtr<MediaTrack>& aTrack,
     358                const PrincipalHandle& aPrincipal) override;
     359  nsresult Start() override;
     360  nsresult Stop() override;
     361  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
     362                       const MediaEnginePrefs& aPrefs,
     363                       const char** aOutBadConstraint) override;
     364 
         // Still-photo capture is not supported for audio capture sources.
     365  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
     366    return NS_ERROR_NOT_IMPLEMENTED;
     367  }
     368 
     369  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;
     370 
     371  void GetCapabilities(
     372      dom::MediaTrackCapabilities& aOutCapabilities) const override {}
     373 
     374 protected:
     375  virtual ~MediaEngineWebRTCAudioCaptureSource() = default;
     376 };
    377 
    378 }  // end namespace mozilla
    379 
    380 #endif  // MediaEngineWebRTCAudio_h