MediaEngineWebRTCAudio.h (16439B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MediaEngineWebRTCAudio_h
#define MediaEngineWebRTCAudio_h

#include "AudioDeviceInfo.h"
#include "AudioPacketizer.h"
#include "AudioSegment.h"
#include "DeviceInputTrack.h"
#include "MediaEnginePrefs.h"
#include "MediaEngineWebRTC.h"
#include "MediaTrackListener.h"
#include "modules/audio_processing/include/audio_processing.h"

namespace mozilla {

class AudioInputProcessing;
class AudioProcessingTrack;
class WebrtcEnvironmentWrapper;

// This class is created and used exclusively on the Media Manager thread, with
// exactly two exceptions:
// - Pull is always called on the MTG thread. It only ever uses
//   mInputProcessing. mInputProcessing is set, then a message is sent first to
//   the main thread and then the MTG thread so that it can be used as part of
//   the graph processing. On destruction, similarly, a message is sent to the
//   graph so that it stops using it, and then it is deleted.
// - mSettings is created on the MediaManager thread and is only ever accessed
//   on the Main Thread. It is const.
class MediaEngineWebRTCMicrophoneSource : public MediaEngineSource {
 public:
  explicit MediaEngineWebRTCMicrophoneSource(const MediaDevice* aMediaDevice);

  // Creates a new source sharing aSource's configuration, bound to
  // aMediaDevice.
  static already_AddRefed<MediaEngineWebRTCMicrophoneSource> CreateFrom(
      const MediaEngineWebRTCMicrophoneSource* aSource,
      const MediaDevice* aMediaDevice);

  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override;
  nsresult Deallocate() override;
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  /**
   * Assigns the current settings of the capture to aOutSettings.
   * Main thread only.
   */
  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

  void GetCapabilities(
      dom::MediaTrackCapabilities& aOutCapabilities) const override;

  // Photo capture is not supported for a microphone source.
  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

 protected:
  ~MediaEngineWebRTCMicrophoneSource() = default;

 private:
  /**
   * From a set of constraints and about:config preferences, output the correct
   * set of preferences that can be sent to AudioInputProcessing.
   *
   * This can fail if the number of channels requested is zero, negative, or
   * more than the device supports.
   */
  nsresult EvaluateSettings(const NormalizedConstraints& aConstraintsUpdate,
                            const MediaEnginePrefs& aInPrefs,
                            MediaEnginePrefs* aOutPrefs,
                            const char** aOutBadConstraint);
  /**
   * From settings output by EvaluateSettings, send those settings to the
   * AudioInputProcessing instance and the main thread (for use in
   * GetSettings).
   */
  void ApplySettings(const MediaEnginePrefs& aPrefs);

  PrincipalHandle mPrincipal = PRINCIPAL_HANDLE_NONE;

  const RefPtr<AudioDeviceInfo> mDeviceInfo;

  // The maximum number of channels that this device supports.
  const uint32_t mDeviceMaxChannelCount;
  // The current settings for the underlying device.
  // Constructed on the MediaManager thread, and then only ever accessed on the
  // main thread.
  const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackSettings>>
      mSettings;

  // The media capabilities for the underlying device.
  // Constructed on the MediaManager thread, and then only ever accessed on the
  // main thread.
  const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackCapabilities>>
      mCapabilities;

  // Current state of the resource for this source.
  MediaEngineSourceState mState;

  // The current preferences that will be forwarded to mInputProcessing below.
  MediaEnginePrefs mCurrentPrefs;

  // The AudioProcessingTrack used to interface with the MediaTrackGraph. Set
  // in SetTrack as part of the initialization, and nulled in ::Deallocate.
  RefPtr<AudioProcessingTrack> mTrack;

  // See note at the top of this class.
  RefPtr<AudioInputProcessing> mInputProcessing;
};

// This class is created on the MediaManager thread, and then exclusively used
// on the MTG thread.
// All communication is done via message passing using MTG ControlMessages
class AudioInputProcessing : public AudioDataListener {
 public:
  explicit AudioInputProcessing(uint32_t aMaxChannelCount);
  // Consume input audio from aInput for the interval [aFrom, aTo), appending
  // (possibly processed) audio to aOutput.
  void Process(AudioProcessingTrack* aTrack, GraphTime aFrom, GraphTime aTo,
               AudioSegment* aInput, AudioSegment* aOutput);

  // Feed the graph's output (speaker) data to the processing as the reverse
  // stream.
  void ProcessOutputData(AudioProcessingTrack* aTrack,
                         const AudioChunk& aChunk);
  bool IsVoiceInput(MediaTrackGraph* aGraph) const override {
    // If we're passing data directly without AEC or any other process, this
    // means that all voice-processing has been disabled intentionally. In
    // this case, consider that the device is not used for voice input.
    return !IsPassThrough(aGraph) ||
           mPlatformProcessingSetParams != CUBEB_INPUT_PROCESSING_PARAM_NONE;
  }

  // Start processing data. Note that ApplySettings must be called prior to
  // Start().
  void Start(MediaTrackGraph* aGraph);
  // Stop processing data and reset mAudioProcessing state.
  void Stop(MediaTrackGraph* aGraph);

  void DeviceChanged(MediaTrackGraph* aGraph) override;

  uint32_t RequestedInputChannelCount(MediaTrackGraph*) const override {
    return GetRequestedInputChannelCount();
  }

  cubeb_input_processing_params RequestedInputProcessingParams(
      MediaTrackGraph* aGraph) const override;

  void Disconnect(MediaTrackGraph* aGraph) override;

  // Prepare for a change to platform processing params by assuming the
  // platform applies the intersection of the already applied params and
  // aRequestedParams, and set the software config accordingly.
  void NotifySetRequestedInputProcessingParams(
      MediaTrackGraph* aGraph, int aGeneration,
      cubeb_input_processing_params aRequestedParams) override;

  // Handle the result of an async operation to set processing params on a
  // cubeb stream. If the operation succeeded, disable the applied processing
  // params from the software processing config. If the operation failed,
  // request platform processing to be disabled so as to not prevent a cubeb
  // stream from being created.
  void NotifySetRequestedInputProcessingParamsResult(
      MediaTrackGraph* aGraph, int aGeneration,
      const Result<cubeb_input_processing_params, int>& aResult) override;

  // Slice aSegment into 10ms packets and run them through the APM.
  void PacketizeAndProcess(AudioProcessingTrack* aTrack,
                           const AudioSegment& aSegment);

  uint32_t GetRequestedInputChannelCount() const;

  // This is true when all processing is disabled, in which case we can skip
  // packetization, resampling and other processing passes. Processing may
  // still be applied by the platform on the underlying input track.
  bool IsPassThrough(MediaTrackGraph* aGraph) const;

  // This allows changing the APM options, enabling or disabling processing
  // steps. The settings get applied the next time we're about to process
  // input data.
  void ApplySettings(MediaTrackGraph* aGraph,
                     CubebUtils::AudioDeviceID aDeviceID,
                     const MediaEnginePrefs& aSettings);

  // The config currently applied to the audio processing module.
  const webrtc::AudioProcessing::Config& AppliedConfig(
      MediaTrackGraph* aGraph) const;

  void End();

  TrackTime NumBufferedFrames(MediaTrackGraph* aGraph) const;

  // The packet size contains samples in 10ms. The unit of aRate is hz.
  static uint32_t GetPacketSize(TrackRate aRate) {
    return webrtc::AudioProcessing::GetFrameSize(aRate);
  }

  bool IsEnded() const { return mEnded; }

  // For testing:
  bool HadAECAndDrift() const { return mHadAECAndDrift; }

  void SetEnvironmentWrapper(AudioProcessingTrack* aTrack,
                             RefPtr<WebrtcEnvironmentWrapper> aEnvWrapper);

 private:
  ~AudioInputProcessing() = default;
  webrtc::AudioProcessing::Config ConfigForPrefs(
      MediaTrackGraph* aGraph, const MediaEnginePrefs& aPrefs) const;
  void PassThroughChanged(MediaTrackGraph* aGraph);
  void RequestedInputChannelCountChanged(MediaTrackGraph* aGraph,
                                         CubebUtils::AudioDeviceID aDeviceId);
  void EnsurePacketizer(AudioProcessingTrack* aTrack);
  void EnsureAudioProcessing(AudioProcessingTrack* aTrack);
  void ResetAudioProcessing(MediaTrackGraph* aGraph);
  void ApplySettingsInternal(MediaTrackGraph* aGraph,
                             const MediaEnginePrefs& aSettings);
  PrincipalHandle GetCheckedPrincipal(const AudioSegment& aSegment);
  // This implements the processing algorithm to apply to the input (e.g. a
  // microphone). If all algorithms are disabled, this class is not used. This
  // class only accepts audio chunks of 10ms. It has two inputs and one
  // output: it is fed the speaker data and the microphone data. It outputs
  // processed input data.
  UniquePtr<webrtc::AudioProcessing> mAudioProcessing;
  // Whether mAudioProcessing was created for AEC with clock drift.
  // Meaningful only when mAudioProcessing is non-null.
  bool mHadAECAndDrift = false;
  // Packetizer to be able to feed 10ms packets to the input side of
  // mAudioProcessing. Not used if the processing is bypassed.
  Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
  // The current settings from about:config preferences and content-provided
  // constraints.
  MediaEnginePrefs mSettings;
  // The currently applied audio processing config. Set even if
  // mAudioProcessing is not. This is needed because ConfigForPrefs(mSettings)
  // is not static -- it relies on mPlatformProcessingSetParams and
  // MTG::OutputForAECIsPrimary(), which can change between calls to
  // ApplySettingsInternal -- and an AudioProcessing::Config is available from
  // mAudioProcessing only when that exists. Initialized as needed -- it is up
  // to the owner to call ApplySettings prior to Start.
  webrtc::AudioProcessing::Config mAppliedConfig;
  // When false, RequestedInputProcessingParams() returns no params, resulting
  // in platform processing getting disabled in the platform.
  bool mPlatformProcessingEnabled = false;
  // The generation tracking the latest requested set of platform processing
  // params.
  int mPlatformProcessingSetGeneration = -1;
  // The latest error notified to us through
  // NotifySetRequestedInputProcessingParamsResult, or Nothing if the latest
  // request was successful, or if a request is pending a result.
  Maybe<int> mPlatformProcessingSetError;
  // The processing params currently applied, or about to be applied, in the
  // platform. This allows adapting the AudioProcessingConfig accordingly.
  cubeb_input_processing_params mPlatformProcessingSetParams =
      CUBEB_INPUT_PROCESSING_PARAM_NONE;
  // Buffer for up to one 10ms packet of planar mixed audio output for the
  // reverse-stream (speaker data) of mAudioProcessing AEC.
  // Length is packet size * channel count, regardless of how many frames are
  // buffered. Not used if the processing is bypassed.
  AlignedFloatBuffer mOutputBuffer;
  // Number of channels into which mOutputBuffer is divided.
  uint32_t mOutputBufferChannelCount = 0;
  // Number of frames buffered in mOutputBuffer for the reverse stream.
  uint32_t mOutputBufferFrameCount = 0;
  // Stores the input audio, to be processed by the APM.
  AlignedFloatBuffer mInputBuffer;
  // Stores the deinterleaved microphone audio.
  AlignedFloatBuffer mDeinterleavedBuffer;
  // Stores the mixed-down input audio.
  AlignedFloatBuffer mInputDownmixBuffer;
  // Stores data waiting to be pulled.
  AudioSegment mSegment;
  // Whether or not this MediaEngine is enabled. If it's not enabled, it
  // operates in "pull" mode, and we append silence only, releasing the audio
  // input track.
  bool mEnabled;
  // Whether or not we've ended and removed the AudioProcessingTrack.
  bool mEnded;
  // When processing is enabled, the number of packets received by this
  // instance, to implement periodic logging.
  uint64_t mPacketCount;
  // Temporary descriptor for a slice of an AudioChunk parameter passed to
  // ProcessOutputData(). This is a member rather than on the stack so that
  // any memory allocated for its mChannelData pointer array is not
  // reallocated on each iteration.
  AudioChunk mSubChunk;
  // A storage holding the interleaved audio data converted from the
  // AudioSegment. This will be used as an input parameter for
  // PacketizeAndProcess. This should be removed once bug 1729041 is done.
  AutoTArray<AudioDataValue,
             SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
      mInterleavedBuffer;
  // Tracks the pending frames with paired principals piled up in packetizer.
  std::deque<std::pair<TrackTime, PrincipalHandle>> mChunksInPacketizer;
  // Environment wrapper handed to us via SetEnvironmentWrapper().
  RefPtr<WebrtcEnvironmentWrapper> mEnvWrapper;
};

// MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.
class AudioProcessingTrack : public DeviceInputConsumerTrack {
  // Only accessed on the graph thread.
  RefPtr<AudioInputProcessing> mInputProcessing;

  explicit AudioProcessingTrack(TrackRate aSampleRate)
      : DeviceInputConsumerTrack(aSampleRate) {}

  ~AudioProcessingTrack() = default;

 public:
  // Main Thread API
  void Destroy() override;
  void SetInputProcessing(RefPtr<AudioInputProcessing> aInputProcessing);
  static AudioProcessingTrack* Create(MediaTrackGraph* aGraph);

  // Graph Thread API
  void DestroyImpl() override;
  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
  uint32_t NumberOfChannels() const override {
    if (!mInputProcessing) {
      // There's an async gap between adding the track to the graph
      // (AudioProcessingTrack::Create) and setting mInputProcessing
      // (SetInputProcessing on the media manager thread).
      // Return 0 to indicate the default within this gap.
      return 0;
    }
    return mInputProcessing->GetRequestedInputChannelCount();
  }
  // Pass the graph's mixed audio output to mInputProcessing for processing as
  // the reverse stream.
  void NotifyOutputData(MediaTrackGraph* aGraph, const AudioChunk& aChunk);

  // Any thread
  AudioProcessingTrack* AsAudioProcessingTrack() override { return this; }

 private:
  // Graph thread API
  void SetInputProcessingImpl(RefPtr<AudioInputProcessing> aInputProcessing);
};

class MediaEngineWebRTCAudioCaptureSource : public MediaEngineSource {
 public:
  explicit MediaEngineWebRTCAudioCaptureSource(const MediaDevice* aMediaDevice);
  static nsString GetUUID();
  static nsString GetGroupId();
  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override {
    // Nothing to do here, everything is managed in MediaManager.cpp
    return NS_OK;
  }
  nsresult Deallocate() override {
    // Nothing to do here, everything is managed in MediaManager.cpp
    return NS_OK;
  }
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  // Photo capture is not supported for an audio capture source.
  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

  void GetCapabilities(
      dom::MediaTrackCapabilities& aOutCapabilities) const override {}

 protected:
  virtual ~MediaEngineWebRTCAudioCaptureSource() = default;
};

}  // end namespace mozilla

#endif  // MediaEngineWebRTCAudio_h