ConvolverNode.cpp (19781B)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "ConvolverNode.h"

#include "AlignmentUtils.h"
#include "AudioNodeEngine.h"
#include "AudioNodeTrack.h"
#include "PlayingRefChangeHandler.h"
#include "Tracing.h"
#include "blink/Reverb.h"
#include "mozilla/dom/ConvolverNodeBinding.h"

namespace mozilla::dom {

// Cycle-collection, interface-map and refcounting boilerplate for
// ConvolverNode.  The node inherits from AudioNode; mBuffer is the one member
// named to the cycle-collection macro here.
NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode)
NS_INTERFACE_MAP_END_INHERITING(AudioNode)

NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode)
NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode)

// Engine that performs the convolution for a ConvolverNode.
//
// The engine owns a WebCore::Reverb (mReverb) and, for each block, prepares
// the reverb input in mReverbInput and lets the reverb write the output.  It
// also tracks how many frames of tail remain after non-silent input so that
// it knows when output can become null again and when the right output
// channel is still required.
class ConvolverNodeEngine final : public AudioNodeEngine {
  typedef PlayingRefChangeHandler PlayingRefChanged;

 public:
  // aNormalize is accepted but not used by this constructor.  The
  // normalization flag is instead passed to the WebCore::Reverb constructor
  // in ConvolverNode::SetBuffer().
  ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)
      : AudioNodeEngine(aNode) {}

  // Indicates how the right output channel is generated.
  enum class RightConvolverMode {
    // A right convolver is always used when there is more than one impulse
    // response channel.
    Always,
    // With a single response channel, the mode may be either Direct or
    // Difference.  The decision on which to use is made when stereo input is
    // received.  Once the right convolver is in use, convolver state is
    // suitable only for the selected mode, and so the mode cannot change
    // until the right convolver contains only silent history.
    //
    // With Direct mode, each convolver processes a corresponding channel.
    // This mode is selected when input is initially stereo or
    // channelInterpretation is "discrete" at the time of starting the right
    // convolver when input changes from non-silent mono to stereo.
    Direct,
    // Difference mode is selected if channelInterpretation is "speakers" at
    // the time of starting the right convolver when the input changes from
    // mono to stereo.
    //
    // When non-silent input is initially mono, with a single response
    // channel, the right output channel is not produced until input becomes
    // stereo.  Only a single convolver is used for mono processing.  When
    // stereo input arrives after mono input, output must be as if the mono
    // signal remaining in the left convolver is up-mixed, but the right
    // convolver has not been initialized with the history of the mono input.
    // Copying the state of the left convolver into the right convolver is not
    // desirable, because there is considerable state to copy, and the
    // different convolvers are intended to process out of phase, which means
    // that state from one convolver would not directly map to state in
    // another convolver.
    //
    // Instead the distributive property of convolution is used to generate
    // the right output channel using information in the left output channel.
    // Using l and r to denote the left and right channel input signals, g the
    // impulse response, and * convolution, the convolution of the right
    // channel can be given by
    //
    //   r * g = (l + (r - l)) * g
    //         = l * g + (r - l) * g
    //
    // The left convolver continues to process the left channel l to produce
    // l * g.  The right convolver processes the difference of input channel
    // signals r - l to produce (r - l) * g.  The outputs of the two
    // convolvers are added to generate the right channel output r * g.
    //
    // The benefit of doing this is that the history of the r - l input for a
    // "speakers" up-mixed mono signal is zero, and so an empty convolver
    // already has exactly the right history for mixing the previous mono
    // signal with the new stereo signal.
    Difference
  };

  // Installs aReverb as the reverb used for processing, or clears the reverb
  // when aReverb is null.  Ownership of aReverb is taken through the
  // UniquePtr mReverb.  All tail counters are reset to their initial values,
  // so any tail from a previous reverb is discarded.
  //
  // aImpulseChannelCount selects the initial right convolver mode: exactly
  // one impulse response channel starts in Direct mode; any other count (and
  // the null-reverb case) uses Always.
  void SetReverb(WebCore::Reverb* aReverb,
                 uint32_t aImpulseChannelCount) override {
    mRemainingLeftOutput = INT32_MIN;
    mRemainingRightOutput = 0;
    mRemainingRightHistory = 0;

    // Assume for now that convolution of channel difference is not required.
    // Direct may change to Difference during processing.
    if (aReverb) {
      mRightConvolverMode = aImpulseChannelCount == 1
                                ? RightConvolverMode::Direct
                                : RightConvolverMode::Always;
    } else {
      mRightConvolverMode = RightConvolverMode::Always;
    }

    mReverb.reset(aReverb);
  }

  // Fills mReverbInput with aTotalChannelCount channels: each channel of
  // aInput is copied with aInput.mVolume applied, and any remaining channels
  // are zero-filled.  aTotalChannelCount must be at least the channel count
  // of aInput.
  //
  // ProcessBlock() calls this with a null input and a count of 1 to obtain a
  // block of silent floats, and uses mReverbInput.mVolume as a flag for
  // whether this method ran (presumably AllocateChannels() leaves mVolume
  // non-zero -- TODO confirm against AudioBlock).
  void AllocateReverbInput(const AudioBlock& aInput,
                           uint32_t aTotalChannelCount) {
    uint32_t inputChannelCount = aInput.ChannelCount();
    MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);
    mReverbInput.AllocateChannels(aTotalChannelCount);
    // Pre-multiply the input's volume
    for (uint32_t i = 0; i < inputChannelCount; ++i) {
      const float* src = static_cast<const float*>(aInput.mChannelData[i]);
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
    }
    // Fill remaining channels with silence
    for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);
    }
  }

  // Processes one block of WEBAUDIO_BLOCK_SIZE frames.  Defined below, after
  // the class.
  void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                    const AudioBlock& aInput, AudioBlock* aOutput,
                    bool* aFinished) override;

  // The engine is active from the first non-null input until the end of the
  // resulting tail has been handled, at which point mRemainingLeftOutput is
  // set back to the INT32_MIN sentinel.
  bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; }

  // Memory reporting: the base engine, the retained reverb input block and,
  // when present, the reverb itself.
  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf);

    amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false);

    if (mReverb) {
      amount += mReverb->sizeOfIncludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

 private:
  // Keeping mReverbInput across process calls avoids unnecessary reallocation.
  AudioBlock mReverbInput;
  UniquePtr<WebCore::Reverb> mReverb;
  // Tracks samples of the tail remaining to be output.  INT32_MIN is a
  // special value to indicate that the end of any previous tail has been
  // handled.
  int32_t mRemainingLeftOutput = INT32_MIN;
  // mRemainingRightOutput and mRemainingRightHistory are only used when
  // mRightConvolverMode != Always.  There is no special handling required at
  // the end of tail times and so INT32_MIN is not used.
  // mRemainingRightOutput tracks how much longer this node needs to continue
  // to produce a right output channel.
  int32_t mRemainingRightOutput = 0;
  // mRemainingRightHistory tracks how much silent input would be required to
  // drain the right convolver, which may sometimes be longer than the period
  // a right output channel is required.
  int32_t mRemainingRightHistory = 0;
  RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;
};

// Adds channel 0 of aBlock, scaled by aScale, into channel 1 of the same
// block.  Callers pass -1 to turn the right channel into r - l before
// convolution and +1 to turn (r - l) * g back into r * g afterwards.  aBlock
// must have at least two channels, since channels 0 and 1 are both accessed.
static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) {
  const float* left = static_cast<const float*>(aBlock->mChannelData[0]);
  float* right = aBlock->ChannelFloatsForWrite(1);
  AudioBlockAddChannelWithScale(left, aScale, right);
}

// Processes one block of input through the reverb.
//
// Overview of the cases handled, in order:
//  * No reverb is set: the output is null.
//  * Null input while a left tail remains: the tail counter is decremented
//    and a block of silence is fed to the reverb.
//  * Null input with no tail remaining: the output is null.  The first time
//    this happens after a tail, the sentinel is restored, an inactivity check
//    is scheduled and a RELEASE playing-ref change is dispatched.
//  * Non-null input: an ADDREF playing-ref change is dispatched if no tail
//    was pending, mReverbInput is prepared (possibly up-mixed or converted to
//    a channel difference; see RightConvolverMode), and the left tail counter
//    is reset to the impulse response length.
//
// After the input is prepared, the number of reverb and node output channels
// is derived from the right-channel counters, the reverb is run, and the
// output is either recombined (Difference mode) or trimmed.
//
// aFrom and aFinished are not used by this implementation.
void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                                       const AudioBlock& aInput,
                                       AudioBlock* aOutput, bool* aFinished) {
  TRACE("ConvolverNodeEngine::ProcessBlock");
  if (!mReverb) {
    aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
    return;
  }

  uint32_t inputChannelCount = aInput.ChannelCount();
  if (aInput.IsNull()) {
    if (mRemainingLeftOutput > 0) {
      // Still inside the tail of earlier input: keep the convolvers running
      // on silence.
      mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;
      AllocateReverbInput(aInput, 1);  // floats for silence
    } else {
      if (mRemainingLeftOutput != INT32_MIN) {
        // First null block after the tail has ended: mark the end as handled
        // so that this branch is not taken again until new input arrives.
        mRemainingLeftOutput = INT32_MIN;
        MOZ_ASSERT(mRemainingRightOutput <= 0);
        MOZ_ASSERT(mRemainingRightHistory <= 0);
        aTrack->ScheduleCheckForInactive();
        RefPtr<PlayingRefChanged> refchanged =
            new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE);
        aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
      }
      aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
      return;
    }
  } else {
    if (mRemainingLeftOutput <= 0) {
      // No tail was pending, so this input starts a new period of activity.
      RefPtr<PlayingRefChanged> refchanged =
          new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF);
      aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
    }

    // Use mVolume as a flag to detect whether AllocateReverbInput() gets
    // called.
    mReverbInput.mVolume = 0.0f;

    // Special handling of input channel count changes is used when there is
    // only a single impulse response channel.  See RightConvolverMode.
    if (mRightConvolverMode != RightConvolverMode::Always) {
      ChannelInterpretation channelInterpretation =
          aTrack->GetChannelInterpretation();
      if (inputChannelCount == 2) {
        if (mRemainingRightHistory <= 0) {
          // Will start the second convolver.  Choose to convolve the right
          // channel directly if there is no left tail to up-mix or up-mixing
          // is "discrete".
          mRightConvolverMode =
              (mRemainingLeftOutput <= 0 ||
               channelInterpretation == ChannelInterpretation::Discrete)
                  ? RightConvolverMode::Direct
                  : RightConvolverMode::Difference;
        }
        // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
        mRemainingRightOutput =
            mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        mRemainingRightHistory = mRemainingRightOutput;
        if (mRightConvolverMode == RightConvolverMode::Difference) {
          AllocateReverbInput(aInput, 2);
          // Subtract left from right.
          AddScaledLeftToRight(&mReverbInput, -1.0f);
        }
      } else if (mRemainingRightHistory > 0) {
        // There is one channel of input, but a second convolver also
        // requires input.  Up-mix appropriately for the second convolver.
        if ((mRightConvolverMode == RightConvolverMode::Difference) ^
            (channelInterpretation == ChannelInterpretation::Discrete)) {
          MOZ_ASSERT(
              (mRightConvolverMode == RightConvolverMode::Difference &&
               channelInterpretation == ChannelInterpretation::Speakers) ||
              (mRightConvolverMode == RightConvolverMode::Direct &&
               channelInterpretation == ChannelInterpretation::Discrete));
          // The state is one of the following combinations:
          // 1) Difference and speakers.
          //    Up-mixing gives r = l.
          //    The input to the second convolver is r - l.
          // 2) Direct and discrete.
          //    Up-mixing gives r = 0.
          //    The input to the second convolver is r.
          //
          // In each case the input for the second convolver is silence, which
          // will drain the convolver.
          AllocateReverbInput(aInput, 2);
        } else {
          if (channelInterpretation == ChannelInterpretation::Discrete) {
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);
            // channelInterpretation has changed since the second convolver
            // was added.  "discrete" up-mixing of input would produce a
            // silent right channel r = 0, but the second convolver needs
            // r - l for RightConvolverMode::Difference.
            AllocateReverbInput(aInput, 2);
            AddScaledLeftToRight(&mReverbInput, -1.0f);
          } else {
            MOZ_ASSERT(channelInterpretation ==
                       ChannelInterpretation::Speakers);
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);
            // The Reverb will essentially up-mix the single input channel by
            // feeding it into both convolvers.
          }
          // The second convolver does not have silent input, and so it will
          // not drain.  It will need to continue processing up-mixed input
          // because the next input block may be stereo, which would be mixed
          // with the signal remaining in the convolvers.
          // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
          mRemainingRightHistory =
              mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        }
      }
    }

    // If none of the branches above built mReverbInput, either build it here
    // to apply the input volume, or reuse the input block as-is when its
    // volume is exactly 1.
    if (mReverbInput.mVolume == 0.0f) {  // not yet set
      if (aInput.mVolume != 1.0f) {
        AllocateReverbInput(aInput, inputChannelCount);  // pre-multiply
      } else {
        mReverbInput = aInput;
      }
    }

    // Any non-null input restarts the full-length left tail.
    mRemainingLeftOutput = mReverb->impulseResponseLength();
    MOZ_ASSERT(mRemainingLeftOutput > 0);
  }

  // "The ConvolverNode produces a mono output only in the single case where
  // there is a single input channel and a single-channel buffer."
  uint32_t outputChannelCount = 2;
  uint32_t reverbOutputChannelCount = 2;
  if (mRightConvolverMode != RightConvolverMode::Always) {
    // When the input changes from stereo to mono, the output continues to be
    // stereo for the length of the tail time, during which the two channels
    // may differ.
    if (mRemainingRightOutput > 0) {
      MOZ_ASSERT(mRemainingRightHistory > 0);
      mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;
    } else {
      outputChannelCount = 1;
    }
    // The second convolver keeps processing until it drains.
    if (mRemainingRightHistory > 0) {
      mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;
    } else {
      reverbOutputChannelCount = 1;
    }
  }

  // If there are two convolvers, then they each need an output buffer, even
  // if the second convolver is only processing to keep history of up-mixed
  // input.
  aOutput->AllocateChannels(reverbOutputChannelCount);

  mReverb->process(&mReverbInput, aOutput);

  if (mRightConvolverMode == RightConvolverMode::Difference &&
      outputChannelCount == 2) {
    // Add left to right.
    AddScaledLeftToRight(aOutput, 1.0f);
  } else {
    // Trim if outputChannelCount < reverbOutputChannelCount
    aOutput->mChannelData.TruncateLength(outputChannelCount);
  }
}

// Constructs the node with normalization enabled, then creates its engine and
// the track that runs it.  The AudioNode base is initialized with 2,
// ChannelCountMode::Clamped_max and ChannelInterpretation::Speakers
// (presumably channel count, count mode and interpretation -- confirm
// against the AudioNode constructor).
ConvolverNode::ConvolverNode(AudioContext* aContext)
    : AudioNode(aContext, 2, ChannelCountMode::Clamped_max,
                ChannelInterpretation::Speakers),
      mNormalize(true) {
  ConvolverNodeEngine* engine = new ConvolverNodeEngine(this, mNormalize);
  mTrack = AudioNodeTrack::Create(
      aContext, engine, AudioNodeTrack::NO_TRACK_FLAGS, aContext->Graph());
}

// Creates a ConvolverNode for aAudioContext and applies aOptions to it.
// Returns nullptr, with the failure recorded on aRv, if Initialize() or
// SetBuffer() fails.  aCx must be non-null when aOptions carries a buffer.
/* static */
already_AddRefed<ConvolverNode> ConvolverNode::Create(
    JSContext* aCx, AudioContext& aAudioContext,
    const ConvolverOptions& aOptions, ErrorResult& aRv) {
  RefPtr<ConvolverNode> audioNode = new ConvolverNode(&aAudioContext);

  audioNode->Initialize(aOptions, aRv);
  if (NS_WARN_IF(aRv.Failed())) {
    return nullptr;
  }

  // This must be done before setting the buffer.
  audioNode->SetNormalize(!aOptions.mDisableNormalization);

  if (aOptions.mBuffer.WasPassed()) {
    MOZ_ASSERT(aCx);
    audioNode->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv);
    if (NS_WARN_IF(aRv.Failed())) {
      return nullptr;
    }
  }

  return audioNode.forget();
}

// Memory reporting: the AudioNode base plus the impulse response buffer when
// one is set.
size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t amount = AudioNode::SizeOfExcludingThis(aMallocSizeOf);
  if (mBuffer) {
    // NB: mBuffer might be shared with the associated engine, by convention
    // the AudioNode will report.
    amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
  }
  return amount;
}

size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
}

// Wraps this node using the generated ConvolverNode binding.
JSObject* ConvolverNode::WrapObject(JSContext* aCx,
                                    JS::Handle<JSObject*> aGivenProto) {
  return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto);
}

// Sets the impulse response buffer and hands a matching reverb to the track.
//
// Behaviour, in order:
//  * A non-null aBuffer must have 1, 2 or 4 channels and the same sample rate
//    as the context; otherwise a NotSupportedError is thrown on aRv and
//    nothing is changed.
//  * A null aBuffer, or a buffer whose shared data is in silence format,
//    clears the reverb on the track and stores aBuffer in mBuffer.
//  * 16-bit integer data is converted into a newly allocated float buffer
//    before the reverb is built.
//  * If that allocation or the reverb construction reports failure,
//    NS_ERROR_OUT_OF_MEMORY is thrown on aRv and the method returns without
//    updating mBuffer or the track's reverb.
//  * Otherwise ownership of the new reverb is released to the track's
//    SetReverb() and mBuffer is updated.
//
// The current value of mNormalize is read when the reverb is constructed.
void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer,
                              ErrorResult& aRv) {
  if (aBuffer) {
    switch (aBuffer->NumberOfChannels()) {
      case 1:
      case 2:
      case 4:
        // Supported number of channels
        break;
      default:
        aRv.ThrowNotSupportedError(
            nsPrintfCString("%u is not a supported number of channels",
                            aBuffer->NumberOfChannels()));
        return;
    }
  }

  if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) {
    aRv.ThrowNotSupportedError(nsPrintfCString(
        "Buffer sample rate (%g) does not match AudioContext sample rate (%g)",
        aBuffer->SampleRate(), Context()->SampleRate()));
    return;
  }

  // Send the buffer to the track
  AudioNodeTrack* ns = mTrack;
  MOZ_ASSERT(ns, "Why don't we have a track here?");
  if (aBuffer) {
    AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx);
    if (data.mBufferFormat == AUDIO_FORMAT_S16) {
      // Reverb expects data in float format.
      // Convert on the main thread so as to minimize allocations on the audio
      // thread.
      // Reverb will dispose of the buffer once initialized, so convert here
      // and leave the smaller arrays in the AudioBuffer.
      // There is currently no value in providing 16/32-byte aligned data
      // because PadAndMakeScaledDFT() will copy the data (without SIMD
      // instructions) to aligned arrays for the FFT.
      //
      // The size is accumulated in a CheckedInt and passed as such to the
      // fallible SharedBuffer::Create(); a null result is reported as out of
      // memory below.
      CheckedInt<size_t> bufferSize(sizeof(float));
      bufferSize *= data.mDuration;
      bufferSize *= data.ChannelCount();
      RefPtr<SharedBuffer> floatBuffer =
          SharedBuffer::Create(bufferSize, fallible);
      if (!floatBuffer) {
        aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
        return;
      }
      // Channels are laid out back to back in the new buffer, each
      // data.mDuration floats long, and the chunk's channel pointers are
      // redirected to them one at a time.
      auto floatData = static_cast<float*>(floatBuffer->Data());
      for (size_t i = 0; i < data.ChannelCount(); ++i) {
        ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData,
                            data.mDuration);
        data.mChannelData[i] = floatData;
        floatData += data.mDuration;
      }
      data.mBuffer = std::move(floatBuffer);
      data.mBufferFormat = AUDIO_FORMAT_FLOAT32;
    } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      // This is valid, but a signal convolved by a silent signal is silent, set
      // the reverb to nullptr and return.
      ns->SetReverb(nullptr, 0);
      mBuffer = aBuffer;
      return;
    }

    // Note about empirical tuning (this is copied from Blink)
    // The maximum FFT size affects reverb performance and accuracy.
    // If the reverb is single-threaded and processes entirely in the real-time
    // audio thread, it's important not to make this too high.  In this case
    // 8192 is a good value.  But, the Reverb object is multi-threaded, so we
    // want this as high as possible without losing too much accuracy.  Very
    // large FFTs will have worse phase errors.  Given these constraints 32768 is
    // a good compromise.
    const size_t MaxFFTSize = 32768;

    bool allocationFailure = false;
    UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb(
        data, MaxFFTSize, !Context()->IsOffline(), mNormalize,
        aBuffer->SampleRate(), &allocationFailure));
    if (!allocationFailure) {
      ns->SetReverb(reverb.release(), data.ChannelCount());
    } else {
      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
  } else {
    ns->SetReverb(nullptr, 0);
  }
  mBuffer = aBuffer;
}

// Only stores the flag.  It is read when a reverb is built in SetBuffer(), so
// a change here does not by itself rebuild the reverb for a buffer that has
// already been set.
void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; }

}  // namespace mozilla::dom