tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ConvolverNode.cpp (19781B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "ConvolverNode.h"
      8 
      9 #include "AlignmentUtils.h"
     10 #include "AudioNodeEngine.h"
     11 #include "AudioNodeTrack.h"
     12 #include "PlayingRefChangeHandler.h"
     13 #include "Tracing.h"
     14 #include "blink/Reverb.h"
     15 #include "mozilla/dom/ConvolverNodeBinding.h"
     16 
     17 namespace mozilla::dom {
     18 
// Cycle collection: in addition to what AudioNode already traverses/unlinks,
// include mBuffer so a JS-visible AudioBuffer cannot keep a cycle alive.
NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer)

// No interfaces beyond those of AudioNode are exposed by this class.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode)
NS_INTERFACE_MAP_END_INHERITING(AudioNode)

// Reference counting is forwarded to the AudioNode base class.
NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode)
NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode)
     26 
// Audio-thread engine backing ConvolverNode.  Owns the WebCore::Reverb that
// performs the actual convolution and implements the mono/stereo transition
// handling described at RightConvolverMode below.
class ConvolverNodeEngine final : public AudioNodeEngine {
  typedef PlayingRefChangeHandler PlayingRefChanged;

 public:
  // aNormalize is not used by the engine; normalization is applied on the
  // main thread when the Reverb is constructed (see ConvolverNode::SetBuffer,
  // which passes mNormalize to the WebCore::Reverb constructor).
  ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)
      : AudioNodeEngine(aNode) {}

  // Indicates how the right output channel is generated.
  enum class RightConvolverMode {
    // A right convolver is always used when there is more than one impulse
    // response channel.
    Always,
    // With a single response channel, the mode may be either Direct or
    // Difference.  The decision on which to use is made when stereo input is
    // received.  Once the right convolver is in use, convolver state is
    // suitable only for the selected mode, and so the mode cannot change
    // until the right convolver contains only silent history.
    //
    // With Direct mode, each convolver processes a corresponding channel.
    // This mode is selected when input is initially stereo or
    // channelInterpretation is "discrete" at the time of starting the right
    // convolver when input changes from non-silent mono to stereo.
    Direct,
    // Difference mode is selected if channelInterpretation is "speakers" at
    // the time of starting the right convolver when the input changes from
    // mono to stereo.
    //
    // When non-silent input is initially mono, with a single response
    // channel, the right output channel is not produced until input becomes
    // stereo.  Only a single convolver is used for mono processing.  When
    // stereo input arrives after mono input, output must be as if the mono
    // signal remaining in the left convolver is up-mixed, but the right
    // convolver has not been initialized with the history of the mono input.
    // Copying the state of the left convolver into the right convolver is not
    // desirable, because there is considerable state to copy, and the
    // different convolvers are intended to process out of phase, which means
    // that state from one convolver would not directly map to state in
    // another convolver.
    //
    // Instead the distributive property of convolution is used to generate
    // the right output channel using information in the left output channel.
    // Using l and r to denote the left and right channel input signals, g the
    // impulse response, and * convolution, the convolution of the right
    // channel can be given by
    //
    //   r * g = (l + (r - l)) * g
    //         = l * g + (r - l) * g
    //
    // The left convolver continues to process the left channel l to produce
    // l * g.  The right convolver processes the difference of input channel
    // signals r - l to produce (r - l) * g.  The outputs of the two
    // convolvers are added to generate the right channel output r * g.
    //
    // The benefit of doing this is that the history of the r - l input for a
    // "speakers" up-mixed mono signal is zero, and so an empty convolver
    // already has exactly the right history for mixing the previous mono
    // signal with the new stereo signal.
    Difference
  };

  // Installs a new impulse response (or nullptr for none/silence), taking
  // ownership of aReverb and resetting all tail-tracking state.
  void SetReverb(WebCore::Reverb* aReverb,
                 uint32_t aImpulseChannelCount) override {
    mRemainingLeftOutput = INT32_MIN;
    mRemainingRightOutput = 0;
    mRemainingRightHistory = 0;

    // Assume for now that convolution of channel difference is not required.
    // Direct may change to Difference during processing.
    if (aReverb) {
      mRightConvolverMode = aImpulseChannelCount == 1
                                ? RightConvolverMode::Direct
                                : RightConvolverMode::Always;
    } else {
      mRightConvolverMode = RightConvolverMode::Always;
    }

    mReverb.reset(aReverb);
  }

  // Copies aInput into mReverbInput with aInput.mVolume pre-multiplied into
  // the samples, zero-filling any channels beyond those present in aInput
  // (so a null/mono input can feed a wider convolver input).
  void AllocateReverbInput(const AudioBlock& aInput,
                           uint32_t aTotalChannelCount) {
    uint32_t inputChannelCount = aInput.ChannelCount();
    MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);
    mReverbInput.AllocateChannels(aTotalChannelCount);
    // Pre-multiply the input's volume
    for (uint32_t i = 0; i < inputChannelCount; ++i) {
      const float* src = static_cast<const float*>(aInput.mChannelData[i]);
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
    }
    // Fill remaining channels with silence
    for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {
      float* dest = mReverbInput.ChannelFloatsForWrite(i);
      std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);
    }
  }

  void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                    const AudioBlock& aInput, AudioBlock* aOutput,
                    bool* aFinished) override;

  // The engine stays active while any left-channel tail remains; INT32_MIN
  // is the sentinel meaning the end of any previous tail has been handled.
  bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
    size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf);

    amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false);

    if (mReverb) {
      amount += mReverb->sizeOfIncludingThis(aMallocSizeOf);
    }

    return amount;
  }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  }

 private:
  // Keeping mReverbInput across process calls avoids unnecessary reallocation.
  AudioBlock mReverbInput;
  UniquePtr<WebCore::Reverb> mReverb;
  // Tracks samples of the tail remaining to be output.  INT32_MIN is a
  // special value to indicate that the end of any previous tail has been
  // handled.
  int32_t mRemainingLeftOutput = INT32_MIN;
  // mRemainingRightOutput and mRemainingRightHistory are only used when
  // mRightConvolverMode != Always.  There is no special handling required at
  // the end of tail times and so INT32_MIN is not used.
  // mRemainingRightOutput tracks how much longer this node needs to continue
  // to produce a right output channel.
  int32_t mRemainingRightOutput = 0;
  // mRemainingRightHistory tracks how much silent input would be required to
  // drain the right convolver, which may sometimes be longer than the period
  // a right output channel is required.
  int32_t mRemainingRightHistory = 0;
  RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;
};
    166 
    167 static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) {
    168  const float* left = static_cast<const float*>(aBlock->mChannelData[0]);
    169  float* right = aBlock->ChannelFloatsForWrite(1);
    170  AudioBlockAddChannelWithScale(left, aScale, right);
    171 }
    172 
// Produces one render quantum of convolved output.  Handles: (1) tail
// draining after input goes silent, with main-thread ref management to keep
// the track alive; (2) the mono<->stereo transitions described at
// RightConvolverMode; (3) pre-multiplication of aInput.mVolume.
// aFrom and aFinished are unused here.
void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,
                                       const AudioBlock& aInput,
                                       AudioBlock* aOutput, bool* aFinished) {
  TRACE("ConvolverNodeEngine::ProcessBlock");
  // No Reverb means no buffer (or a silent buffer): output is silence.
  if (!mReverb) {
    aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
    return;
  }

  uint32_t inputChannelCount = aInput.ChannelCount();
  if (aInput.IsNull()) {
    if (mRemainingLeftOutput > 0) {
      // Still draining the tail: feed a block of silence to the convolver.
      mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;
      AllocateReverbInput(aInput, 1);  // floats for silence
    } else {
      if (mRemainingLeftOutput != INT32_MIN) {
        // The tail has just finished.  Mark it handled, let the track check
        // for inactivity, and release the self-reference taken when
        // non-silent input arrived (see the ADDREF dispatch below).
        mRemainingLeftOutput = INT32_MIN;
        MOZ_ASSERT(mRemainingRightOutput <= 0);
        MOZ_ASSERT(mRemainingRightHistory <= 0);
        aTrack->ScheduleCheckForInactive();
        RefPtr<PlayingRefChanged> refchanged =
            new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE);
        aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
      }
      aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);
      return;
    }
  } else {
    // Non-silent input after silence (or at startup): take a self-reference
    // so the node is kept alive for the duration of its tail.
    if (mRemainingLeftOutput <= 0) {
      RefPtr<PlayingRefChanged> refchanged =
          new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF);
      aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());
    }

    // Use mVolume as a flag to detect whether AllocateReverbInput() gets
    // called.
    mReverbInput.mVolume = 0.0f;

    // Special handling of input channel count changes is used when there is
    // only a single impulse response channel.  See RightConvolverMode.
    if (mRightConvolverMode != RightConvolverMode::Always) {
      ChannelInterpretation channelInterpretation =
          aTrack->GetChannelInterpretation();
      if (inputChannelCount == 2) {
        if (mRemainingRightHistory <= 0) {
          // Will start the second convolver.  Choose to convolve the right
          // channel directly if there is no left tail to up-mix or up-mixing
          // is "discrete".
          mRightConvolverMode =
              (mRemainingLeftOutput <= 0 ||
               channelInterpretation == ChannelInterpretation::Discrete)
                  ? RightConvolverMode::Direct
                  : RightConvolverMode::Difference;
        }
        // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
        mRemainingRightOutput =
            mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        mRemainingRightHistory = mRemainingRightOutput;
        if (mRightConvolverMode == RightConvolverMode::Difference) {
          AllocateReverbInput(aInput, 2);
          // Subtract left from right.
          AddScaledLeftToRight(&mReverbInput, -1.0f);
        }
      } else if (mRemainingRightHistory > 0) {
        // There is one channel of input, but a second convolver also
        // requires input.  Up-mix appropriately for the second convolver.
        if ((mRightConvolverMode == RightConvolverMode::Difference) ^
            (channelInterpretation == ChannelInterpretation::Discrete)) {
          MOZ_ASSERT(
              (mRightConvolverMode == RightConvolverMode::Difference &&
               channelInterpretation == ChannelInterpretation::Speakers) ||
              (mRightConvolverMode == RightConvolverMode::Direct &&
               channelInterpretation == ChannelInterpretation::Discrete));
          // The state is one of the following combinations:
          // 1) Difference and speakers.
          //    Up-mixing gives r = l.
          //    The input to the second convolver is r - l.
          // 2) Direct and discrete.
          //    Up-mixing gives r = 0.
          //    The input to the second convolver is r.
          //
          // In each case the input for the second convolver is silence, which
          // will drain the convolver.  (AllocateReverbInput() zero-fills the
          // second channel because aInput only has one.)
          AllocateReverbInput(aInput, 2);
        } else {
          if (channelInterpretation == ChannelInterpretation::Discrete) {
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);
            // channelInterpretation has changed since the second convolver
            // was added.  "discrete" up-mixing of input would produce a
            // silent right channel r = 0, but the second convolver needs
            // r - l for RightConvolverMode::Difference.
            AllocateReverbInput(aInput, 2);
            AddScaledLeftToRight(&mReverbInput, -1.0f);
          } else {
            MOZ_ASSERT(channelInterpretation ==
                       ChannelInterpretation::Speakers);
            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);
            // The Reverb will essentially up-mix the single input channel by
            // feeding it into both convolvers.
          }
          // The second convolver does not have silent input, and so it will
          // not drain.  It will need to continue processing up-mixed input
          // because the next input block may be stereo, which would be mixed
          // with the signal remaining in the convolvers.
          // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
          mRemainingRightHistory =
              mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
        }
      }
    }

    // If no special handling above populated mReverbInput, either
    // pre-multiply the volume into a copy or share the input block directly.
    if (mReverbInput.mVolume == 0.0f) {  // not yet set
      if (aInput.mVolume != 1.0f) {
        AllocateReverbInput(aInput, inputChannelCount);  // pre-multiply
      } else {
        mReverbInput = aInput;
      }
    }

    // Non-silent input restarts the full left tail countdown.
    mRemainingLeftOutput = mReverb->impulseResponseLength();
    MOZ_ASSERT(mRemainingLeftOutput > 0);
  }

  // "The ConvolverNode produces a mono output only in the single case where
  // there is a single input channel and a single-channel buffer."
  uint32_t outputChannelCount = 2;
  uint32_t reverbOutputChannelCount = 2;
  if (mRightConvolverMode != RightConvolverMode::Always) {
    // When the input changes from stereo to mono, the output continues to be
    // stereo for the length of the tail time, during which the two channels
    // may differ.
    if (mRemainingRightOutput > 0) {
      MOZ_ASSERT(mRemainingRightHistory > 0);
      mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;
    } else {
      outputChannelCount = 1;
    }
    // The second convolver keeps processing until it drains.
    if (mRemainingRightHistory > 0) {
      mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;
    } else {
      reverbOutputChannelCount = 1;
    }
  }

  // If there are two convolvers, then they each need an output buffer, even
  // if the second convolver is only processing to keep history of up-mixed
  // input.
  aOutput->AllocateChannels(reverbOutputChannelCount);

  mReverb->process(&mReverbInput, aOutput);

  if (mRightConvolverMode == RightConvolverMode::Difference &&
      outputChannelCount == 2) {
    // Add left to right.
    AddScaledLeftToRight(aOutput, 1.0f);
  } else {
    // Trim if outputChannelCount < reverbOutputChannelCount
    aOutput->mChannelData.TruncateLength(outputChannelCount);
  }
}
    334 
    335 ConvolverNode::ConvolverNode(AudioContext* aContext)
    336    : AudioNode(aContext, 2, ChannelCountMode::Clamped_max,
    337                ChannelInterpretation::Speakers),
    338      mNormalize(true) {
    339  ConvolverNodeEngine* engine = new ConvolverNodeEngine(this, mNormalize);
    340  mTrack = AudioNodeTrack::Create(
    341      aContext, engine, AudioNodeTrack::NO_TRACK_FLAGS, aContext->Graph());
    342 }
    343 
    344 /* static */
    345 already_AddRefed<ConvolverNode> ConvolverNode::Create(
    346    JSContext* aCx, AudioContext& aAudioContext,
    347    const ConvolverOptions& aOptions, ErrorResult& aRv) {
    348  RefPtr<ConvolverNode> audioNode = new ConvolverNode(&aAudioContext);
    349 
    350  audioNode->Initialize(aOptions, aRv);
    351  if (NS_WARN_IF(aRv.Failed())) {
    352    return nullptr;
    353  }
    354 
    355  // This must be done before setting the buffer.
    356  audioNode->SetNormalize(!aOptions.mDisableNormalization);
    357 
    358  if (aOptions.mBuffer.WasPassed()) {
    359    MOZ_ASSERT(aCx);
    360    audioNode->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv);
    361    if (NS_WARN_IF(aRv.Failed())) {
    362      return nullptr;
    363    }
    364  }
    365 
    366  return audioNode.forget();
    367 }
    368 
    369 size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
    370  size_t amount = AudioNode::SizeOfExcludingThis(aMallocSizeOf);
    371  if (mBuffer) {
    372    // NB: mBuffer might be shared with the associated engine, by convention
    373    //     the AudioNode will report.
    374    amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
    375  }
    376  return amount;
    377 }
    378 
    379 size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    380  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
    381 }
    382 
// Reflects this node into JS through the generated WebIDL binding.
JSObject* ConvolverNode::WrapObject(JSContext* aCx,
                                    JS::Handle<JSObject*> aGivenProto) {
  return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto);
}
    387 
// Implements the ConvolverNode.buffer setter.  Validates the buffer's channel
// count (1, 2 or 4 only) and sample rate against the context, converts int16
// sample data to float on the main thread, then hands a freshly built
// WebCore::Reverb (or nullptr for no/silent buffer) to the AudioNodeTrack.
// On failure, aRv is set and mBuffer is left unchanged.
void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer,
                              ErrorResult& aRv) {
  if (aBuffer) {
    switch (aBuffer->NumberOfChannels()) {
      case 1:
      case 2:
      case 4:
        // Supported number of channels
        break;
      default:
        aRv.ThrowNotSupportedError(
            nsPrintfCString("%u is not a supported number of channels",
                            aBuffer->NumberOfChannels()));
        return;
    }
  }

  // The impulse response must be at the context rate; no resampling is done.
  if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) {
    aRv.ThrowNotSupportedError(nsPrintfCString(
        "Buffer sample rate (%g) does not match AudioContext sample rate (%g)",
        aBuffer->SampleRate(), Context()->SampleRate()));
    return;
  }

  // Send the buffer to the track
  AudioNodeTrack* ns = mTrack;
  MOZ_ASSERT(ns, "Why don't we have a track here?");
  if (aBuffer) {
    AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx);
    if (data.mBufferFormat == AUDIO_FORMAT_S16) {
      // Reverb expects data in float format.
      // Convert on the main thread so as to minimize allocations on the audio
      // thread.
      // Reverb will dispose of the buffer once initialized, so convert here
      // and leave the smaller arrays in the AudioBuffer.
      // There is currently no value in providing 16/32-byte aligned data
      // because PadAndMakeScaledDFT() will copy the data (without SIMD
      // instructions) to aligned arrays for the FFT.
      // NOTE(review): overflow of this CheckedInt is presumably rejected
      // inside SharedBuffer::Create(), which receives the checked value
      // directly — confirm against its implementation.
      CheckedInt<size_t> bufferSize(sizeof(float));
      bufferSize *= data.mDuration;
      bufferSize *= data.ChannelCount();
      RefPtr<SharedBuffer> floatBuffer =
          SharedBuffer::Create(bufferSize, fallible);
      if (!floatBuffer) {
        aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
        return;
      }
      // Convert each channel in place into consecutive runs of the new
      // float buffer, then repoint the chunk at the converted data.
      auto floatData = static_cast<float*>(floatBuffer->Data());
      for (size_t i = 0; i < data.ChannelCount(); ++i) {
        ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData,
                            data.mDuration);
        data.mChannelData[i] = floatData;
        floatData += data.mDuration;
      }
      data.mBuffer = std::move(floatBuffer);
      data.mBufferFormat = AUDIO_FORMAT_FLOAT32;
    } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      // This is valid, but a signal convolved by a silent signal is silent, set
      // the reverb to nullptr and return.
      ns->SetReverb(nullptr, 0);
      mBuffer = aBuffer;
      return;
    }

    // Note about empirical tuning (this is copied from Blink)
    // The maximum FFT size affects reverb performance and accuracy.
    // If the reverb is single-threaded and processes entirely in the real-time
    // audio thread, it's important not to make this too high.  In this case
    // 8192 is a good value. But, the Reverb object is multi-threaded, so we
    // want this as high as possible without losing too much accuracy. Very
    // large FFTs will have worse phase errors. Given these constraints 32768 is
    // a good compromise.
    const size_t MaxFFTSize = 32768;

    bool allocationFailure = false;
    UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb(
        data, MaxFFTSize, !Context()->IsOffline(), mNormalize,
        aBuffer->SampleRate(), &allocationFailure));
    if (!allocationFailure) {
      // The engine takes ownership of the raw Reverb pointer.
      ns->SetReverb(reverb.release(), data.ChannelCount());
    } else {
      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
      return;
    }
  } else {
    ns->SetReverb(nullptr, 0);
  }
  mBuffer = aBuffer;
}
    477 
// Setter for the normalize attribute.  Only affects buffers assigned by
// later SetBuffer() calls, which read mNormalize when building the reverb;
// Create() therefore configures this before setting any initial buffer.
void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; }
    479 
    480 }  // namespace mozilla::dom