tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

DynamicResampler.h (14071B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
      7 #define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
      8 
      9 #include <speex/speex_resampler.h>
     10 
     11 #include "AudioRingBuffer.h"
     12 #include "AudioSegment.h"
     13 #include "TimeUnits.h"
     14 #include "WavDumper.h"
     15 
     16 namespace mozilla {
     17 
     18 const uint32_t STEREO = 2;
     19 
     20 /**
     21 * DynamicResampler allows updating on the fly the output sample rate and the
     22 * number of channels. In addition to that, it maintains an internal buffer for
     23 * the input data and allows pre-buffering as well. The Resample() method
     24 * strives to provide the requested number of output frames by using the input
     25 * data including any pre-buffering. If there are fewer frames in the internal
     26 * buffer than is requested, the internal buffer is padded with enough silence
     27 * to allow the requested to be resampled and returned.
     28 *
     29 * Input data buffering makes use of the AudioRingBuffer. The capacity of the
     30 * buffer is initially 100ms of audio and it is pre-allocated during
     31 * SetSampleFormat(). Should the input data grow beyond that, the input buffer
     32 * is re-allocated on the fly. In addition to that, due to special feature of
     33 * AudioRingBuffer, no extra copies take place when the input data is fed to the
     34 * resampler.
     35 *
     36 * The sample format must be set before using any method.
     37 *
     38 * The DynamicResampler is not thread-safe, so all the methods appart from the
     39 * constructor must be called on the same thread.
     40 */
     41 class DynamicResampler final {
     42 public:
     43  /**
     44   * Provide the initial input and output rate and the amount of pre-buffering.
     45   * The channel count will be set to stereo. Memory allocation will take
     46   * place. The input buffer is non-interleaved.
     47   */
     48  DynamicResampler(uint32_t aInRate, uint32_t aOutRate,
     49                   uint32_t aInputPreBufferFrameCount = 0);
     50  ~DynamicResampler();
     51 
     52  /**
     53   * Set the sample format type to float or short.
     54   */
     55  void SetSampleFormat(AudioSampleFormat aFormat);
     56  uint32_t GetInRate() const { return mInRate; }
     57  uint32_t GetChannels() const { return mChannels; }
     58 
     59  /**
     60   * Append `aInFrames` number of frames from `aInBuffer` to the internal input
     61   * buffer. Memory copy/move takes place.
     62   */
     63  void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames);
     64  void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames);
     65  /**
     66   * Append `aInFrames` number of frames of silence to the internal input
     67   * buffer. Memory copy/move takes place.
     68   */
     69  void AppendInputSilence(const uint32_t aInFrames);
     70  /**
     71   * Return the number of frames the internal input buffer can store.
     72   */
     73  uint32_t InFramesBufferSize() const;
     74  /**
     75   * Return the number of frames stored in the internal input buffer.
     76   */
     77  uint32_t InFramesBuffered(uint32_t aChannelIndex) const;
     78 
     79  /**
     80   * Prepends existing input data with a silent pre-buffer if not already done.
     81   * Data will be prepended so that after resampling aDuration of data,
     82   * the buffering level will be as close as possible to
     83   * mInputPreBufferFrameCount, which is the desired buffering level.
     84   */
     85  void EnsurePreBuffer(media::TimeUnit aDuration);
     86 
     87  /**
     88   * Set the number of frames that should be used for input pre-buffering.
     89   */
     90  void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount);
     91 
     92  /*
     93   * Resample as much frames as needed from the internal input buffer to the
     94   * `aOutBuffer` in order to provide all `aOutFrames`.
     95   *
     96   * On first call, prepends the input buffer with silence so that after
     97   * resampling aOutFrames frames of data, the input buffer holds data as close
     98   * as possible to the configured pre-buffer size.
     99   *
    100   * If there are not enough input frames to provide the requested output
    101   * frames, the input buffer is padded with enough silence to allow the
    102   * requested frames to be resampled, and the pre-buffer is reset so that the
    103   * next call will be treated as the first.
    104   *
    105   * Returns true if the internal input buffer underran and had to be padded
    106   * with silence, otherwise false.
    107   */
    108  bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex);
    109  bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames,
    110                uint32_t aChannelIndex);
    111 
    112  /**
    113   * Update the output rate or/and the channel count. If a value is not updated
    114   * compared to the current one nothing happens. Changing the `aInRate`
    115   * results in recalculation in the resampler. Changing `aChannels` results in
    116   * the reallocation of the internal input buffer with the exception of
    117   * changes between mono to stereo and vice versa where no reallocation takes
    118   * place. A stereo internal input buffer is always maintained even if the
    119   * sound is mono.
    120   */
    121  void UpdateResampler(uint32_t aInRate, uint32_t aChannels);
    122 
    123 private:
    124  template <typename T>
    125  void AppendInputInternal(Span<const T* const>& aInBuffer,
    126                           uint32_t aInFrames) {
    127    MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
    128    for (uint32_t i = 0; i < mChannels; ++i) {
    129      PushInFrames(aInBuffer[i], aInFrames, i);
    130    }
    131  }
    132 
    133  void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
    134                        float* aOutBuffer, uint32_t* aOutFrames,
    135                        uint32_t aChannelIndex);
    136  void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
    137                        int16_t* aOutBuffer, uint32_t* aOutFrames,
    138                        uint32_t aChannelIndex);
    139 
    140  template <typename T>
    141  bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames,
    142                        uint32_t aChannelIndex) {
    143    MOZ_ASSERT(mInRate);
    144    MOZ_ASSERT(mOutRate);
    145    MOZ_ASSERT(mChannels);
    146    MOZ_ASSERT(aChannelIndex < mChannels);
    147    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
    148    MOZ_ASSERT(aOutFrames);
    149 
    150    uint32_t outFramesNeeded = aOutFrames;
    151    T* nextOutFrame = aOutBuffer;
    152    if (mInRate == mOutRate) {
    153      if (!mResamplerIsBypassed) {
    154        uint32_t latency = speex_resampler_get_input_latency(mResampler);
    155        mInternalInBuffer[aChannelIndex].ReadNoCopy(
    156            [&](const Span<const T>& aInBuffer) -> uint32_t {
    157              // Although unlikely with the sample rates used with this class,
    158              // the resampler input latency may temporarily be higher than
    159              // indicated, after a change in resampling rate that reduces the
    160              // indicated latency. The resampler's "magic" samples cause
    161              // this. All frames in the resampler are extracted when
    162              // `latency` output frames have been extracted.
    163              uint32_t outFramesResampled = std::min(outFramesNeeded, latency);
    164              uint32_t inFrames = aInBuffer.Length();
    165              ResampleInternal(aInBuffer.Elements(), &inFrames, nextOutFrame,
    166                               &outFramesResampled, aChannelIndex);
    167              nextOutFrame += outFramesResampled;
    168              outFramesNeeded -= outFramesResampled;
    169              if (outFramesResampled == latency) {
    170                mResamplerIsBypassed = true;
    171                // The last `latency` frames of input to the resampler will not
    172                // be extracted from the resampler. Leave them in
    173                // mInternalInBuffer to be copied directly to nextOutFrame.
    174                MOZ_ASSERT(inFrames >= latency);
    175                return inFrames - latency;
    176              }
    177              return inFrames;
    178            });
    179      }
    180      bool underrun = false;
    181      if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead();
    182          buffered < outFramesNeeded) {
    183        underrun = true;
    184        mIsPreBufferSet = false;
    185        mInternalInBuffer[aChannelIndex].WriteSilence(outFramesNeeded -
    186                                                      buffered);
    187      }
    188      DebugOnly<uint32_t> numFramesRead = mInternalInBuffer[aChannelIndex].Read(
    189          Span(nextOutFrame, outFramesNeeded));
    190      MOZ_ASSERT(numFramesRead == outFramesNeeded);
    191      // Workaround to avoid discontinuity when the speex resampler operates
    192      // again. Feed it with the last 20 frames to warm up the internal memory
    193      // of the resampler and then skip memory equals to resampler's input
    194      // latency.
    195      mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames);
    196      if (aChannelIndex == 0 && !mIsWarmingUp) {
    197        mInputStreamFile.Write(nextOutFrame, outFramesNeeded);
    198        mOutputStreamFile.Write(nextOutFrame, outFramesNeeded);
    199      }
    200      return underrun;
    201    }
    202 
    203    auto resample = [&](const T* aInBuffer, uint32_t aInLength) -> uint32_t {
    204      uint32_t outFramesResampled = outFramesNeeded;
    205      uint32_t inFrames = aInLength;
    206      ResampleInternal(aInBuffer, &inFrames, nextOutFrame, &outFramesResampled,
    207                       aChannelIndex);
    208      nextOutFrame += outFramesResampled;
    209      outFramesNeeded -= outFramesResampled;
    210      mInputTail[aChannelIndex].StoreTail<T>(aInBuffer, inFrames);
    211      return inFrames;
    212    };
    213 
    214    MOZ_ASSERT(!mResamplerIsBypassed);
    215    mInternalInBuffer[aChannelIndex].ReadNoCopy(
    216        [&](const Span<const T>& aInBuffer) -> uint32_t {
    217          if (!outFramesNeeded) {
    218            return 0;
    219          }
    220          return resample(aInBuffer.Elements(), aInBuffer.Length());
    221        });
    222 
    223    if (outFramesNeeded == 0) {
    224      return false;
    225    }
    226 
    227    while (outFramesNeeded > 0) {
    228      MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0);
    229      // Round up.
    230      uint32_t totalInFramesNeeded =
    231          ((CheckedUint32(outFramesNeeded) * mInRate + mOutRate - 1) / mOutRate)
    232              .value();
    233      resample(nullptr, totalInFramesNeeded);
    234    }
    235    mIsPreBufferSet = false;
    236    return true;
    237  }
    238 
    239  template <typename T>
    240  void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
    241                    uint32_t aChannelIndex) {
    242    MOZ_ASSERT(aInBuffer);
    243    MOZ_ASSERT(aInFrames);
    244    MOZ_ASSERT(mChannels);
    245    MOZ_ASSERT(aChannelIndex < mChannels);
    246    MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
    247    EnsureInputBufferSizeInFrames(
    248        mInternalInBuffer[aChannelIndex].AvailableRead() + aInFrames);
    249    mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
    250  }
    251 
    252  void WarmUpResampler(bool aSkipLatency);
    253 
    254  bool EnsureInputBufferSizeInFrames(uint32_t aSizeInFrames) {
    255    uint32_t sampleSize = 0;
    256    if (mSampleFormat == AUDIO_FORMAT_FLOAT32) {
    257      sampleSize = sizeof(float);
    258    } else if (mSampleFormat == AUDIO_FORMAT_S16) {
    259      sampleSize = sizeof(short);
    260    }
    261 
    262    if (sampleSize == 0) {
    263      // No sample format set, we wouldn't know how many bytes to allocate.
    264      return true;
    265    }
    266 
    267    uint32_t sizeInFrames = InFramesBufferSize();
    268    if (aSizeInFrames <= sizeInFrames) {
    269      // Buffer size is sufficient.
    270      return true;  // no reallocation necessary
    271    }
    272 
    273    // 5 second cap.
    274    const uint32_t cap = 5 * mInRate;
    275    if (sizeInFrames >= cap) {
    276      // Already at the cap.
    277      return false;
    278    }
    279 
    280    // As a backoff strategy, at least double the previous size.
    281    sizeInFrames *= 2;
    282 
    283    if (aSizeInFrames > sizeInFrames) {
    284      // A larger buffer than the normal backoff strategy provides is needed, or
    285      // this is the first time setting the buffer size. Add another 50ms, as
    286      // some jitter is expected.
    287      sizeInFrames = aSizeInFrames + mInRate / 20;
    288    }
    289 
    290    // mInputPreBufferFrameCount is an indication of the desired average
    291    // buffering.  Provide for at least twice this.
    292    sizeInFrames = std::max(sizeInFrames, mInputPreBufferFrameCount * 2);
    293 
    294    sizeInFrames = std::min(cap, sizeInFrames);
    295 
    296    bool success = true;
    297    for (auto& b : mInternalInBuffer) {
    298      success = success && b.EnsureLengthBytes(sampleSize * sizeInFrames);
    299    }
    300 
    301    if (success) {
    302      // All buffers have the new size.
    303      return true;
    304    }
    305 
    306    // Allocating an input buffer failed. We stick with the old buffer size.
    307    NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u "
    308                               "frames). Expect glitches.",
    309                               sampleSize * sizeInFrames, sizeInFrames)
    310                   .get());
    311    return false;
    312  }
    313 
    314 public:
    315  const uint32_t mOutRate;
    316 
    317 private:
    318  bool mIsPreBufferSet = false;
    319  bool mIsWarmingUp = false;
    320  // The resampler can be bypassed when the input and output rates match and
    321  // any frames buffered in the resampler have been extracted.  This initial
    322  // value is reset on construction by UpdateResampler() if the rates differ.
    323  bool mResamplerIsBypassed = true;
    324  uint32_t mInputPreBufferFrameCount;
    325  uint32_t mChannels = 0;
    326  uint32_t mInRate;
    327 
    328  AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;
    329 
    330  SpeexResamplerState* mResampler = nullptr;
    331  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
    332 
    333  class TailBuffer {
    334   public:
    335    template <typename T>
    336    T* Buffer() {
    337      return reinterpret_cast<T*>(mBuffer);
    338    }
    339    /* Store the MAXSIZE last elements of the buffer. */
    340    template <typename T>
    341    void StoreTail(const Span<const T>& aInBuffer) {
    342      StoreTail(aInBuffer.data(), aInBuffer.size());
    343    }
    344    template <typename T>
    345    void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
    346      const T* inBuffer = aInBuffer;
    347      mSize = std::min(aInFrames, MAXSIZE);
    348      if (inBuffer) {
    349        PodCopy(Buffer<T>(), inBuffer + aInFrames - mSize, mSize);
    350      } else {
    351        std::fill_n(Buffer<T>(), mSize, static_cast<T>(0));
    352      }
    353    }
    354    uint32_t Length() { return mSize; }
    355    static constexpr uint32_t MAXSIZE = 20;
    356 
    357   private:
    358    float mBuffer[MAXSIZE] = {};
    359    uint32_t mSize = 0;
    360  };
    361  AutoTArray<TailBuffer, STEREO> mInputTail;
    362 
    363  WavDumper mInputStreamFile;
    364  WavDumper mOutputStreamFile;
    365 };
    366 
    367 }  // namespace mozilla
    368 
    369 #endif  // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_