DynamicResampler.h (14071B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ 7 #define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_ 8 9 #include <speex/speex_resampler.h> 10 11 #include "AudioRingBuffer.h" 12 #include "AudioSegment.h" 13 #include "TimeUnits.h" 14 #include "WavDumper.h" 15 16 namespace mozilla { 17 18 const uint32_t STEREO = 2; 19 20 /** 21 * DynamicResampler allows updating on the fly the output sample rate and the 22 * number of channels. In addition to that, it maintains an internal buffer for 23 * the input data and allows pre-buffering as well. The Resample() method 24 * strives to provide the requested number of output frames by using the input 25 * data including any pre-buffering. If there are fewer frames in the internal 26 * buffer than is requested, the internal buffer is padded with enough silence 27 * to allow the requested to be resampled and returned. 28 * 29 * Input data buffering makes use of the AudioRingBuffer. The capacity of the 30 * buffer is initially 100ms of audio and it is pre-allocated during 31 * SetSampleFormat(). Should the input data grow beyond that, the input buffer 32 * is re-allocated on the fly. In addition to that, due to special feature of 33 * AudioRingBuffer, no extra copies take place when the input data is fed to the 34 * resampler. 35 * 36 * The sample format must be set before using any method. 37 * 38 * The DynamicResampler is not thread-safe, so all the methods appart from the 39 * constructor must be called on the same thread. 40 */ 41 class DynamicResampler final { 42 public: 43 /** 44 * Provide the initial input and output rate and the amount of pre-buffering. 45 * The channel count will be set to stereo. Memory allocation will take 46 * place. The input buffer is non-interleaved. 47 */ 48 DynamicResampler(uint32_t aInRate, uint32_t aOutRate, 49 uint32_t aInputPreBufferFrameCount = 0); 50 ~DynamicResampler(); 51 52 /** 53 * Set the sample format type to float or short. 54 */ 55 void SetSampleFormat(AudioSampleFormat aFormat); 56 uint32_t GetInRate() const { return mInRate; } 57 uint32_t GetChannels() const { return mChannels; } 58 59 /** 60 * Append `aInFrames` number of frames from `aInBuffer` to the internal input 61 * buffer. Memory copy/move takes place. 62 */ 63 void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames); 64 void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames); 65 /** 66 * Append `aInFrames` number of frames of silence to the internal input 67 * buffer. Memory copy/move takes place. 68 */ 69 void AppendInputSilence(const uint32_t aInFrames); 70 /** 71 * Return the number of frames the internal input buffer can store. 72 */ 73 uint32_t InFramesBufferSize() const; 74 /** 75 * Return the number of frames stored in the internal input buffer. 76 */ 77 uint32_t InFramesBuffered(uint32_t aChannelIndex) const; 78 79 /** 80 * Prepends existing input data with a silent pre-buffer if not already done. 81 * Data will be prepended so that after resampling aDuration of data, 82 * the buffering level will be as close as possible to 83 * mInputPreBufferFrameCount, which is the desired buffering level. 84 */ 85 void EnsurePreBuffer(media::TimeUnit aDuration); 86 87 /** 88 * Set the number of frames that should be used for input pre-buffering. 89 */ 90 void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount); 91 92 /* 93 * Resample as much frames as needed from the internal input buffer to the 94 * `aOutBuffer` in order to provide all `aOutFrames`. 95 * 96 * On first call, prepends the input buffer with silence so that after 97 * resampling aOutFrames frames of data, the input buffer holds data as close 98 * as possible to the configured pre-buffer size. 99 * 100 * If there are not enough input frames to provide the requested output 101 * frames, the input buffer is padded with enough silence to allow the 102 * requested frames to be resampled, and the pre-buffer is reset so that the 103 * next call will be treated as the first. 104 * 105 * Returns true if the internal input buffer underran and had to be padded 106 * with silence, otherwise false. 107 */ 108 bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex); 109 bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames, 110 uint32_t aChannelIndex); 111 112 /** 113 * Update the output rate or/and the channel count. If a value is not updated 114 * compared to the current one nothing happens. Changing the `aInRate` 115 * results in recalculation in the resampler. Changing `aChannels` results in 116 * the reallocation of the internal input buffer with the exception of 117 * changes between mono to stereo and vice versa where no reallocation takes 118 * place. A stereo internal input buffer is always maintained even if the 119 * sound is mono. 120 */ 121 void UpdateResampler(uint32_t aInRate, uint32_t aChannels); 122 123 private: 124 template <typename T> 125 void AppendInputInternal(Span<const T* const>& aInBuffer, 126 uint32_t aInFrames) { 127 MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels); 128 for (uint32_t i = 0; i < mChannels; ++i) { 129 PushInFrames(aInBuffer[i], aInFrames, i); 130 } 131 } 132 133 void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames, 134 float* aOutBuffer, uint32_t* aOutFrames, 135 uint32_t aChannelIndex); 136 void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames, 137 int16_t* aOutBuffer, uint32_t* aOutFrames, 138 uint32_t aChannelIndex); 139 140 template <typename T> 141 bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames, 142 uint32_t aChannelIndex) { 143 MOZ_ASSERT(mInRate); 144 MOZ_ASSERT(mOutRate); 145 MOZ_ASSERT(mChannels); 146 MOZ_ASSERT(aChannelIndex < mChannels); 147 MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length()); 148 MOZ_ASSERT(aOutFrames); 149 150 uint32_t outFramesNeeded = aOutFrames; 151 T* nextOutFrame = aOutBuffer; 152 if (mInRate == mOutRate) { 153 if (!mResamplerIsBypassed) { 154 uint32_t latency = speex_resampler_get_input_latency(mResampler); 155 mInternalInBuffer[aChannelIndex].ReadNoCopy( 156 [&](const Span<const T>& aInBuffer) -> uint32_t { 157 // Although unlikely with the sample rates used with this class, 158 // the resampler input latency may temporarily be higher than 159 // indicated, after a change in resampling rate that reduces the 160 // indicated latency. The resampler's "magic" samples cause 161 // this. All frames in the resampler are extracted when 162 // `latency` output frames have been extracted. 163 uint32_t outFramesResampled = std::min(outFramesNeeded, latency); 164 uint32_t inFrames = aInBuffer.Length(); 165 ResampleInternal(aInBuffer.Elements(), &inFrames, nextOutFrame, 166 &outFramesResampled, aChannelIndex); 167 nextOutFrame += outFramesResampled; 168 outFramesNeeded -= outFramesResampled; 169 if (outFramesResampled == latency) { 170 mResamplerIsBypassed = true; 171 // The last `latency` frames of input to the resampler will not 172 // be extracted from the resampler. Leave them in 173 // mInternalInBuffer to be copied directly to nextOutFrame. 174 MOZ_ASSERT(inFrames >= latency); 175 return inFrames - latency; 176 } 177 return inFrames; 178 }); 179 } 180 bool underrun = false; 181 if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead(); 182 buffered < outFramesNeeded) { 183 underrun = true; 184 mIsPreBufferSet = false; 185 mInternalInBuffer[aChannelIndex].WriteSilence(outFramesNeeded - 186 buffered); 187 } 188 DebugOnly<uint32_t> numFramesRead = mInternalInBuffer[aChannelIndex].Read( 189 Span(nextOutFrame, outFramesNeeded)); 190 MOZ_ASSERT(numFramesRead == outFramesNeeded); 191 // Workaround to avoid discontinuity when the speex resampler operates 192 // again. Feed it with the last 20 frames to warm up the internal memory 193 // of the resampler and then skip memory equals to resampler's input 194 // latency. 195 mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames); 196 if (aChannelIndex == 0 && !mIsWarmingUp) { 197 mInputStreamFile.Write(nextOutFrame, outFramesNeeded); 198 mOutputStreamFile.Write(nextOutFrame, outFramesNeeded); 199 } 200 return underrun; 201 } 202 203 auto resample = [&](const T* aInBuffer, uint32_t aInLength) -> uint32_t { 204 uint32_t outFramesResampled = outFramesNeeded; 205 uint32_t inFrames = aInLength; 206 ResampleInternal(aInBuffer, &inFrames, nextOutFrame, &outFramesResampled, 207 aChannelIndex); 208 nextOutFrame += outFramesResampled; 209 outFramesNeeded -= outFramesResampled; 210 mInputTail[aChannelIndex].StoreTail<T>(aInBuffer, inFrames); 211 return inFrames; 212 }; 213 214 MOZ_ASSERT(!mResamplerIsBypassed); 215 mInternalInBuffer[aChannelIndex].ReadNoCopy( 216 [&](const Span<const T>& aInBuffer) -> uint32_t { 217 if (!outFramesNeeded) { 218 return 0; 219 } 220 return resample(aInBuffer.Elements(), aInBuffer.Length()); 221 }); 222 223 if (outFramesNeeded == 0) { 224 return false; 225 } 226 227 while (outFramesNeeded > 0) { 228 MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0); 229 // Round up. 230 uint32_t totalInFramesNeeded = 231 ((CheckedUint32(outFramesNeeded) * mInRate + mOutRate - 1) / mOutRate) 232 .value(); 233 resample(nullptr, totalInFramesNeeded); 234 } 235 mIsPreBufferSet = false; 236 return true; 237 } 238 239 template <typename T> 240 void PushInFrames(const T* aInBuffer, const uint32_t aInFrames, 241 uint32_t aChannelIndex) { 242 MOZ_ASSERT(aInBuffer); 243 MOZ_ASSERT(aInFrames); 244 MOZ_ASSERT(mChannels); 245 MOZ_ASSERT(aChannelIndex < mChannels); 246 MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length()); 247 EnsureInputBufferSizeInFrames( 248 mInternalInBuffer[aChannelIndex].AvailableRead() + aInFrames); 249 mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames)); 250 } 251 252 void WarmUpResampler(bool aSkipLatency); 253 254 bool EnsureInputBufferSizeInFrames(uint32_t aSizeInFrames) { 255 uint32_t sampleSize = 0; 256 if (mSampleFormat == AUDIO_FORMAT_FLOAT32) { 257 sampleSize = sizeof(float); 258 } else if (mSampleFormat == AUDIO_FORMAT_S16) { 259 sampleSize = sizeof(short); 260 } 261 262 if (sampleSize == 0) { 263 // No sample format set, we wouldn't know how many bytes to allocate. 264 return true; 265 } 266 267 uint32_t sizeInFrames = InFramesBufferSize(); 268 if (aSizeInFrames <= sizeInFrames) { 269 // Buffer size is sufficient. 270 return true; // no reallocation necessary 271 } 272 273 // 5 second cap. 274 const uint32_t cap = 5 * mInRate; 275 if (sizeInFrames >= cap) { 276 // Already at the cap. 277 return false; 278 } 279 280 // As a backoff strategy, at least double the previous size. 281 sizeInFrames *= 2; 282 283 if (aSizeInFrames > sizeInFrames) { 284 // A larger buffer than the normal backoff strategy provides is needed, or 285 // this is the first time setting the buffer size. Add another 50ms, as 286 // some jitter is expected. 287 sizeInFrames = aSizeInFrames + mInRate / 20; 288 } 289 290 // mInputPreBufferFrameCount is an indication of the desired average 291 // buffering. Provide for at least twice this. 292 sizeInFrames = std::max(sizeInFrames, mInputPreBufferFrameCount * 2); 293 294 sizeInFrames = std::min(cap, sizeInFrames); 295 296 bool success = true; 297 for (auto& b : mInternalInBuffer) { 298 success = success && b.EnsureLengthBytes(sampleSize * sizeInFrames); 299 } 300 301 if (success) { 302 // All buffers have the new size. 303 return true; 304 } 305 306 // Allocating an input buffer failed. We stick with the old buffer size. 307 NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u " 308 "frames). Expect glitches.", 309 sampleSize * sizeInFrames, sizeInFrames) 310 .get()); 311 return false; 312 } 313 314 public: 315 const uint32_t mOutRate; 316 317 private: 318 bool mIsPreBufferSet = false; 319 bool mIsWarmingUp = false; 320 // The resampler can be bypassed when the input and output rates match and 321 // any frames buffered in the resampler have been extracted. This initial 322 // value is reset on construction by UpdateResampler() if the rates differ. 323 bool mResamplerIsBypassed = true; 324 uint32_t mInputPreBufferFrameCount; 325 uint32_t mChannels = 0; 326 uint32_t mInRate; 327 328 AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer; 329 330 SpeexResamplerState* mResampler = nullptr; 331 AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE; 332 333 class TailBuffer { 334 public: 335 template <typename T> 336 T* Buffer() { 337 return reinterpret_cast<T*>(mBuffer); 338 } 339 /* Store the MAXSIZE last elements of the buffer. */ 340 template <typename T> 341 void StoreTail(const Span<const T>& aInBuffer) { 342 StoreTail(aInBuffer.data(), aInBuffer.size()); 343 } 344 template <typename T> 345 void StoreTail(const T* aInBuffer, uint32_t aInFrames) { 346 const T* inBuffer = aInBuffer; 347 mSize = std::min(aInFrames, MAXSIZE); 348 if (inBuffer) { 349 PodCopy(Buffer<T>(), inBuffer + aInFrames - mSize, mSize); 350 } else { 351 std::fill_n(Buffer<T>(), mSize, static_cast<T>(0)); 352 } 353 } 354 uint32_t Length() { return mSize; } 355 static constexpr uint32_t MAXSIZE = 20; 356 357 private: 358 float mBuffer[MAXSIZE] = {}; 359 uint32_t mSize = 0; 360 }; 361 AutoTArray<TailBuffer, STEREO> mInputTail; 362 363 WavDumper mInputStreamFile; 364 WavDumper mOutputStreamFile; 365 }; 366 367 } // namespace mozilla 368 369 #endif // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_