AudioSegment.cpp (10640B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "AudioSegment.h" 7 8 #include <speex/speex_resampler.h> 9 10 #include "AudioChannelFormat.h" 11 #include "AudioMixer.h" 12 #include "MediaTrackGraph.h" // for nsAutoRefTraits<SpeexResamplerState> 13 14 namespace mozilla { 15 16 const uint8_t 17 SilentChannel::gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * 18 SilentChannel::AUDIO_PROCESSING_FRAMES] = {0}; 19 20 template <> 21 const float* SilentChannel::ZeroChannel<float>() { 22 return reinterpret_cast<const float*>(SilentChannel::gZeroChannel); 23 } 24 25 template <> 26 const int16_t* SilentChannel::ZeroChannel<int16_t>() { 27 return reinterpret_cast<const int16_t*>(SilentChannel::gZeroChannel); 28 } 29 30 void AudioSegment::ApplyVolume(float aVolume) { 31 for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) { 32 ci->mVolume *= aVolume; 33 } 34 } 35 36 template <typename T> 37 void AudioSegment::Resample(nsAutoRef<SpeexResamplerState>& aResampler, 38 uint32_t* aResamplerChannelCount, uint32_t aInRate, 39 uint32_t aOutRate) { 40 mDuration = 0; 41 42 for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) { 43 AutoTArray<nsTArray<T>, GUESS_AUDIO_CHANNELS> output; 44 AutoTArray<const T*, GUESS_AUDIO_CHANNELS> bufferPtrs; 45 AudioChunk& c = *ci; 46 // If this chunk is null, don't bother resampling, just alter its duration 47 if (c.IsNull()) { 48 c.mDuration = (c.mDuration * aOutRate) / aInRate; 49 mDuration += c.mDuration; 50 continue; 51 } 52 uint32_t channels = c.mChannelData.Length(); 53 // This might introduce a discontinuity, but a channel count change in the 54 // middle of a stream is not that common. This also initializes the 55 // resampler as late as possible. 56 if (channels != *aResamplerChannelCount) { 57 SpeexResamplerState* state = 58 speex_resampler_init(channels, aInRate, aOutRate, 59 SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr); 60 MOZ_ASSERT(state); 61 aResampler.own(state); 62 *aResamplerChannelCount = channels; 63 } 64 output.SetLength(channels); 65 bufferPtrs.SetLength(channels); 66 uint32_t inFrames = c.mDuration; 67 // Round up to allocate; the last frame may not be used. 68 NS_ASSERTION((UINT64_MAX - aInRate + 1) / c.mDuration >= aOutRate, 69 "Dropping samples"); 70 uint32_t outSize = 71 (static_cast<uint64_t>(c.mDuration) * aOutRate + aInRate - 1) / aInRate; 72 for (uint32_t i = 0; i < channels; i++) { 73 T* out = output[i].AppendElements(outSize); 74 uint32_t outFrames = outSize; 75 76 const T* in = static_cast<const T*>(c.mChannelData[i]); 77 dom::WebAudioUtils::SpeexResamplerProcess(aResampler.get(), i, in, 78 &inFrames, out, &outFrames); 79 MOZ_ASSERT(inFrames == c.mDuration); 80 81 bufferPtrs[i] = out; 82 output[i].SetLength(outFrames); 83 } 84 MOZ_ASSERT(channels > 0); 85 c.mDuration = output[0].Length(); 86 c.mBuffer = new mozilla::SharedChannelArrayBuffer<T>(std::move(output)); 87 for (uint32_t i = 0; i < channels; i++) { 88 c.mChannelData[i] = bufferPtrs[i]; 89 } 90 mDuration += c.mDuration; 91 } 92 } 93 94 void AudioSegment::ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler, 95 uint32_t* aResamplerChannelCount, 96 uint32_t aInRate, uint32_t aOutRate) { 97 if (mChunks.IsEmpty()) { 98 return; 99 } 100 101 AudioSampleFormat format = AUDIO_FORMAT_SILENCE; 102 for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) { 103 if (ci->mBufferFormat != AUDIO_FORMAT_SILENCE) { 104 format = ci->mBufferFormat; 105 } 106 } 107 108 switch (format) { 109 // If the format is silence at this point, all the chunks are silent. The 110 // actual function we use does not matter, it's just a matter of changing 111 // the chunks duration. 112 case AUDIO_FORMAT_SILENCE: 113 case AUDIO_FORMAT_FLOAT32: 114 Resample<float>(aResampler, aResamplerChannelCount, aInRate, aOutRate); 115 break; 116 case AUDIO_FORMAT_S16: 117 Resample<int16_t>(aResampler, aResamplerChannelCount, aInRate, aOutRate); 118 break; 119 default: 120 MOZ_ASSERT(false); 121 break; 122 } 123 } 124 125 size_t AudioSegment::WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer, 126 uint32_t aChannels) const { 127 size_t offset = 0; 128 if (GetDuration() <= 0) { 129 MOZ_ASSERT(GetDuration() == 0); 130 return offset; 131 } 132 133 // Calculate how many samples in this segment 134 size_t frames = static_cast<size_t>(GetDuration()); 135 CheckedInt<size_t> samples(frames); 136 samples *= static_cast<size_t>(aChannels); 137 MOZ_ASSERT(samples.isValid()); 138 if (!samples.isValid()) { 139 return offset; 140 } 141 142 // Enlarge buffer space if needed 143 if (samples.value() > aBuffer.Capacity()) { 144 aBuffer.SetCapacity(samples.value()); 145 } 146 aBuffer.SetLengthAndRetainStorage(samples.value()); 147 aBuffer.ClearAndRetainStorage(); 148 149 // Convert the de-interleaved chunks into an interleaved buffer. Note that 150 // we may upmix or downmix the audio data if the channel in the chunks 151 // mismatch with aChannels 152 for (ConstChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) { 153 const AudioChunk& c = *ci; 154 size_t samplesInChunk = static_cast<size_t>(c.mDuration) * aChannels; 155 switch (c.mBufferFormat) { 156 case AUDIO_FORMAT_S16: 157 WriteChunk<int16_t>(c, aChannels, c.mVolume, 158 aBuffer.Elements() + offset); 159 break; 160 case AUDIO_FORMAT_FLOAT32: 161 WriteChunk<float>(c, aChannels, c.mVolume, aBuffer.Elements() + offset); 162 break; 163 case AUDIO_FORMAT_SILENCE: 164 PodZero(aBuffer.Elements() + offset, samplesInChunk); 165 break; 166 default: 167 MOZ_ASSERT_UNREACHABLE("Unknown format"); 168 PodZero(aBuffer.Elements() + offset, samplesInChunk); 169 break; 170 } 171 offset += samplesInChunk; 172 } 173 MOZ_DIAGNOSTIC_ASSERT(samples.value() == offset, 174 "Segment's duration is incorrect"); 175 aBuffer.SetLengthAndRetainStorage(offset); 176 return offset; 177 } 178 179 // This helps to to safely get a pointer to the position we want to start 180 // writing a planar audio buffer, depending on the channel and the offset in the 181 // buffer. 182 static AudioDataValue* PointerForOffsetInChannel(AudioDataValue* aData, 183 size_t aLengthSamples, 184 uint32_t aChannelCount, 185 uint32_t aChannel, 186 uint32_t aOffsetSamples) { 187 size_t samplesPerChannel = aLengthSamples / aChannelCount; 188 size_t beginningOfChannel = samplesPerChannel * aChannel; 189 MOZ_ASSERT(aChannel * samplesPerChannel + aOffsetSamples < aLengthSamples, 190 "Offset request out of bounds."); 191 return aData + beginningOfChannel + aOffsetSamples; 192 } 193 194 template <typename SrcT> 195 static void DownMixChunk(const AudioChunk& aChunk, 196 Span<AudioDataValue* const> aOutputChannels) { 197 Span<const SrcT* const> channelData = aChunk.ChannelData<SrcT>(); 198 uint32_t frameCount = aChunk.mDuration; 199 if (channelData.Length() > aOutputChannels.Length()) { 200 // Down mix. 201 AudioChannelsDownMix(channelData, aOutputChannels, frameCount); 202 for (AudioDataValue* outChannel : aOutputChannels) { 203 ScaleAudioSamples(outChannel, frameCount, aChunk.mVolume); 204 } 205 } else { 206 // The channel count is already what we want. 207 for (uint32_t channel = 0; channel < aOutputChannels.Length(); channel++) { 208 ConvertAudioSamplesWithScale(channelData[channel], 209 aOutputChannels[channel], frameCount, 210 aChunk.mVolume); 211 } 212 } 213 } 214 215 void AudioChunk::DownMixTo( 216 Span<AudioDataValue* const> aOutputChannelPtrs) const { 217 switch (mBufferFormat) { 218 case AUDIO_FORMAT_FLOAT32: 219 DownMixChunk<float>(*this, aOutputChannelPtrs); 220 return; 221 case AUDIO_FORMAT_S16: 222 DownMixChunk<int16_t>(*this, aOutputChannelPtrs); 223 return; 224 case AUDIO_FORMAT_SILENCE: 225 for (AudioDataValue* outChannel : aOutputChannelPtrs) { 226 std::fill_n(outChannel, mDuration, static_cast<AudioDataValue>(0)); 227 } 228 return; 229 // Avoid `default:` so that `-Wswitch` catches missing enumerators at 230 // compile time. 231 } 232 MOZ_ASSERT_UNREACHABLE("buffer format"); 233 } 234 235 void AudioSegment::Mix(AudioMixer& aMixer, uint32_t aOutputChannels, 236 uint32_t aSampleRate) { 237 AutoTArray<AudioDataValue, 238 SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS> 239 buf; 240 AudioChunk upMixChunk; 241 uint32_t offsetSamples = 0; 242 uint32_t duration = GetDuration(); 243 244 if (duration <= 0) { 245 MOZ_ASSERT(duration == 0); 246 return; 247 } 248 249 uint32_t outBufferLength = duration * aOutputChannels; 250 buf.SetLength(outBufferLength); 251 252 AutoTArray<AudioDataValue*, GUESS_AUDIO_CHANNELS> outChannelPtrs; 253 outChannelPtrs.SetLength(aOutputChannels); 254 255 uint32_t frames; 256 for (ChunkIterator ci(*this); !ci.IsEnded(); 257 ci.Next(), offsetSamples += frames) { 258 const AudioChunk& c = *ci; 259 frames = c.mDuration; 260 for (uint32_t channel = 0; channel < aOutputChannels; channel++) { 261 outChannelPtrs[channel] = 262 PointerForOffsetInChannel(buf.Elements(), outBufferLength, 263 aOutputChannels, channel, offsetSamples); 264 } 265 266 // If the chunk is silent, simply write the right number of silence in the 267 // buffers. 268 if (c.mBufferFormat == AUDIO_FORMAT_SILENCE) { 269 for (AudioDataValue* outChannel : outChannelPtrs) { 270 PodZero(outChannel, frames); 271 } 272 continue; 273 } 274 // We need to upmix and downmix appropriately, depending on the 275 // desired input and output channels. 276 const AudioChunk* downMixInput = &c; 277 if (c.ChannelCount() < aOutputChannels) { 278 // Up-mix. 279 upMixChunk = c; 280 AudioChannelsUpMix<void>(&upMixChunk.mChannelData, aOutputChannels, 281 SilentChannel::gZeroChannel); 282 downMixInput = &upMixChunk; 283 } 284 downMixInput->DownMixTo(outChannelPtrs); 285 } 286 287 if (offsetSamples) { 288 MOZ_ASSERT(offsetSamples == outBufferLength / aOutputChannels, 289 "We forgot to write some samples?"); 290 aMixer.Mix(buf.Elements(), aOutputChannels, offsetSamples, aSampleRate); 291 } 292 } 293 294 } // namespace mozilla