AudioSegment.h (18217B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef MOZILLA_AUDIOSEGMENT_H_ 7 #define MOZILLA_AUDIOSEGMENT_H_ 8 9 #include <speex/speex_resampler.h> 10 11 #include "AudioChannelFormat.h" 12 #include "AudioSampleFormat.h" 13 #include "MediaSegment.h" 14 #include "SharedBuffer.h" 15 #include "WebAudioUtils.h" 16 #include "mozilla/ScopeExit.h" 17 #include "nsAutoRef.h" 18 #ifdef MOZILLA_INTERNAL_API 19 # include "mozilla/TimeStamp.h" 20 #endif 21 #include <float.h> 22 23 namespace mozilla { 24 struct AudioChunk; 25 class AudioSegment; 26 } // namespace mozilla 27 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk) 28 29 /** 30 * This allows compilation of nsTArray<AudioSegment> and 31 * AutoTArray<AudioSegment> since without it, static analysis fails on the 32 * mChunks member being a non-memmovable AutoTArray. 33 * 34 * Note that AudioSegment(const AudioSegment&) is deleted, so this should 35 * never come into effect. 36 */ 37 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment) 38 39 namespace mozilla { 40 41 template <typename T> 42 class SharedChannelArrayBuffer : public ThreadSharedObject { 43 public: 44 explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >&& aBuffers) 45 : mBuffers(std::move(aBuffers)) {} 46 47 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override { 48 size_t amount = 0; 49 amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf); 50 for (size_t i = 0; i < mBuffers.Length(); i++) { 51 amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf); 52 } 53 54 return amount; 55 } 56 57 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override { 58 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); 59 } 60 61 nsTArray<nsTArray<T> > mBuffers; 62 }; 63 64 class AudioMixer; 65 66 /** 67 * For auto-arrays etc, guess this as the common number of channels. 68 */ 69 const int GUESS_AUDIO_CHANNELS = 2; 70 71 // We ensure that the graph advances in steps that are multiples of the Web 72 // Audio block size 73 const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7; 74 const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS; 75 76 template <typename SrcT, typename DestT> 77 static void InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels, 78 uint32_t aLength, float aVolume, 79 uint32_t aChannels, DestT* aOutput) { 80 DestT* output = aOutput; 81 for (size_t i = 0; i < aLength; ++i) { 82 for (size_t channel = 0; channel < aChannels; ++channel) { 83 float v = 84 ConvertAudioSample<float>(aSourceChannels[channel][i]) * aVolume; 85 *output = FloatToAudioSample<DestT>(v); 86 ++output; 87 } 88 } 89 } 90 91 template <typename SrcT, typename DestT> 92 static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer, 93 uint32_t aFrames, uint32_t aChannels, 94 DestT** aOutput) { 95 for (size_t i = 0; i < aChannels; i++) { 96 size_t interleavedIndex = i; 97 for (size_t j = 0; j < aFrames; j++) { 98 aOutput[i][j] = 99 ConvertAudioSample<DestT>(aSourceBuffer[interleavedIndex]); 100 interleavedIndex += aChannels; 101 } 102 } 103 } 104 105 class SilentChannel { 106 public: 107 static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */ 108 static const uint8_t 109 gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * AUDIO_PROCESSING_FRAMES]; 110 // We take advantage of the fact that zero in float and zero in int have the 111 // same all-zeros bit layout. 112 template <typename T> 113 static const T* ZeroChannel(); 114 }; 115 116 /** 117 * Given an array of input channels (aChannelData), downmix to aOutputChannels, 118 * interleave the channel data. A total of aOutputChannels*aDuration 119 * interleaved samples will be copied to a channel buffer in aOutput. 120 */ 121 template <typename SrcT, typename DestT> 122 void DownmixAndInterleave(Span<const SrcT* const> aChannelData, 123 int32_t aDuration, float aVolume, 124 uint32_t aOutputChannels, DestT* aOutput) { 125 if (aChannelData.Length() == aOutputChannels) { 126 InterleaveAndConvertBuffer(aChannelData.Elements(), aDuration, aVolume, 127 aOutputChannels, aOutput); 128 } else { 129 AutoTArray<SrcT*, GUESS_AUDIO_CHANNELS> outputChannelData; 130 AutoTArray<SrcT, 131 SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS> 132 outputBuffers; 133 outputChannelData.SetLength(aOutputChannels); 134 outputBuffers.SetLength(aDuration * aOutputChannels); 135 for (uint32_t i = 0; i < aOutputChannels; i++) { 136 outputChannelData[i] = outputBuffers.Elements() + aDuration * i; 137 } 138 AudioChannelsDownMix<SrcT, SrcT>(aChannelData, outputChannelData, 139 aDuration); 140 InterleaveAndConvertBuffer(outputChannelData.Elements(), aDuration, aVolume, 141 aOutputChannels, aOutput); 142 } 143 } 144 145 /** 146 * An AudioChunk represents a multi-channel buffer of audio samples. 147 * It references an underlying ThreadSharedObject which manages the lifetime 148 * of the buffer. An AudioChunk maintains its own duration and channel data 149 * pointers so it can represent a subinterval of a buffer without copying. 150 * An AudioChunk can store its individual channels anywhere; it maintains 151 * separate pointers to each channel's buffer. 152 */ 153 struct AudioChunk { 154 using SampleFormat = mozilla::AudioSampleFormat; 155 156 AudioChunk() = default; 157 158 template <typename T> 159 AudioChunk(already_AddRefed<ThreadSharedObject> aBuffer, 160 const nsTArray<const T*>& aChannelData, TrackTime aDuration, 161 PrincipalHandle aPrincipalHandle) 162 : mDuration(aDuration), 163 mBuffer(aBuffer), 164 mBufferFormat(AudioSampleTypeToFormat<T>::Format), 165 mPrincipalHandle(std::move(aPrincipalHandle)) { 166 MOZ_ASSERT(!mBuffer == aChannelData.IsEmpty(), "Appending invalid data ?"); 167 for (const T* data : aChannelData) { 168 mChannelData.AppendElement(data); 169 } 170 } 171 172 // Generic methods 173 void SliceTo(TrackTime aStart, TrackTime aEnd) { 174 MOZ_ASSERT(aStart >= 0, "Slice out of bounds: invalid start"); 175 MOZ_ASSERT(aStart < aEnd, "Slice out of bounds: invalid range"); 176 MOZ_ASSERT(aEnd <= mDuration, "Slice out of bounds: invalid end"); 177 178 if (mBuffer) { 179 MOZ_ASSERT(aStart < INT32_MAX, 180 "Can't slice beyond 32-bit sample lengths"); 181 for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) { 182 mChannelData[channel] = AddAudioSampleOffset( 183 mChannelData[channel], mBufferFormat, int32_t(aStart)); 184 } 185 } 186 mDuration = aEnd - aStart; 187 } 188 TrackTime GetDuration() const { return mDuration; } 189 bool CanCombineWithFollowing(const AudioChunk& aOther) const { 190 if (aOther.mBuffer != mBuffer) { 191 return false; 192 } 193 if (!mBuffer) { 194 return true; 195 } 196 if (aOther.mVolume != mVolume) { 197 return false; 198 } 199 if (aOther.mPrincipalHandle != mPrincipalHandle) { 200 return false; 201 } 202 NS_ASSERTION(aOther.mBufferFormat == mBufferFormat, 203 "Wrong metadata about buffer"); 204 NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(), 205 "Mismatched channel count"); 206 if (mDuration > INT32_MAX) { 207 return false; 208 } 209 for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) { 210 if (aOther.mChannelData[channel] != 211 AddAudioSampleOffset(mChannelData[channel], mBufferFormat, 212 int32_t(mDuration))) { 213 return false; 214 } 215 } 216 return true; 217 } 218 bool IsNull() const { return mBuffer == nullptr; } 219 void SetNull(TrackTime aDuration) { 220 mBuffer = nullptr; 221 mChannelData.Clear(); 222 mDuration = aDuration; 223 mVolume = 1.0f; 224 mBufferFormat = AUDIO_FORMAT_SILENCE; 225 mPrincipalHandle = PRINCIPAL_HANDLE_NONE; 226 } 227 228 uint32_t ChannelCount() const { return mChannelData.Length(); } 229 230 bool IsMuted() const { return mVolume == 0.0f; } 231 232 size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const { 233 return SizeOfExcludingThis(aMallocSizeOf, true); 234 } 235 236 size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const { 237 size_t amount = 0; 238 239 // Possibly owned: 240 // - mBuffer - Can hold data that is also in the decoded audio queue. If it 241 // is not shared, or unshared == false it gets counted. 242 if (mBuffer && (!aUnshared || !mBuffer->IsShared())) { 243 amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf); 244 } 245 246 // Memory in the array is owned by mBuffer. 247 amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf); 248 return amount; 249 } 250 251 template <typename T> 252 Span<const T* const> ChannelData() const { 253 MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat); 254 return Span(reinterpret_cast<const T* const*>(mChannelData.Elements()), 255 mChannelData.Length()); 256 } 257 258 /** 259 * ChannelFloatsForWrite() should be used only when mBuffer is owned solely 260 * by the calling thread. 261 */ 262 template <typename T> 263 T* ChannelDataForWrite(size_t aChannel) { 264 MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat); 265 MOZ_ASSERT(!mBuffer->IsShared()); 266 // Array access check for 1905287 267 if (aChannel >= mChannelData.Length()) { 268 MOZ_CRASH_UNSAFE_PRINTF( 269 "Invalid index: aChannel: %zu, mChannelData size: %zu\n", aChannel, 270 mChannelData.Length()); 271 } 272 return static_cast<T*>(const_cast<void*>(mChannelData[aChannel])); 273 } 274 275 template <typename T> 276 static AudioChunk FromInterleavedBuffer( 277 const T* aBuffer, size_t aFrames, uint32_t aChannels, 278 const PrincipalHandle& aPrincipalHandle) { 279 CheckedInt<size_t> bufferSize(sizeof(T)); 280 bufferSize *= aFrames; 281 bufferSize *= aChannels; 282 RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize); 283 284 AutoTArray<T*, 8> deinterleaved; 285 if (aChannels == 1) { 286 PodCopy(static_cast<T*>(buffer->Data()), aBuffer, aFrames); 287 deinterleaved.AppendElement(static_cast<T*>(buffer->Data())); 288 } else { 289 deinterleaved.SetLength(aChannels); 290 T* samples = static_cast<T*>(buffer->Data()); 291 292 size_t offset = 0; 293 for (uint32_t i = 0; i < aChannels; ++i) { 294 deinterleaved[i] = samples + offset; 295 offset += aFrames; 296 } 297 298 DeinterleaveAndConvertBuffer(aBuffer, static_cast<uint32_t>(aFrames), 299 aChannels, deinterleaved.Elements()); 300 } 301 302 AutoTArray<const T*, GUESS_AUDIO_CHANNELS> channelData; 303 channelData.AppendElements(deinterleaved); 304 return AudioChunk(buffer.forget(), channelData, 305 static_cast<TrackTime>(aFrames), aPrincipalHandle); 306 } 307 308 const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; } 309 310 // aOutputChannels must contain pointers to channel data of length mDuration. 311 void DownMixTo(Span<AudioDataValue* const> aOutputChannels) const; 312 313 TrackTime mDuration = 0; // in frames within the buffer 314 RefPtr<ThreadSharedObject> mBuffer; // the buffer object whose lifetime is 315 // managed; null means data is all zeroes 316 // one pointer per channel; empty if and only if mBuffer is null 317 CopyableAutoTArray<const void*, GUESS_AUDIO_CHANNELS> mChannelData; 318 float mVolume = 1.0f; // volume multiplier to apply 319 // format of frames in mBuffer (or silence if mBuffer is null) 320 SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE; 321 // principalHandle for the data in this chunk. 322 // This can be compared to an nsIPrincipal* when back on main thread. 323 PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE; 324 }; 325 326 /** 327 * A list of audio samples consisting of a sequence of slices of SharedBuffers. 328 * The audio rate is determined by the track, not stored in this class. 329 */ 330 class AudioSegment final : public MediaSegmentBase<AudioSegment, AudioChunk> { 331 // The channel count that MaxChannelCount() returned last time it was called. 332 uint32_t mMemoizedMaxChannelCount = 0; 333 334 public: 335 typedef mozilla::AudioSampleFormat SampleFormat; 336 337 AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {} 338 339 AudioSegment(AudioSegment&& aSegment) = default; 340 341 AudioSegment(const AudioSegment&) = delete; 342 AudioSegment& operator=(const AudioSegment&) = delete; 343 344 ~AudioSegment() = default; 345 346 // Resample the whole segment in place. `aResampler` is an instance of a 347 // resampler, initialized with `aResamplerChannelCount` channels. If this 348 // function finds a chunk with more channels, `aResampler` is destroyed and a 349 // new resampler is created, and `aResamplerChannelCount` is updated with the 350 // new channel count value. 351 void ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler, 352 uint32_t* aResamplerChannelCount, uint32_t aInRate, 353 uint32_t aOutRate); 354 355 template <typename T> 356 void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer, 357 const nsTArray<const T*>& aChannelData, TrackTime aDuration, 358 const PrincipalHandle& aPrincipalHandle) { 359 AppendAndConsumeChunk(AudioChunk(std::move(aBuffer), aChannelData, 360 aDuration, aPrincipalHandle)); 361 } 362 void AppendSegment(const AudioSegment* aSegment) { 363 MOZ_ASSERT(aSegment); 364 365 for (const AudioChunk& c : aSegment->mChunks) { 366 AudioChunk* chunk = AppendChunk(c.GetDuration()); 367 chunk->mBuffer = c.mBuffer; 368 chunk->mChannelData = c.mChannelData; 369 chunk->mBufferFormat = c.mBufferFormat; 370 chunk->mPrincipalHandle = c.mPrincipalHandle; 371 } 372 } 373 template <typename T> 374 void AppendFromInterleavedBuffer(const T* aBuffer, size_t aFrames, 375 uint32_t aChannels, 376 const PrincipalHandle& aPrincipalHandle) { 377 AppendAndConsumeChunk(AudioChunk::FromInterleavedBuffer<T>( 378 aBuffer, aFrames, aChannels, aPrincipalHandle)); 379 } 380 // Write the segement data into an interleaved buffer. Do mixing if the 381 // AudioChunk's channel count in the segment is different from aChannels. 382 // Returns sample count of the converted audio data. The converted data will 383 // be stored into aBuffer. 384 size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer, 385 uint32_t aChannels) const; 386 // Consumes aChunk, and append it to the segment if its duration is not zero. 387 void AppendAndConsumeChunk(AudioChunk&& aChunk) { 388 AudioChunk unused; 389 AudioChunk* chunk = &unused; 390 391 // Always consume aChunk. The chunk's mBuffer can be non-null even if its 392 // duration is 0. 393 auto consume = MakeScopeExit([&] { 394 chunk->mBuffer = std::move(aChunk.mBuffer); 395 chunk->mChannelData = std::move(aChunk.mChannelData); 396 397 MOZ_ASSERT(chunk->mBuffer || chunk->mChannelData.IsEmpty(), 398 "Appending invalid data ?"); 399 400 chunk->mVolume = aChunk.mVolume; 401 chunk->mBufferFormat = aChunk.mBufferFormat; 402 chunk->mPrincipalHandle = std::move(aChunk.mPrincipalHandle); 403 }); 404 405 if (aChunk.GetDuration() == 0) { 406 return; 407 } 408 409 if (!mChunks.IsEmpty() && 410 mChunks.LastElement().CanCombineWithFollowing(aChunk)) { 411 mChunks.LastElement().mDuration += aChunk.GetDuration(); 412 mDuration += aChunk.GetDuration(); 413 return; 414 } 415 416 chunk = AppendChunk(aChunk.mDuration); 417 } 418 void ApplyVolume(float aVolume); 419 // Mix the segment into a mixer, keeping it planar, up or down mixing to 420 // aChannelCount channels. 421 void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate); 422 423 // Returns the maximum channel count across all chunks in this segment. 424 // Should there be no chunk with a channel count we return the memoized return 425 // value from last time this method was called. 426 uint32_t MaxChannelCount() { 427 uint32_t channelCount = 0; 428 for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) { 429 if (ci->ChannelCount()) { 430 channelCount = std::max(channelCount, ci->ChannelCount()); 431 } 432 } 433 if (channelCount == 0) { 434 return mMemoizedMaxChannelCount; 435 } 436 return mMemoizedMaxChannelCount = channelCount; 437 } 438 439 static Type StaticType() { return AUDIO; } 440 441 size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override { 442 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); 443 } 444 445 PrincipalHandle GetOldestPrinciple() const { 446 const AudioChunk* chunk = mChunks.IsEmpty() ? nullptr : &mChunks[0]; 447 return chunk ? chunk->GetPrincipalHandle() : PRINCIPAL_HANDLE_NONE; 448 } 449 450 // Iterate on each chunks until the input function returns true. 451 template <typename Function> 452 void IterateOnChunks(const Function&& aFunction) { 453 for (uint32_t idx = 0; idx < mChunks.Length(); idx++) { 454 if (aFunction(&mChunks[idx])) { 455 return; 456 } 457 } 458 } 459 460 private: 461 template <typename T> 462 void Resample(nsAutoRef<SpeexResamplerState>& aResampler, 463 uint32_t* aResamplerChannelCount, uint32_t aInRate, 464 uint32_t aOutRate); 465 }; 466 467 template <typename SrcT> 468 void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels, 469 float aVolume, AudioDataValue* aOutputBuffer) { 470 CopyableAutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData; 471 channelData.AppendElements(aChunk.ChannelData<SrcT>()); 472 473 if (channelData.Length() < aOutputChannels) { 474 // Up-mix. Note that this might actually make channelData have more 475 // than aOutputChannels temporarily. 476 AudioChannelsUpMix(&channelData, aOutputChannels, 477 SilentChannel::ZeroChannel<SrcT>()); 478 } 479 if (channelData.Length() > aOutputChannels) { 480 // Down-mix. 481 DownmixAndInterleave<SrcT>(channelData, aChunk.mDuration, aVolume, 482 aOutputChannels, aOutputBuffer); 483 } else { 484 InterleaveAndConvertBuffer(channelData.Elements(), aChunk.mDuration, 485 aVolume, aOutputChannels, aOutputBuffer); 486 } 487 } 488 489 } // namespace mozilla 490 491 #endif /* MOZILLA_AUDIOSEGMENT_H_ */