tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

AudioSegment.h (18217B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef MOZILLA_AUDIOSEGMENT_H_
      7 #define MOZILLA_AUDIOSEGMENT_H_
      8 
      9 #include <speex/speex_resampler.h>
     10 
     11 #include "AudioChannelFormat.h"
     12 #include "AudioSampleFormat.h"
     13 #include "MediaSegment.h"
     14 #include "SharedBuffer.h"
     15 #include "WebAudioUtils.h"
     16 #include "mozilla/ScopeExit.h"
     17 #include "nsAutoRef.h"
     18 #ifdef MOZILLA_INTERNAL_API
     19 #  include "mozilla/TimeStamp.h"
     20 #endif
     21 #include <float.h>
     22 
     23 namespace mozilla {
     24 struct AudioChunk;
     25 class AudioSegment;
     26 }  // namespace mozilla
     27 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk)
     28 
     29 /**
     30 * This allows compilation of nsTArray<AudioSegment> and
     31 * AutoTArray<AudioSegment> since without it, static analysis fails on the
     32 * mChunks member being a non-memmovable AutoTArray.
     33 *
     34 * Note that AudioSegment(const AudioSegment&) is deleted, so this should
     35 * never come into effect.
     36 */
     37 MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment)
     38 
     39 namespace mozilla {
     40 
     41 template <typename T>
     42 class SharedChannelArrayBuffer : public ThreadSharedObject {
     43 public:
     44  explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >&& aBuffers)
     45      : mBuffers(std::move(aBuffers)) {}
     46 
     47  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {
     48    size_t amount = 0;
     49    amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);
     50    for (size_t i = 0; i < mBuffers.Length(); i++) {
     51      amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
     52    }
     53 
     54    return amount;
     55  }
     56 
     57  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
     58    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
     59  }
     60 
     61  nsTArray<nsTArray<T> > mBuffers;
     62 };
     63 
     64 class AudioMixer;
     65 
     66 /**
     67 * For auto-arrays etc, guess this as the common number of channels.
     68 */
     69 const int GUESS_AUDIO_CHANNELS = 2;
     70 
     71 // We ensure that the graph advances in steps that are multiples of the Web
     72 // Audio block size
     73 const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;
     74 const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;
     75 
     76 template <typename SrcT, typename DestT>
     77 static void InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,
     78                                       uint32_t aLength, float aVolume,
     79                                       uint32_t aChannels, DestT* aOutput) {
     80  DestT* output = aOutput;
     81  for (size_t i = 0; i < aLength; ++i) {
     82    for (size_t channel = 0; channel < aChannels; ++channel) {
     83      float v =
     84          ConvertAudioSample<float>(aSourceChannels[channel][i]) * aVolume;
     85      *output = FloatToAudioSample<DestT>(v);
     86      ++output;
     87    }
     88  }
     89 }
     90 
     91 template <typename SrcT, typename DestT>
     92 static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,
     93                                         uint32_t aFrames, uint32_t aChannels,
     94                                         DestT** aOutput) {
     95  for (size_t i = 0; i < aChannels; i++) {
     96    size_t interleavedIndex = i;
     97    for (size_t j = 0; j < aFrames; j++) {
     98      aOutput[i][j] =
     99          ConvertAudioSample<DestT>(aSourceBuffer[interleavedIndex]);
    100      interleavedIndex += aChannels;
    101    }
    102  }
    103 }
    104 
    105 class SilentChannel {
    106 public:
    107  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */
    108  static const uint8_t
    109      gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * AUDIO_PROCESSING_FRAMES];
    110  // We take advantage of the fact that zero in float and zero in int have the
    111  // same all-zeros bit layout.
    112  template <typename T>
    113  static const T* ZeroChannel();
    114 };
    115 
    116 /**
    117 * Given an array of input channels (aChannelData), downmix to aOutputChannels,
    118 * interleave the channel data. A total of aOutputChannels*aDuration
    119 * interleaved samples will be copied to a channel buffer in aOutput.
    120 */
    121 template <typename SrcT, typename DestT>
    122 void DownmixAndInterleave(Span<const SrcT* const> aChannelData,
    123                          int32_t aDuration, float aVolume,
    124                          uint32_t aOutputChannels, DestT* aOutput) {
    125  if (aChannelData.Length() == aOutputChannels) {
    126    InterleaveAndConvertBuffer(aChannelData.Elements(), aDuration, aVolume,
    127                               aOutputChannels, aOutput);
    128  } else {
    129    AutoTArray<SrcT*, GUESS_AUDIO_CHANNELS> outputChannelData;
    130    AutoTArray<SrcT,
    131               SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
    132        outputBuffers;
    133    outputChannelData.SetLength(aOutputChannels);
    134    outputBuffers.SetLength(aDuration * aOutputChannels);
    135    for (uint32_t i = 0; i < aOutputChannels; i++) {
    136      outputChannelData[i] = outputBuffers.Elements() + aDuration * i;
    137    }
    138    AudioChannelsDownMix<SrcT, SrcT>(aChannelData, outputChannelData,
    139                                     aDuration);
    140    InterleaveAndConvertBuffer(outputChannelData.Elements(), aDuration, aVolume,
    141                               aOutputChannels, aOutput);
    142  }
    143 }
    144 
    145 /**
    146 * An AudioChunk represents a multi-channel buffer of audio samples.
    147 * It references an underlying ThreadSharedObject which manages the lifetime
    148 * of the buffer. An AudioChunk maintains its own duration and channel data
    149 * pointers so it can represent a subinterval of a buffer without copying.
    150 * An AudioChunk can store its individual channels anywhere; it maintains
    151 * separate pointers to each channel's buffer.
    152 */
    153 struct AudioChunk {
    154  using SampleFormat = mozilla::AudioSampleFormat;
    155 
    156  AudioChunk() = default;
    157 
    158  template <typename T>
    159  AudioChunk(already_AddRefed<ThreadSharedObject> aBuffer,
    160             const nsTArray<const T*>& aChannelData, TrackTime aDuration,
    161             PrincipalHandle aPrincipalHandle)
    162      : mDuration(aDuration),
    163        mBuffer(aBuffer),
    164        mBufferFormat(AudioSampleTypeToFormat<T>::Format),
    165        mPrincipalHandle(std::move(aPrincipalHandle)) {
    166    MOZ_ASSERT(!mBuffer == aChannelData.IsEmpty(), "Appending invalid data ?");
    167    for (const T* data : aChannelData) {
    168      mChannelData.AppendElement(data);
    169    }
    170  }
    171 
    172  // Generic methods
    173  void SliceTo(TrackTime aStart, TrackTime aEnd) {
    174    MOZ_ASSERT(aStart >= 0, "Slice out of bounds: invalid start");
    175    MOZ_ASSERT(aStart < aEnd, "Slice out of bounds: invalid range");
    176    MOZ_ASSERT(aEnd <= mDuration, "Slice out of bounds: invalid end");
    177 
    178    if (mBuffer) {
    179      MOZ_ASSERT(aStart < INT32_MAX,
    180                 "Can't slice beyond 32-bit sample lengths");
    181      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
    182        mChannelData[channel] = AddAudioSampleOffset(
    183            mChannelData[channel], mBufferFormat, int32_t(aStart));
    184      }
    185    }
    186    mDuration = aEnd - aStart;
    187  }
    188  TrackTime GetDuration() const { return mDuration; }
    189  bool CanCombineWithFollowing(const AudioChunk& aOther) const {
    190    if (aOther.mBuffer != mBuffer) {
    191      return false;
    192    }
    193    if (!mBuffer) {
    194      return true;
    195    }
    196    if (aOther.mVolume != mVolume) {
    197      return false;
    198    }
    199    if (aOther.mPrincipalHandle != mPrincipalHandle) {
    200      return false;
    201    }
    202    NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,
    203                 "Wrong metadata about buffer");
    204    NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),
    205                 "Mismatched channel count");
    206    if (mDuration > INT32_MAX) {
    207      return false;
    208    }
    209    for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {
    210      if (aOther.mChannelData[channel] !=
    211          AddAudioSampleOffset(mChannelData[channel], mBufferFormat,
    212                               int32_t(mDuration))) {
    213        return false;
    214      }
    215    }
    216    return true;
    217  }
    218  bool IsNull() const { return mBuffer == nullptr; }
    219  void SetNull(TrackTime aDuration) {
    220    mBuffer = nullptr;
    221    mChannelData.Clear();
    222    mDuration = aDuration;
    223    mVolume = 1.0f;
    224    mBufferFormat = AUDIO_FORMAT_SILENCE;
    225    mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
    226  }
    227 
    228  uint32_t ChannelCount() const { return mChannelData.Length(); }
    229 
    230  bool IsMuted() const { return mVolume == 0.0f; }
    231 
    232  size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const {
    233    return SizeOfExcludingThis(aMallocSizeOf, true);
    234  }
    235 
    236  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const {
    237    size_t amount = 0;
    238 
    239    // Possibly owned:
    240    // - mBuffer - Can hold data that is also in the decoded audio queue. If it
    241    //             is not shared, or unshared == false it gets counted.
    242    if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {
    243      amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);
    244    }
    245 
    246    // Memory in the array is owned by mBuffer.
    247    amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);
    248    return amount;
    249  }
    250 
    251  template <typename T>
    252  Span<const T* const> ChannelData() const {
    253    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    254    return Span(reinterpret_cast<const T* const*>(mChannelData.Elements()),
    255                mChannelData.Length());
    256  }
    257 
    258  /**
    259   * ChannelFloatsForWrite() should be used only when mBuffer is owned solely
    260   * by the calling thread.
    261   */
    262  template <typename T>
    263  T* ChannelDataForWrite(size_t aChannel) {
    264    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);
    265    MOZ_ASSERT(!mBuffer->IsShared());
    266    // Array access check for 1905287
    267    if (aChannel >= mChannelData.Length()) {
    268      MOZ_CRASH_UNSAFE_PRINTF(
    269          "Invalid index: aChannel: %zu, mChannelData size: %zu\n", aChannel,
    270          mChannelData.Length());
    271    }
    272    return static_cast<T*>(const_cast<void*>(mChannelData[aChannel]));
    273  }
    274 
    275  template <typename T>
    276  static AudioChunk FromInterleavedBuffer(
    277      const T* aBuffer, size_t aFrames, uint32_t aChannels,
    278      const PrincipalHandle& aPrincipalHandle) {
    279    CheckedInt<size_t> bufferSize(sizeof(T));
    280    bufferSize *= aFrames;
    281    bufferSize *= aChannels;
    282    RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);
    283 
    284    AutoTArray<T*, 8> deinterleaved;
    285    if (aChannels == 1) {
    286      PodCopy(static_cast<T*>(buffer->Data()), aBuffer, aFrames);
    287      deinterleaved.AppendElement(static_cast<T*>(buffer->Data()));
    288    } else {
    289      deinterleaved.SetLength(aChannels);
    290      T* samples = static_cast<T*>(buffer->Data());
    291 
    292      size_t offset = 0;
    293      for (uint32_t i = 0; i < aChannels; ++i) {
    294        deinterleaved[i] = samples + offset;
    295        offset += aFrames;
    296      }
    297 
    298      DeinterleaveAndConvertBuffer(aBuffer, static_cast<uint32_t>(aFrames),
    299                                   aChannels, deinterleaved.Elements());
    300    }
    301 
    302    AutoTArray<const T*, GUESS_AUDIO_CHANNELS> channelData;
    303    channelData.AppendElements(deinterleaved);
    304    return AudioChunk(buffer.forget(), channelData,
    305                      static_cast<TrackTime>(aFrames), aPrincipalHandle);
    306  }
    307 
    308  const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; }
    309 
    310  // aOutputChannels must contain pointers to channel data of length mDuration.
    311  void DownMixTo(Span<AudioDataValue* const> aOutputChannels) const;
    312 
    313  TrackTime mDuration = 0;             // in frames within the buffer
    314  RefPtr<ThreadSharedObject> mBuffer;  // the buffer object whose lifetime is
    315                                       // managed; null means data is all zeroes
    316  // one pointer per channel; empty if and only if mBuffer is null
    317  CopyableAutoTArray<const void*, GUESS_AUDIO_CHANNELS> mChannelData;
    318  float mVolume = 1.0f;  // volume multiplier to apply
    319  // format of frames in mBuffer (or silence if mBuffer is null)
    320  SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE;
    321  // principalHandle for the data in this chunk.
    322  // This can be compared to an nsIPrincipal* when back on main thread.
    323  PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
    324 };
    325 
    326 /**
    327 * A list of audio samples consisting of a sequence of slices of SharedBuffers.
    328 * The audio rate is determined by the track, not stored in this class.
    329 */
    330 class AudioSegment final : public MediaSegmentBase<AudioSegment, AudioChunk> {
    331  // The channel count that MaxChannelCount() returned last time it was called.
    332  uint32_t mMemoizedMaxChannelCount = 0;
    333 
    334 public:
    335  typedef mozilla::AudioSampleFormat SampleFormat;
    336 
    337  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}
    338 
    339  AudioSegment(AudioSegment&& aSegment) = default;
    340 
    341  AudioSegment(const AudioSegment&) = delete;
    342  AudioSegment& operator=(const AudioSegment&) = delete;
    343 
    344  ~AudioSegment() = default;
    345 
    346  // Resample the whole segment in place.  `aResampler` is an instance of a
    347  // resampler, initialized with `aResamplerChannelCount` channels. If this
    348  // function finds a chunk with more channels, `aResampler` is destroyed and a
    349  // new resampler is created, and `aResamplerChannelCount` is updated with the
    350  // new channel count value.
    351  void ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
    352                      uint32_t* aResamplerChannelCount, uint32_t aInRate,
    353                      uint32_t aOutRate);
    354 
    355  template <typename T>
    356  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,
    357                    const nsTArray<const T*>& aChannelData, TrackTime aDuration,
    358                    const PrincipalHandle& aPrincipalHandle) {
    359    AppendAndConsumeChunk(AudioChunk(std::move(aBuffer), aChannelData,
    360                                     aDuration, aPrincipalHandle));
    361  }
    362  void AppendSegment(const AudioSegment* aSegment) {
    363    MOZ_ASSERT(aSegment);
    364 
    365    for (const AudioChunk& c : aSegment->mChunks) {
    366      AudioChunk* chunk = AppendChunk(c.GetDuration());
    367      chunk->mBuffer = c.mBuffer;
    368      chunk->mChannelData = c.mChannelData;
    369      chunk->mBufferFormat = c.mBufferFormat;
    370      chunk->mPrincipalHandle = c.mPrincipalHandle;
    371    }
    372  }
    373  template <typename T>
    374  void AppendFromInterleavedBuffer(const T* aBuffer, size_t aFrames,
    375                                   uint32_t aChannels,
    376                                   const PrincipalHandle& aPrincipalHandle) {
    377    AppendAndConsumeChunk(AudioChunk::FromInterleavedBuffer<T>(
    378        aBuffer, aFrames, aChannels, aPrincipalHandle));
    379  }
    380  // Write the segement data into an interleaved buffer. Do mixing if the
    381  // AudioChunk's channel count in the segment is different from aChannels.
    382  // Returns sample count of the converted audio data. The converted data will
    383  // be stored into aBuffer.
    384  size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
    385                                  uint32_t aChannels) const;
    386  // Consumes aChunk, and append it to the segment if its duration is not zero.
    387  void AppendAndConsumeChunk(AudioChunk&& aChunk) {
    388    AudioChunk unused;
    389    AudioChunk* chunk = &unused;
    390 
    391    // Always consume aChunk. The chunk's mBuffer can be non-null even if its
    392    // duration is 0.
    393    auto consume = MakeScopeExit([&] {
    394      chunk->mBuffer = std::move(aChunk.mBuffer);
    395      chunk->mChannelData = std::move(aChunk.mChannelData);
    396 
    397      MOZ_ASSERT(chunk->mBuffer || chunk->mChannelData.IsEmpty(),
    398                 "Appending invalid data ?");
    399 
    400      chunk->mVolume = aChunk.mVolume;
    401      chunk->mBufferFormat = aChunk.mBufferFormat;
    402      chunk->mPrincipalHandle = std::move(aChunk.mPrincipalHandle);
    403    });
    404 
    405    if (aChunk.GetDuration() == 0) {
    406      return;
    407    }
    408 
    409    if (!mChunks.IsEmpty() &&
    410        mChunks.LastElement().CanCombineWithFollowing(aChunk)) {
    411      mChunks.LastElement().mDuration += aChunk.GetDuration();
    412      mDuration += aChunk.GetDuration();
    413      return;
    414    }
    415 
    416    chunk = AppendChunk(aChunk.mDuration);
    417  }
    418  void ApplyVolume(float aVolume);
    419  // Mix the segment into a mixer, keeping it planar, up or down mixing to
    420  // aChannelCount channels.
    421  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);
    422 
    423  // Returns the maximum channel count across all chunks in this segment.
    424  // Should there be no chunk with a channel count we return the memoized return
    425  // value from last time this method was called.
    426  uint32_t MaxChannelCount() {
    427    uint32_t channelCount = 0;
    428    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    429      if (ci->ChannelCount()) {
    430        channelCount = std::max(channelCount, ci->ChannelCount());
    431      }
    432    }
    433    if (channelCount == 0) {
    434      return mMemoizedMaxChannelCount;
    435    }
    436    return mMemoizedMaxChannelCount = channelCount;
    437  }
    438 
    439  static Type StaticType() { return AUDIO; }
    440 
    441  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {
    442    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
    443  }
    444 
    445  PrincipalHandle GetOldestPrinciple() const {
    446    const AudioChunk* chunk = mChunks.IsEmpty() ? nullptr : &mChunks[0];
    447    return chunk ? chunk->GetPrincipalHandle() : PRINCIPAL_HANDLE_NONE;
    448  }
    449 
    450  // Iterate on each chunks until the input function returns true.
    451  template <typename Function>
    452  void IterateOnChunks(const Function&& aFunction) {
    453    for (uint32_t idx = 0; idx < mChunks.Length(); idx++) {
    454      if (aFunction(&mChunks[idx])) {
    455        return;
    456      }
    457    }
    458  }
    459 
    460 private:
    461  template <typename T>
    462  void Resample(nsAutoRef<SpeexResamplerState>& aResampler,
    463                uint32_t* aResamplerChannelCount, uint32_t aInRate,
    464                uint32_t aOutRate);
    465 };
    466 
    467 template <typename SrcT>
    468 void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels,
    469                float aVolume, AudioDataValue* aOutputBuffer) {
    470  CopyableAutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData;
    471  channelData.AppendElements(aChunk.ChannelData<SrcT>());
    472 
    473  if (channelData.Length() < aOutputChannels) {
    474    // Up-mix. Note that this might actually make channelData have more
    475    // than aOutputChannels temporarily.
    476    AudioChannelsUpMix(&channelData, aOutputChannels,
    477                       SilentChannel::ZeroChannel<SrcT>());
    478  }
    479  if (channelData.Length() > aOutputChannels) {
    480    // Down-mix.
    481    DownmixAndInterleave<SrcT>(channelData, aChunk.mDuration, aVolume,
    482                               aOutputChannels, aOutputBuffer);
    483  } else {
    484    InterleaveAndConvertBuffer(channelData.Elements(), aChunk.mDuration,
    485                               aVolume, aOutputChannels, aOutputBuffer);
    486  }
    487 }
    488 
    489 }  // namespace mozilla
    490 
    491 #endif /* MOZILLA_AUDIOSEGMENT_H_ */