tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

AppleATDecoder.cpp (33077B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "AppleATDecoder.h"
      8 
      9 #include <CoreAudioTypes/CoreAudioBaseTypes.h>
     10 #include <mozilla/Result.h>
     11 
     12 #include "ADTSDemuxer.h"
     13 #include "Adts.h"
     14 #include "ByteWriter.h"
     15 #include "ErrorList.h"
     16 #include "MP4Decoder.h"
     17 #include "MediaInfo.h"
     18 #include "MediaResult.h"
     19 #include "mozilla/Logging.h"
     20 #include "mozilla/Result.h"
     21 #include "mozilla/UniquePtr.h"
     22 #include "nsDebug.h"
     23 #include "nsTArray.h"
     24 
     25 #define LOG(...) \
     26  MOZ_LOG(mozilla::sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
     27 #define FourCC2Str(n) \
     28  ((char[5]){(char)(n >> 24), (char)(n >> 16), (char)(n >> 8), (char)(n), 0})
     29 
     30 const int AUDIO_OBJECT_TYPE_USAC = 42;
     31 const UInt32 kDynamicRangeControlProperty =
     32    0x64726370;  // "drcp", not present in macOS headers
     33 
     34 // Write ISO/IEC 14496-1 expandable size field (1-4 bytes) (8.3.3)
     35 // Each byte encodes 7 bits of size with MSB as continuation flag
     36 template <typename T>
     37 static bool WriteDescriptor(mozilla::ByteWriter<T>& writer, uint8_t tag,
     38                            uint32_t size) {
     39 #define TRY(x)    \
     40  if (!(x)) {     \
     41    return false; \
     42  }
     43  TRY(writer.WriteU8(tag));
     44  // Sizes are encoded as:
     45  // 0xxxxxxx                   - sizes 0 to 127 (1 byte)
     46  // 1xxxxxxx 0xxxxxxx          - sizes 128 to 16383 (2 bytes)
     47  // 1xxxxxxx 1xxxxxxx 0xxxxxxx - sizes 16384 to 2097151 (3 bytes)
     48  // 1xxxxxxx 1xxxxxxx 1xxxxxxx 0xxxxxxx - sizes 2097152+ (4 bytes)
     49  if (size < 0x80) {
     50    TRY(writer.WriteU8(size));
     51  } else if (size < 0x4000) {
     52    TRY(writer.WriteU8(0x80 | (size >> 7)));
     53    TRY(writer.WriteU8(size & 0x7F));
     54  } else if (size < 0x200000) {
     55    TRY(writer.WriteU8(0x80 | (size >> 14)));
     56    TRY(writer.WriteU8(0x80 | (size >> 7)));
     57    TRY(writer.WriteU8(size & 0x7F));
     58  } else {
     59    TRY(writer.WriteU8(0x80 | (size >> 21)));
     60    TRY(writer.WriteU8(0x80 | (size >> 14)));
     61    TRY(writer.WriteU8(0x80 | (size >> 7)));
     62    TRY(writer.WriteU8(size & 0x7F));
     63  }
     64 
     65  return true;
     66 }
     67 
     68 #undef TRY
     69 
     70 // ISO/IEC 14496-1 (7.2.6.5.1)
     71 static mozilla::Result<nsTArray<uint8_t>, nsresult> CreateEsds(
     72    const nsTArray<uint8_t>& extradata) {
     73  nsTArray<uint8_t> esds;
     74  mozilla::ByteWriter<mozilla::BigEndian> writer(esds);
     75 #define TRY(x)                                             \
     76  if (!(x)) {                                              \
     77    LOG("CreateEsds failed at line %d: %s", __LINE__, #x); \
     78    return mozilla::Err(nsresult::NS_ERROR_FAILURE);       \
     79  }
     80 
     81  // ES_Descriptor (ES_DescrTag = 0x03)
     82  // Size calculation breakdown:
     83  // - 3 bytes: ES_ID (2) + flags (1)
     84  // - 5 bytes: DecoderConfigDescriptor tag (1) + size field (4 max)
     85  // - 13 bytes: DecoderConfigDescriptor fixed content
     86  // - 5 bytes: DecoderSpecificInfo tag (1) + size field (4 max)
     87  // - extradata.Length(): AudioSpecificConfig data
     88  const uint32_t kESDescriptorHeaderSize = 3;        // ES_ID + flags
     89  const uint32_t kDecoderConfigDescrTagSize = 5;     // tag + size field
     90  const uint32_t kDecoderConfigDescrFixedSize = 13;  // fixed fields
     91  const uint32_t kDecoderSpecificInfoTagSize = 5;    // tag + size field
     92  const uint32_t esDescriptorSize =
     93      kESDescriptorHeaderSize + kDecoderConfigDescrTagSize +
     94      kDecoderConfigDescrFixedSize + kDecoderSpecificInfoTagSize +
     95      extradata.Length();
     96  WriteDescriptor(writer, 0x03, esDescriptorSize);
     97  TRY(writer.WriteU16(0x0000));  // ES_ID = 0
     98  TRY(writer.WriteU8(0x00));  // flags (streamDependenceFlag = 0, URL_Flag = 0,
     99                              // OCRstreamFlag = 0, streamPriority = 0)
    100 
    101  // DecoderConfigDescriptor (DecoderConfigDescrTag = 0x04)
    102  // ISO/IEC 14496-1 (7.2.6.6)
    103  const uint32_t decoderConfigDescrSize = kDecoderConfigDescrFixedSize +
    104                                          kDecoderSpecificInfoTagSize +
    105                                          extradata.Length();
    106  TRY(WriteDescriptor(writer, 0x04, decoderConfigDescrSize));
    107  TRY(writer.WriteU8(0x40));  // objectTypeIndication = 0x40 (MPEG-4 AAC)
    108  TRY(writer.WriteU8(
    109      0x15));  // streamType = 0x05 (AudioStream), upstream = 0, reserved = 1
    110 
    111  // bufferSizeDB = 0 (24 bits) - using default buffer size
    112  TRY(writer.WriteU8(0x00));
    113  TRY(writer.WriteU16(0x0000));
    114 
    115  TRY(writer.WriteU32(0x00000000));  // maxBitrate = 0 (no limit)
    116  TRY(writer.WriteU32(0x00000000));  // avgBitrate = 0 (unknown)
    117 
    118  // DecoderSpecificInfo (DecSpecificInfoTag = 0x05)
    119  // Contains the AudioSpecificConfig from ISO/IEC 14496-3 (7.2.6.7: to be
    120  // filled by classes extending it, we just write the extradata extracted from
    121  // the mp4)
    122  TRY(WriteDescriptor(writer, 0x05, extradata.Length()));
    123  TRY(writer.Write(extradata.Elements(), extradata.Length()));
    124 
    125  return esds;
    126 }
    127 
    128 #undef TRY
    129 
    130 namespace mozilla {
    131 
    132 AppleATDecoder::AppleATDecoder(const AudioInfo& aConfig)
    133    : mConfig(aConfig),
    134      mFileStreamError(false),
    135      mConverter(nullptr),
    136      mOutputFormat(),
    137      mStream(nullptr),
    138      mParsedFramesForAACMagicCookie(0),
    139      mErrored(false) {
    140  MOZ_COUNT_CTOR(AppleATDecoder);
    141  LOG("Creating Apple AudioToolbox decoder");
    142  LOG("Audio Decoder configuration: %s %d Hz %d channels %d bits per channel "
    143      "profile=%d extended_profile=%d",
    144      mConfig.mMimeType.get(), mConfig.mRate, mConfig.mChannels,
    145      mConfig.mBitDepth, mConfig.mProfile, mConfig.mExtendedProfile);
    146 
    147  if (mConfig.mMimeType.EqualsLiteral("audio/mpeg")) {
    148    mFormatID = kAudioFormatMPEGLayer3;
    149  } else if (mConfig.mMimeType.EqualsLiteral("audio/mp4a-latm")) {
    150    if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
    151      const AacCodecSpecificData& aacCodecSpecificData =
    152          aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();
    153 
    154      // Check if this is xHE-AAC (USAC) based on profile or extended_profile
    155      if (mConfig.mProfile == AUDIO_OBJECT_TYPE_USAC ||
    156          mConfig.mExtendedProfile == AUDIO_OBJECT_TYPE_USAC) {
    157        mFormatID = kAudioFormatMPEGD_USAC;
    158        LOG("AppleATDecoder detected xHE-AAC/USAC format (profile=%d, "
    159            "extended_profile=%d)",
    160            mConfig.mProfile, mConfig.mExtendedProfile);
    161      } else {
    162        mFormatID = kAudioFormatMPEG4AAC;
    163      }
    164 
    165      mEncoderDelay = aacCodecSpecificData.mEncoderDelayFrames;
    166      mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount;
    167      LOG("AppleATDecoder (aac), found encoder delay (%" PRIu32
    168          ") and total frame count (%" PRIu64 ") in codec-specific side data",
    169          mEncoderDelay, mTotalMediaFrames);
    170    } else {
    171      mFormatID = kAudioFormatMPEG4AAC;
    172    }
    173  } else {
    174    mFormatID = 0;
    175  }
    176 }
    177 
// Destructor. Asserts that the AudioConverter has already been released
// (ProcessShutdown() sets mConverter to nullptr) before the object goes away.
AppleATDecoder::~AppleATDecoder() {
  MOZ_COUNT_DTOR(AppleATDecoder);
  MOZ_ASSERT(!mConverter);
}
    182 
    183 RefPtr<MediaDataDecoder::InitPromise> AppleATDecoder::Init() {
    184  AUTO_PROFILER_LABEL("AppleATDecoder::Init", MEDIA_PLAYBACK);
    185  if (!mFormatID) {
    186    LOG("AppleATDecoder::Init failure: unknown format ID");
    187    return InitPromise::CreateAndReject(
    188        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
    189                    RESULT_DETAIL("Non recognised format")),
    190        __func__);
    191  }
    192  mThread = GetCurrentSerialEventTarget();
    193 
    194  return InitPromise::CreateAndResolve(TrackType::kAudioTrack, __func__);
    195 }
    196 
    197 RefPtr<MediaDataDecoder::FlushPromise> AppleATDecoder::Flush() {
    198  AUTO_PROFILER_LABEL("AppleATDecoder::Flush", MEDIA_PLAYBACK);
    199  MOZ_ASSERT(mThread->IsOnCurrentThread());
    200  LOG("Flushing AudioToolbox AAC decoder");
    201  mQueuedSamples.Clear();
    202  mDecodedSamples.Clear();
    203 
    204  if (mConverter) {
    205    OSStatus rv = AudioConverterReset(mConverter);
    206    if (rv) {
    207      LOG("Error %d resetting AudioConverter", static_cast<int>(rv));
    208    }
    209  }
    210  if (mErrored) {
    211    LOG("Flush error");
    212    mParsedFramesForAACMagicCookie = 0;
    213    mMagicCookie.Clear();
    214    ProcessShutdown();
    215    mErrored = false;
    216  }
    217  return FlushPromise::CreateAndResolve(true, __func__);
    218 }
    219 
    220 RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Drain() {
    221  AUTO_PROFILER_LABEL("AppleATDecoder::Drain", MEDIA_PLAYBACK);
    222  MOZ_ASSERT(mThread->IsOnCurrentThread());
    223  LOG("Draining AudioToolbox AAC decoder");
    224  return DecodePromise::CreateAndResolve(DecodedData(), __func__);
    225 }
    226 
// Releases the AudioToolbox resources via ProcessShutdown() and resolves
// immediately.
RefPtr<ShutdownPromise> AppleATDecoder::Shutdown() {
  AUTO_PROFILER_LABEL("AppleATDecoder::Shutdown", MEDIA_PLAYBACK);
  // mThread may not be set if Init hasn't been called first.
  MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread());
  ProcessShutdown();
  return ShutdownPromise::CreateAndResolve(true, __func__);
}
    234 
    235 void AppleATDecoder::ProcessShutdown() {
    236  // mThread may not be set if Init hasn't been called first.
    237  MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread());
    238 
    239  if (mStream) {
    240    OSStatus rv = AudioFileStreamClose(mStream);
    241    if (rv) {
    242      LOG("error %d disposing of AudioFileStream", static_cast<int>(rv));
    243      return;
    244    }
    245    mStream = nullptr;
    246  }
    247 
    248  if (mConverter) {
    249    LOG("Shutdown: Apple AudioToolbox AAC decoder");
    250    OSStatus rv = AudioConverterDispose(mConverter);
    251    if (rv) {
    252      LOG("error %d disposing of AudioConverter", static_cast<int>(rv));
    253    }
    254    mConverter = nullptr;
    255  }
    256 }
    257 
    258 nsCString AppleATDecoder::GetCodecName() const {
    259  switch (mFormatID) {
    260    case kAudioFormatMPEGLayer3:
    261      return "mp3"_ns;
    262    case kAudioFormatMPEG4AAC:
    263      return "aac"_ns;
    264    case kAudioFormatMPEGD_USAC:
    265      return "xhe-aac"_ns;
    266    default:
    267      return "unknown"_ns;
    268  }
    269 }
    270 
    271 struct PassthroughUserData {
    272  UInt32 mChannels;
    273  UInt32 mDataSize;
    274  const void* mData;
    275  AudioStreamPacketDescription mPacket;
    276 };
    277 
    278 // Error value we pass through the decoder to signal that nothing
    279 // has gone wrong during decoding and we're done processing the packet.
    280 const uint32_t kNoMoreDataErr = 'MOAR';
    281 
    282 static OSStatus _PassthroughInputDataCallback(
    283    AudioConverterRef aAudioConverter, UInt32* aNumDataPackets /* in/out */,
    284    AudioBufferList* aData /* in/out */,
    285    AudioStreamPacketDescription** aPacketDesc, void* aUserData) {
    286  PassthroughUserData* userData = (PassthroughUserData*)aUserData;
    287  if (!userData->mDataSize) {
    288    *aNumDataPackets = 0;
    289    return kNoMoreDataErr;
    290  }
    291 
    292  if (aPacketDesc) {
    293    userData->mPacket.mStartOffset = 0;
    294    userData->mPacket.mVariableFramesInPacket = 0;
    295    userData->mPacket.mDataByteSize = userData->mDataSize;
    296    *aPacketDesc = &userData->mPacket;
    297  }
    298 
    299  aData->mBuffers[0].mNumberChannels = userData->mChannels;
    300  aData->mBuffers[0].mDataByteSize = userData->mDataSize;
    301  aData->mBuffers[0].mData = const_cast<void*>(userData->mData);
    302 
    303  // No more data to provide following this run.
    304  userData->mDataSize = 0;
    305 
    306  return noErr;
    307 }
    308 
    309 RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Decode(
    310    MediaRawData* aSample) {
    311  AUTO_PROFILER_LABEL("AppleATDecoder::Decode", MEDIA_PLAYBACK);
    312  MOZ_ASSERT(mThread->IsOnCurrentThread());
    313  LOG("mp4 input sample pts=%s duration=%s %s %llu bytes audio",
    314      aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(),
    315      aSample->mKeyframe ? " keyframe" : "",
    316      (unsigned long long)aSample->Size());
    317 
    318  MediaResult rv = NS_OK;
    319  if (!mConverter) {
    320    LOG("Lazily initing the decoder");
    321    rv = SetupDecoder(aSample);
    322    if (rv != NS_OK && rv != NS_ERROR_NOT_INITIALIZED) {
    323      LOG("Decoder not initialized");
    324      return DecodePromise::CreateAndReject(rv, __func__);
    325    }
    326  }
    327 
    328  if (mIsADTS) {
    329    bool rv = ADTS::StripHeader(aSample);
    330    if (!rv) {
    331      LOG("Stripping the ADTS header in AppleATDecoder failed");
    332    }
    333  }
    334 
    335  mQueuedSamples.AppendElement(aSample);
    336 
    337  if (rv == NS_OK) {
    338    for (size_t i = 0; i < mQueuedSamples.Length(); i++) {
    339      rv = DecodeSample(mQueuedSamples[i]);
    340      if (NS_FAILED(rv)) {
    341        LOG("Decoding error");
    342        mErrored = true;
    343        return DecodePromise::CreateAndReject(rv, __func__);
    344      }
    345    }
    346    mQueuedSamples.Clear();
    347  }
    348 
    349  DecodedData results = std::move(mDecodedSamples);
    350  mDecodedSamples = DecodedData();
    351  return DecodePromise::CreateAndResolve(std::move(results), __func__);
    352 }
    353 
    354 MediaResult AppleATDecoder::DecodeSample(MediaRawData* aSample) {
    355  MOZ_ASSERT(mThread->IsOnCurrentThread());
    356 
    357  // Array containing the queued decoded audio frames, about to be output.
    358  nsTArray<AudioDataValue> outputData;
    359  UInt32 channels = mOutputFormat.mChannelsPerFrame;
    360  // Pick a multiple of the frame size close to a power of two
    361  // for efficient allocation. We're mainly using this decoder to decode AAC,
    362  // that has packets of 1024 audio frames.
    363  const uint32_t MAX_AUDIO_FRAMES = 1024;
    364  const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * channels;
    365 
    366  // Descriptions for _decompressed_ audio packets. ignored.
    367  auto packets = MakeUnique<AudioStreamPacketDescription[]>(MAX_AUDIO_FRAMES);
    368 
    369  // This API insists on having packets spoon-fed to it from a callback.
    370  // This structure exists only to pass our state.
    371  PassthroughUserData userData = {channels, (UInt32)aSample->Size(),
    372                                  aSample->Data()};
    373 
    374  // Decompressed audio buffer
    375  AlignedAudioBuffer decoded(maxDecodedSamples);
    376  if (!decoded) {
    377    return NS_ERROR_OUT_OF_MEMORY;
    378  }
    379 
    380  do {
    381    AudioBufferList decBuffer;
    382    decBuffer.mNumberBuffers = 1;
    383    decBuffer.mBuffers[0].mNumberChannels = channels;
    384    decBuffer.mBuffers[0].mDataByteSize =
    385        maxDecodedSamples * sizeof(AudioDataValue);
    386    decBuffer.mBuffers[0].mData = decoded.get();
    387 
    388    // in: the max number of packets we can handle from the decoder.
    389    // out: the number of packets the decoder is actually returning.
    390    UInt32 numFrames = MAX_AUDIO_FRAMES;
    391 
    392    OSStatus rv = AudioConverterFillComplexBuffer(
    393        mConverter, _PassthroughInputDataCallback, &userData,
    394        &numFrames /* in/out */, &decBuffer, packets.get());
    395 
    396    if (rv && rv != kNoMoreDataErr) {
    397      LOG("Error decoding audio sample: %d\n", static_cast<int>(rv));
    398      return MediaResult(
    399          NS_ERROR_DOM_MEDIA_DECODE_ERR,
    400          RESULT_DETAIL("Error decoding audio sample: %d @ %s",
    401                        static_cast<int>(rv), aSample->mTime.ToString().get()));
    402    }
    403 
    404    if (numFrames) {
    405      AudioDataValue* outputFrames = decoded.get();
    406      outputData.AppendElements(outputFrames, numFrames * channels);
    407    }
    408 
    409    if (rv == kNoMoreDataErr) {
    410      break;
    411    }
    412  } while (true);
    413 
    414  if (outputData.IsEmpty()) {
    415    return NS_OK;
    416  }
    417 
    418  size_t numFrames = outputData.Length() / channels;
    419  int rate = AssertedCast<int>(mOutputFormat.mSampleRate);
    420  media::TimeUnit duration(numFrames, rate);
    421  if (!duration.IsValid()) {
    422    NS_WARNING("Invalid count of accumulated audio samples");
    423    return MediaResult(
    424        NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
    425        RESULT_DETAIL(
    426            "Invalid count of accumulated audio samples: num:%llu rate:%d",
    427            uint64_t(numFrames), rate));
    428  }
    429 
    430  LOG("Decoded audio packet [%s, %s] (duration: %s)\n",
    431      aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(),
    432      duration.ToString().get());
    433 
    434  AudioSampleBuffer data(outputData.Elements(), outputData.Length());
    435  if (!data.Data()) {
    436    return NS_ERROR_OUT_OF_MEMORY;
    437  }
    438  if (mChannelLayout && !mAudioConverter) {
    439    AudioConfig in(*mChannelLayout, channels, rate);
    440    AudioConfig out(AudioConfig::ChannelLayout::SMPTEDefault(*mChannelLayout),
    441                    channels, rate);
    442    mAudioConverter = MakeUnique<AudioConverter>(in, out);
    443  }
    444  if (mAudioConverter && mChannelLayout && mChannelLayout->IsValid()) {
    445    MOZ_ASSERT(mAudioConverter->CanWorkInPlace());
    446    data = mAudioConverter->Process(std::move(data));
    447  }
    448 
    449  RefPtr<AudioData> audio = new AudioData(
    450      aSample->mOffset, aSample->mTime, data.Forget(), channels, rate,
    451      mChannelLayout && mChannelLayout->IsValid()
    452          ? mChannelLayout->Map()
    453          : AudioConfig::ChannelLayout::UNKNOWN_MAP);
    454  MOZ_DIAGNOSTIC_ASSERT(duration == audio->mDuration, "must be equal");
    455  mDecodedSamples.AppendElement(std::move(audio));
    456  return NS_OK;
    457 }
    458 
    459 MediaResult AppleATDecoder::GetInputAudioDescription(
    460    AudioStreamBasicDescription& aDesc, const nsTArray<uint8_t>& aExtraData) {
    461  MOZ_ASSERT(mThread->IsOnCurrentThread());
    462 
    463  // Request the properties from CoreAudio using the codec magic cookie
    464  AudioFormatInfo formatInfo;
    465  PodZero(&formatInfo.mASBD);
    466  formatInfo.mASBD.mFormatID = mFormatID;
    467  if (mFormatID == kAudioFormatMPEG4AAC) {
    468    formatInfo.mASBD.mFormatFlags = mConfig.mExtendedProfile;
    469  }
    470  formatInfo.mMagicCookieSize = aExtraData.Length();
    471  formatInfo.mMagicCookie = aExtraData.Elements();
    472 
    473  UInt32 formatListSize;
    474  // Attempt to retrieve the default format using
    475  // kAudioFormatProperty_FormatInfo method.
    476  // This method only retrieves the FramesPerPacket information required
    477  // by the decoder, which depends on the codec type and profile.
    478  aDesc.mFormatID = mFormatID;
    479  aDesc.mChannelsPerFrame = mConfig.mChannels;
    480  aDesc.mSampleRate = mConfig.mRate;
    481  UInt32 inputFormatSize = sizeof(aDesc);
    482  OSStatus rv;
    483 
    484  if (mFormatID == kAudioFormatMPEGD_USAC && aExtraData.Length() > 0) {
    485    // For xHE-AAC/USAC, we need to use the magic cookie to get the format info
    486    aDesc.mFormatID = mFormatID;
    487    aDesc.mChannelsPerFrame = mConfig.mChannels;
    488    aDesc.mSampleRate = mConfig.mRate;
    489 
    490    rv = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo,
    491                                aExtraData.Length(), aExtraData.Elements(),
    492                                &inputFormatSize, &aDesc);
    493  } else {
    494    rv = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, nullptr,
    495                                &inputFormatSize, &aDesc);
    496  }
    497 
    498  if (NS_WARN_IF(rv)) {
    499    return MediaResult(
    500        NS_ERROR_FAILURE,
    501        RESULT_DETAIL("Unable to get format info:%d", int32_t(rv)));
    502  }
    503 
    504  // If any of the methods below fail, we will return the default format as
    505  // created using kAudioFormatProperty_FormatInfo above.
    506  rv = AudioFormatGetPropertyInfo(kAudioFormatProperty_FormatList,
    507                                  sizeof(formatInfo), &formatInfo,
    508                                  &formatListSize);
    509  if (rv || (formatListSize % sizeof(AudioFormatListItem))) {
    510    return NS_OK;
    511  }
    512  size_t listCount = formatListSize / sizeof(AudioFormatListItem);
    513  auto formatList = MakeUnique<AudioFormatListItem[]>(listCount);
    514 
    515  rv = AudioFormatGetProperty(kAudioFormatProperty_FormatList,
    516                              sizeof(formatInfo), &formatInfo, &formatListSize,
    517                              formatList.get());
    518  if (rv) {
    519    return NS_OK;
    520  }
    521  LOG("found %zu available audio stream(s)",
    522      formatListSize / sizeof(AudioFormatListItem));
    523  // Get the index number of the first playable format.
    524  // This index number will be for the highest quality layer the platform
    525  // is capable of playing.
    526  UInt32 itemIndex;
    527  UInt32 indexSize = sizeof(itemIndex);
    528  rv = AudioFormatGetProperty(kAudioFormatProperty_FirstPlayableFormatFromList,
    529                              formatListSize, formatList.get(), &indexSize,
    530                              &itemIndex);
    531  if (rv) {
    532    return NS_OK;
    533  }
    534 
    535  aDesc = formatList[itemIndex].mASBD;
    536 
    537  return NS_OK;
    538 }
    539 
    540 AudioConfig::Channel ConvertChannelLabel(AudioChannelLabel id) {
    541  switch (id) {
    542    case kAudioChannelLabel_Left:
    543      return AudioConfig::CHANNEL_FRONT_LEFT;
    544    case kAudioChannelLabel_Right:
    545      return AudioConfig::CHANNEL_FRONT_RIGHT;
    546    case kAudioChannelLabel_Mono:
    547    case kAudioChannelLabel_Center:
    548      return AudioConfig::CHANNEL_FRONT_CENTER;
    549    case kAudioChannelLabel_LFEScreen:
    550      return AudioConfig::CHANNEL_LFE;
    551    case kAudioChannelLabel_LeftSurround:
    552      return AudioConfig::CHANNEL_SIDE_LEFT;
    553    case kAudioChannelLabel_RightSurround:
    554      return AudioConfig::CHANNEL_SIDE_RIGHT;
    555    case kAudioChannelLabel_CenterSurround:
    556      return AudioConfig::CHANNEL_BACK_CENTER;
    557    case kAudioChannelLabel_RearSurroundLeft:
    558      return AudioConfig::CHANNEL_BACK_LEFT;
    559    case kAudioChannelLabel_RearSurroundRight:
    560      return AudioConfig::CHANNEL_BACK_RIGHT;
    561    default:
    562      return AudioConfig::CHANNEL_INVALID;
    563  }
    564 }
    565 
    566 // Will set mChannelLayout if a channel layout could properly be identified
    567 // and is supported.
    568 nsresult AppleATDecoder::SetupChannelLayout() {
    569  MOZ_ASSERT(mThread->IsOnCurrentThread());
    570 
    571  // Determine the channel layout.
    572  UInt32 propertySize;
    573  UInt32 size;
    574  OSStatus status = AudioConverterGetPropertyInfo(
    575      mConverter, kAudioConverterOutputChannelLayout, &propertySize, nullptr);
    576  if (status || !propertySize) {
    577    LOG("Couldn't get channel layout property (%s)", FourCC2Str(status));
    578    return NS_ERROR_FAILURE;
    579  }
    580 
    581  auto data = MakeUnique<uint8_t[]>(propertySize);
    582  size = propertySize;
    583  status = AudioConverterGetProperty(
    584      mConverter, kAudioConverterInputChannelLayout, &size, data.get());
    585  if (status || size != propertySize) {
    586    LOG("Couldn't get channel layout property (%s)", FourCC2Str(status));
    587    return NS_ERROR_FAILURE;
    588  }
    589 
    590  AudioChannelLayout* layout =
    591      reinterpret_cast<AudioChannelLayout*>(data.get());
    592  AudioChannelLayoutTag tag = layout->mChannelLayoutTag;
    593 
    594  // if tag is kAudioChannelLayoutTag_UseChannelDescriptions then the structure
    595  // directly contains the the channel layout mapping.
    596  // If tag is kAudioChannelLayoutTag_UseChannelBitmap then the layout will
    597  // be defined via the bitmap and can be retrieved using
    598  // kAudioFormatProperty_ChannelLayoutForBitmap property.
    599  // Otherwise the tag itself describes the layout.
    600  if (tag != kAudioChannelLayoutTag_UseChannelDescriptions) {
    601    AudioFormatPropertyID property =
    602        tag == kAudioChannelLayoutTag_UseChannelBitmap
    603            ? kAudioFormatProperty_ChannelLayoutForBitmap
    604            : kAudioFormatProperty_ChannelLayoutForTag;
    605 
    606    if (property == kAudioFormatProperty_ChannelLayoutForBitmap) {
    607      status = AudioFormatGetPropertyInfo(
    608          property, sizeof(UInt32), &layout->mChannelBitmap, &propertySize);
    609    } else {
    610      status = AudioFormatGetPropertyInfo(
    611          property, sizeof(AudioChannelLayoutTag), &tag, &propertySize);
    612    }
    613    if (status || !propertySize) {
    614      LOG("Couldn't get channel layout property info (%s:%s)",
    615          FourCC2Str(property), FourCC2Str(status));
    616      return NS_ERROR_FAILURE;
    617    }
    618    data = MakeUnique<uint8_t[]>(propertySize);
    619    layout = reinterpret_cast<AudioChannelLayout*>(data.get());
    620    size = propertySize;
    621 
    622    if (property == kAudioFormatProperty_ChannelLayoutForBitmap) {
    623      status = AudioFormatGetProperty(property, sizeof(UInt32),
    624                                      &layout->mChannelBitmap, &size, layout);
    625    } else {
    626      status = AudioFormatGetProperty(property, sizeof(AudioChannelLayoutTag),
    627                                      &tag, &size, layout);
    628    }
    629    if (status || size != propertySize) {
    630      LOG("Couldn't get channel layout property (%s:%s)", FourCC2Str(property),
    631          FourCC2Str(status));
    632      return NS_ERROR_FAILURE;
    633    }
    634    // We have retrieved the channel layout from the tag or bitmap.
    635    // We can now directly use the channel descriptions.
    636    layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions;
    637  }
    638 
    639  if (layout->mNumberChannelDescriptions != mOutputFormat.mChannelsPerFrame) {
    640    LOG("Not matching the original channel number");
    641    return NS_ERROR_FAILURE;
    642  }
    643 
    644  AutoTArray<AudioConfig::Channel, 8> channels;
    645  channels.SetLength(layout->mNumberChannelDescriptions);
    646  for (uint32_t i = 0; i < layout->mNumberChannelDescriptions; i++) {
    647    AudioChannelLabel id = layout->mChannelDescriptions[i].mChannelLabel;
    648    AudioConfig::Channel channel = ConvertChannelLabel(id);
    649    channels[i] = channel;
    650  }
    651  mChannelLayout = MakeUnique<AudioConfig::ChannelLayout>(
    652      mOutputFormat.mChannelsPerFrame, channels.Elements());
    653  return NS_OK;
    654 }
    655 
    656 MediaResult AppleATDecoder::SetupDecoder(MediaRawData* aSample) {
    657  MOZ_ASSERT(mThread->IsOnCurrentThread());
    658  static const uint32_t MAX_FRAMES = 2;
    659 
    660  bool isADTS =
    661      ADTS::FrameHeader::MatchesSync(Span{aSample->Data(), aSample->Size()});
    662 
    663  if (isADTS) {
    664    ADTS::FrameParser parser;
    665    if (!parser.Parse(0, aSample->Data(), aSample->Data() + aSample->Size())) {
    666      LOG("ADTS frame parsing error");
    667      return NS_ERROR_NOT_INITIALIZED;
    668    }
    669 
    670    AudioCodecSpecificBinaryBlob blob;
    671    ADTS::InitAudioSpecificConfig(parser.FirstFrame(), blob.mBinaryBlob);
    672    mConfig.mCodecSpecificConfig = AudioCodecSpecificVariant{std::move(blob)};
    673    mConfig.mProfile = mConfig.mExtendedProfile =
    674        parser.FirstFrame().Header().mObjectType;
    675    mIsADTS = true;
    676 
    677    if (mFormatID == kAudioFormatMPEG4AAC &&
    678        mConfig.mExtendedProfile == AUDIO_OBJECT_TYPE_USAC) {
    679      LOG("Detected xHE-AAC profile 42 (USAC), switching to "
    680          "kAudioFormatMPEGD_USAC");
    681      mFormatID = kAudioFormatMPEGD_USAC;
    682    }
    683  }
    684 
    685  if (mFormatID == kAudioFormatMPEG4AAC && mConfig.mExtendedProfile == 2 &&
    686      mParsedFramesForAACMagicCookie < MAX_FRAMES) {
    687    LOG("Attempting to get implicit AAC magic cookie");
    688    // Check for implicit SBR signalling if stream is AAC-LC
    689    // This will provide us with an updated magic cookie for use with
    690    // GetInputAudioDescription.
    691    if (NS_SUCCEEDED(GetImplicitAACMagicCookie(aSample)) &&
    692        !mMagicCookie.Length() && !isADTS) {
    693      // nothing found yet, will try again later
    694      LOG("Getting implicit AAC magic cookie failed");
    695      mParsedFramesForAACMagicCookie++;
    696      LOG("Not initialized -- need magic cookie");
    697      return NS_ERROR_NOT_INITIALIZED;
    698    }
    699    // An error occurred, fallback to using default stream description
    700  }
    701 
    702  LOG("Initializing Apple AudioToolbox decoder");
    703 
    704  // Should we try and use magic cookie data from the AAC data? We do this if
    705  // - We have an AAC config &
    706  // - We do not aleady have magic cookie data.
    707  // Otherwise we just use the existing cookie (which may be empty).
    708  bool shouldUseAacMagicCookie =
    709      mConfig.mCodecSpecificConfig.is<AacCodecSpecificData>() &&
    710      mMagicCookie.IsEmpty();
    711 
    712  nsTArray<uint8_t>& magicCookie =
    713      shouldUseAacMagicCookie
    714          ? *mConfig.mCodecSpecificConfig.as<AacCodecSpecificData>()
    715                 .mEsDescriptorBinaryBlob
    716          : mMagicCookie;
    717  AudioStreamBasicDescription inputFormat;
    718  PodZero(&inputFormat);
    719 
    720  MediaResult rv = GetInputAudioDescription(inputFormat, magicCookie);
    721  if (NS_FAILED(rv)) {
    722    LOG("GetInputAudioDescription failure");
    723    return rv;
    724  }
    725  // Fill in the output format manually.
    726  PodZero(&mOutputFormat);
    727  mOutputFormat.mFormatID = kAudioFormatLinearPCM;
    728  mOutputFormat.mSampleRate = inputFormat.mSampleRate;
    729  mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame;
    730  mOutputFormat.mBitsPerChannel = 32;
    731  mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsFloat | 0;
    732  // Set up the decoder so it gives us one sample per frame
    733  mOutputFormat.mFramesPerPacket = 1;
    734  mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame =
    735      mOutputFormat.mChannelsPerFrame * mOutputFormat.mBitsPerChannel / 8;
    736 
    737  OSStatus status =
    738      AudioConverterNew(&inputFormat, &mOutputFormat, &mConverter);
    739  if (status) {
    740    LOG("Error %d constructing AudioConverter", int(status));
    741    mConverter = nullptr;
    742    return MediaResult(
    743        NS_ERROR_FAILURE,
    744        RESULT_DETAIL("Error constructing AudioConverter:%d", int32_t(status)));
    745  }
    746 
    747  if (magicCookie.Length() && mFormatID == kAudioFormatMPEG4AAC) {
    748    status = AudioConverterSetProperty(
    749        mConverter, kAudioConverterDecompressionMagicCookie,
    750        magicCookie.Length(), magicCookie.Elements());
    751    if (status) {
    752      LOG("Error setting AudioConverter AAC cookie:%d", int32_t(status));
    753      ProcessShutdown();
    754      return MediaResult(
    755          NS_ERROR_FAILURE,
    756          RESULT_DETAIL("Error setting AudioConverter AAC cookie:%d",
    757                        int32_t(status)));
    758    }
    759  } else if (magicCookie.Length() && mFormatID == kAudioFormatMPEGD_USAC) {
    760    auto maybeEsdsData = CreateEsds(magicCookie);
    761    if (maybeEsdsData.isErr()) {
    762      return MediaResult(NS_ERROR_FAILURE,
    763                         RESULT_DETAIL("Couldn't create ESDS data"));
    764    }
    765    nsTArray<uint8_t> esdsData = maybeEsdsData.unwrap();
    766    status = AudioConverterSetProperty(
    767        mConverter, kAudioConverterDecompressionMagicCookie,
    768        magicCookie.Length(), magicCookie.Elements());
    769    if (status) {
    770      LOG("AudioConvertSetProperty failed: %d", int32_t(status));
    771      return MediaResult(NS_ERROR_FAILURE,
    772                         RESULT_DETAIL("AudioConverterSetProperty failed: %d",
    773                                       int32_t(status)));
    774    }
    775  }
    776 
    777  if (NS_FAILED(SetupChannelLayout())) {
    778    NS_WARNING("Couldn't retrieve channel layout, will use default layout");
    779  }
    780 
    781  if (mFormatID == kAudioFormatMPEG4AAC &&
    782      mConfig.mExtendedProfile == AUDIO_OBJECT_TYPE_USAC) {
    783    const Float32 kDefaultLoudness = -16.0;
    784    status = AudioConverterSetProperty(
    785        mConverter, kAudioCodecPropertyProgramTargetLevel,
    786        sizeof(kDefaultLoudness), &kDefaultLoudness);
    787    if (status != noErr) {
    788      LOG("AudioConverterSetProperty() failed to set loudness: %d",
    789          int(status));
    790      // Non-fatal error, continue
    791    }
    792 
    793    // Dynamic range control setting isn't in the SDK yet
    794    // https://developer.apple.com/documentation/http-live-streaming/providing-metadata-for-xhe-aac-video-soundtracks
    795    // Values: none=0, night=1, noisy=2, limited=3
    796    const UInt32 kDefaultEffectType = 3;
    797    status = AudioConverterSetProperty(mConverter, kDynamicRangeControlProperty,
    798                                       sizeof(kDefaultEffectType),
    799                                       &kDefaultEffectType);
    800    if (status != noErr) {
    801      LOG("AudioConverterSetProperty() failed to set DRC effect type: %d",
    802          int(status));
    803      // Non-fatal error, continue
    804    }
    805  }
    806 
    807  return NS_OK;
    808 }
    809 
    810 static void _MetadataCallback(void* aAppleATDecoder, AudioFileStreamID aStream,
    811                              AudioFileStreamPropertyID aProperty,
    812                              UInt32* aFlags) {
    813  AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aAppleATDecoder);
    814  MOZ_RELEASE_ASSERT(decoder->mThread->IsOnCurrentThread());
    815 
    816  LOG("MetadataCallback receiving: '%s'", FourCC2Str(aProperty));
    817  if (aProperty == kAudioFileStreamProperty_MagicCookieData) {
    818    UInt32 size;
    819    Boolean writeable;
    820    OSStatus rv =
    821        AudioFileStreamGetPropertyInfo(aStream, aProperty, &size, &writeable);
    822    if (rv) {
    823      LOG("Couldn't get property info for '%s' (%s)", FourCC2Str(aProperty),
    824          FourCC2Str(rv));
    825      decoder->mFileStreamError = true;
    826      return;
    827    }
    828    auto data = MakeUnique<uint8_t[]>(size);
    829    rv = AudioFileStreamGetProperty(aStream, aProperty, &size, data.get());
    830    if (rv) {
    831      LOG("Couldn't get property '%s' (%s)", FourCC2Str(aProperty),
    832          FourCC2Str(rv));
    833      decoder->mFileStreamError = true;
    834      return;
    835    }
    836    decoder->mMagicCookie.AppendElements(data.get(), size);
    837  }
    838 }
    839 
    840 static void _SampleCallback(void* aSBR, UInt32 aNumBytes, UInt32 aNumPackets,
    841                            const void* aData,
    842                            AudioStreamPacketDescription* aPackets) {}
    843 
    844 nsresult AppleATDecoder::GetImplicitAACMagicCookie(MediaRawData* aSample) {
    845  MOZ_ASSERT(mThread->IsOnCurrentThread());
    846 
    847  bool isADTS =
    848      ADTS::FrameHeader::MatchesSync(Span{aSample->Data(), aSample->Size()});
    849 
    850  RefPtr<MediaRawData> adtssample = aSample;
    851 
    852  if (!isADTS) {
    853    // Prepend ADTS header to AAC audio.
    854    adtssample = aSample->Clone();
    855    if (!adtssample) {
    856      return NS_ERROR_OUT_OF_MEMORY;
    857    }
    858    auto frequency_index = ADTS::GetFrequencyIndex(mConfig.mRate);
    859 
    860    if (frequency_index.isErr()) {
    861      LOG("%d isn't a valid rate for AAC", mConfig.mRate);
    862      return NS_ERROR_FAILURE;
    863    }
    864 
    865    // Arbitrarily pick main profile if not specified
    866    int profile = mConfig.mProfile ? mConfig.mProfile : 1;
    867    bool rv = ADTS::ConvertSample(mConfig.mChannels, frequency_index.unwrap(),
    868                                  profile, adtssample);
    869    if (!rv) {
    870      LOG("Failed to apply ADTS header");
    871      return NS_ERROR_FAILURE;
    872    }
    873  }
    874  if (!mStream) {
    875    OSStatus rv = AudioFileStreamOpen(this, _MetadataCallback, _SampleCallback,
    876                                      kAudioFileAAC_ADTSType, &mStream);
    877    if (rv) {
    878      LOG("Couldn't open AudioFileStream");
    879      return NS_ERROR_FAILURE;
    880    }
    881  }
    882 
    883  OSStatus status = AudioFileStreamParseBytes(
    884      mStream, adtssample->Size(), adtssample->Data(), 0 /* discontinuity */);
    885  if (status) {
    886    LOG("Couldn't parse sample");
    887  }
    888 
    889  if (status || mFileStreamError || mMagicCookie.Length()) {
    890    // We have decoded a magic cookie or an error occurred as such
    891    // we won't need the stream any longer.
    892    AudioFileStreamClose(mStream);
    893    mStream = nullptr;
    894  }
    895 
    896  return (mFileStreamError || status) ? NS_ERROR_FAILURE : NS_OK;
    897 }
    898 
    899 }  // namespace mozilla
    900 
    901 #undef LOG