AppleATDecoder.cpp (33077B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "AppleATDecoder.h" 8 9 #include <CoreAudioTypes/CoreAudioBaseTypes.h> 10 #include <mozilla/Result.h> 11 12 #include "ADTSDemuxer.h" 13 #include "Adts.h" 14 #include "ByteWriter.h" 15 #include "ErrorList.h" 16 #include "MP4Decoder.h" 17 #include "MediaInfo.h" 18 #include "MediaResult.h" 19 #include "mozilla/Logging.h" 20 #include "mozilla/Result.h" 21 #include "mozilla/UniquePtr.h" 22 #include "nsDebug.h" 23 #include "nsTArray.h" 24 25 #define LOG(...) \ 26 MOZ_LOG(mozilla::sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__)) 27 #define FourCC2Str(n) \ 28 ((char[5]){(char)(n >> 24), (char)(n >> 16), (char)(n >> 8), (char)(n), 0}) 29 30 const int AUDIO_OBJECT_TYPE_USAC = 42; 31 const UInt32 kDynamicRangeControlProperty = 32 0x64726370; // "drcp", not present in macOS headers 33 34 // Write ISO/IEC 14496-1 expandable size field (1-4 bytes) (8.3.3) 35 // Each byte encodes 7 bits of size with MSB as continuation flag 36 template <typename T> 37 static bool WriteDescriptor(mozilla::ByteWriter<T>& writer, uint8_t tag, 38 uint32_t size) { 39 #define TRY(x) \ 40 if (!(x)) { \ 41 return false; \ 42 } 43 TRY(writer.WriteU8(tag)); 44 // Sizes are encoded as: 45 // 0xxxxxxx - sizes 0 to 127 (1 byte) 46 // 1xxxxxxx 0xxxxxxx - sizes 128 to 16383 (2 bytes) 47 // 1xxxxxxx 1xxxxxxx 0xxxxxxx - sizes 16384 to 2097151 (3 bytes) 48 // 1xxxxxxx 1xxxxxxx 1xxxxxxx 0xxxxxxx - sizes 2097152+ (4 bytes) 49 if (size < 0x80) { 50 TRY(writer.WriteU8(size)); 51 } else if (size < 0x4000) { 52 TRY(writer.WriteU8(0x80 | (size >> 7))); 53 TRY(writer.WriteU8(size & 0x7F)); 54 } else if (size < 0x200000) { 55 TRY(writer.WriteU8(0x80 | (size >> 14))); 56 TRY(writer.WriteU8(0x80 | (size >> 7))); 57 TRY(writer.WriteU8(size & 0x7F)); 58 } else { 59 TRY(writer.WriteU8(0x80 | (size >> 21))); 60 TRY(writer.WriteU8(0x80 | (size >> 14))); 61 TRY(writer.WriteU8(0x80 | (size >> 7))); 62 TRY(writer.WriteU8(size & 0x7F)); 63 } 64 65 return true; 66 } 67 68 #undef TRY 69 70 // ISO/IEC 14496-1 (7.2.6.5.1) 71 static mozilla::Result<nsTArray<uint8_t>, nsresult> CreateEsds( 72 const nsTArray<uint8_t>& extradata) { 73 nsTArray<uint8_t> esds; 74 mozilla::ByteWriter<mozilla::BigEndian> writer(esds); 75 #define TRY(x) \ 76 if (!(x)) { \ 77 LOG("CreateEsds failed at line %d: %s", __LINE__, #x); \ 78 return mozilla::Err(nsresult::NS_ERROR_FAILURE); \ 79 } 80 81 // ES_Descriptor (ES_DescrTag = 0x03) 82 // Size calculation breakdown: 83 // - 3 bytes: ES_ID (2) + flags (1) 84 // - 5 bytes: DecoderConfigDescriptor tag (1) + size field (4 max) 85 // - 13 bytes: DecoderConfigDescriptor fixed content 86 // - 5 bytes: DecoderSpecificInfo tag (1) + size field (4 max) 87 // - extradata.Length(): AudioSpecificConfig data 88 const uint32_t kESDescriptorHeaderSize = 3; // ES_ID + flags 89 const uint32_t kDecoderConfigDescrTagSize = 5; // tag + size field 90 const uint32_t kDecoderConfigDescrFixedSize = 13; // fixed fields 91 const uint32_t kDecoderSpecificInfoTagSize = 5; // tag + size field 92 const uint32_t esDescriptorSize = 93 kESDescriptorHeaderSize + kDecoderConfigDescrTagSize + 94 kDecoderConfigDescrFixedSize + kDecoderSpecificInfoTagSize + 95 extradata.Length(); 96 WriteDescriptor(writer, 0x03, esDescriptorSize); 97 TRY(writer.WriteU16(0x0000)); // ES_ID = 0 98 TRY(writer.WriteU8(0x00)); // flags (streamDependenceFlag = 0, URL_Flag = 0, 99 // OCRstreamFlag = 0, streamPriority = 0) 100 101 // DecoderConfigDescriptor (DecoderConfigDescrTag = 0x04) 102 // ISO/IEC 14496-1 (7.2.6.6) 103 const uint32_t decoderConfigDescrSize = kDecoderConfigDescrFixedSize + 104 kDecoderSpecificInfoTagSize + 105 extradata.Length(); 106 TRY(WriteDescriptor(writer, 0x04, decoderConfigDescrSize)); 107 TRY(writer.WriteU8(0x40)); // objectTypeIndication = 0x40 (MPEG-4 AAC) 108 TRY(writer.WriteU8( 109 0x15)); // streamType = 0x05 (AudioStream), upstream = 0, reserved = 1 110 111 // bufferSizeDB = 0 (24 bits) - using default buffer size 112 TRY(writer.WriteU8(0x00)); 113 TRY(writer.WriteU16(0x0000)); 114 115 TRY(writer.WriteU32(0x00000000)); // maxBitrate = 0 (no limit) 116 TRY(writer.WriteU32(0x00000000)); // avgBitrate = 0 (unknown) 117 118 // DecoderSpecificInfo (DecSpecificInfoTag = 0x05) 119 // Contains the AudioSpecificConfig from ISO/IEC 14496-3 (7.2.6.7: to be 120 // filled by classes extending it, we just write the extradata extracted from 121 // the mp4) 122 TRY(WriteDescriptor(writer, 0x05, extradata.Length())); 123 TRY(writer.Write(extradata.Elements(), extradata.Length())); 124 125 return esds; 126 } 127 128 #undef TRY 129 130 namespace mozilla { 131 132 AppleATDecoder::AppleATDecoder(const AudioInfo& aConfig) 133 : mConfig(aConfig), 134 mFileStreamError(false), 135 mConverter(nullptr), 136 mOutputFormat(), 137 mStream(nullptr), 138 mParsedFramesForAACMagicCookie(0), 139 mErrored(false) { 140 MOZ_COUNT_CTOR(AppleATDecoder); 141 LOG("Creating Apple AudioToolbox decoder"); 142 LOG("Audio Decoder configuration: %s %d Hz %d channels %d bits per channel " 143 "profile=%d extended_profile=%d", 144 mConfig.mMimeType.get(), mConfig.mRate, mConfig.mChannels, 145 mConfig.mBitDepth, mConfig.mProfile, mConfig.mExtendedProfile); 146 147 if (mConfig.mMimeType.EqualsLiteral("audio/mpeg")) { 148 mFormatID = kAudioFormatMPEGLayer3; 149 } else if (mConfig.mMimeType.EqualsLiteral("audio/mp4a-latm")) { 150 if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) { 151 const AacCodecSpecificData& aacCodecSpecificData = 152 aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>(); 153 154 // Check if this is xHE-AAC (USAC) based on profile or extended_profile 155 if (mConfig.mProfile == AUDIO_OBJECT_TYPE_USAC || 156 mConfig.mExtendedProfile == AUDIO_OBJECT_TYPE_USAC) { 157 mFormatID = kAudioFormatMPEGD_USAC; 158 LOG("AppleATDecoder detected xHE-AAC/USAC format (profile=%d, " 159 "extended_profile=%d)", 160 mConfig.mProfile, mConfig.mExtendedProfile); 161 } else { 162 mFormatID = kAudioFormatMPEG4AAC; 163 } 164 165 mEncoderDelay = aacCodecSpecificData.mEncoderDelayFrames; 166 mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount; 167 LOG("AppleATDecoder (aac), found encoder delay (%" PRIu32 168 ") and total frame count (%" PRIu64 ") in codec-specific side data", 169 mEncoderDelay, mTotalMediaFrames); 170 } else { 171 mFormatID = kAudioFormatMPEG4AAC; 172 } 173 } else { 174 mFormatID = 0; 175 } 176 } 177 178 AppleATDecoder::~AppleATDecoder() { 179 MOZ_COUNT_DTOR(AppleATDecoder); 180 MOZ_ASSERT(!mConverter); 181 } 182 183 RefPtr<MediaDataDecoder::InitPromise> AppleATDecoder::Init() { 184 AUTO_PROFILER_LABEL("AppleATDecoder::Init", MEDIA_PLAYBACK); 185 if (!mFormatID) { 186 LOG("AppleATDecoder::Init failure: unknown format ID"); 187 return InitPromise::CreateAndReject( 188 MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, 189 RESULT_DETAIL("Non recognised format")), 190 __func__); 191 } 192 mThread = GetCurrentSerialEventTarget(); 193 194 return InitPromise::CreateAndResolve(TrackType::kAudioTrack, __func__); 195 } 196 197 RefPtr<MediaDataDecoder::FlushPromise> AppleATDecoder::Flush() { 198 AUTO_PROFILER_LABEL("AppleATDecoder::Flush", MEDIA_PLAYBACK); 199 MOZ_ASSERT(mThread->IsOnCurrentThread()); 200 LOG("Flushing AudioToolbox AAC decoder"); 201 mQueuedSamples.Clear(); 202 mDecodedSamples.Clear(); 203 204 if (mConverter) { 205 OSStatus rv = AudioConverterReset(mConverter); 206 if (rv) { 207 LOG("Error %d resetting AudioConverter", static_cast<int>(rv)); 208 } 209 } 210 if (mErrored) { 211 LOG("Flush error"); 212 mParsedFramesForAACMagicCookie = 0; 213 mMagicCookie.Clear(); 214 ProcessShutdown(); 215 mErrored = false; 216 } 217 return FlushPromise::CreateAndResolve(true, __func__); 218 } 219 220 RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Drain() { 221 AUTO_PROFILER_LABEL("AppleATDecoder::Drain", MEDIA_PLAYBACK); 222 MOZ_ASSERT(mThread->IsOnCurrentThread()); 223 LOG("Draining AudioToolbox AAC decoder"); 224 return DecodePromise::CreateAndResolve(DecodedData(), __func__); 225 } 226 227 RefPtr<ShutdownPromise> AppleATDecoder::Shutdown() { 228 AUTO_PROFILER_LABEL("AppleATDecoder::Shutdown", MEDIA_PLAYBACK); 229 // mThread may not be set if Init hasn't been called first. 230 MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread()); 231 ProcessShutdown(); 232 return ShutdownPromise::CreateAndResolve(true, __func__); 233 } 234 235 void AppleATDecoder::ProcessShutdown() { 236 // mThread may not be set if Init hasn't been called first. 237 MOZ_ASSERT(!mThread || mThread->IsOnCurrentThread()); 238 239 if (mStream) { 240 OSStatus rv = AudioFileStreamClose(mStream); 241 if (rv) { 242 LOG("error %d disposing of AudioFileStream", static_cast<int>(rv)); 243 return; 244 } 245 mStream = nullptr; 246 } 247 248 if (mConverter) { 249 LOG("Shutdown: Apple AudioToolbox AAC decoder"); 250 OSStatus rv = AudioConverterDispose(mConverter); 251 if (rv) { 252 LOG("error %d disposing of AudioConverter", static_cast<int>(rv)); 253 } 254 mConverter = nullptr; 255 } 256 } 257 258 nsCString AppleATDecoder::GetCodecName() const { 259 switch (mFormatID) { 260 case kAudioFormatMPEGLayer3: 261 return "mp3"_ns; 262 case kAudioFormatMPEG4AAC: 263 return "aac"_ns; 264 case kAudioFormatMPEGD_USAC: 265 return "xhe-aac"_ns; 266 default: 267 return "unknown"_ns; 268 } 269 } 270 271 struct PassthroughUserData { 272 UInt32 mChannels; 273 UInt32 mDataSize; 274 const void* mData; 275 AudioStreamPacketDescription mPacket; 276 }; 277 278 // Error value we pass through the decoder to signal that nothing 279 // has gone wrong during decoding and we're done processing the packet. 280 const uint32_t kNoMoreDataErr = 'MOAR'; 281 282 static OSStatus _PassthroughInputDataCallback( 283 AudioConverterRef aAudioConverter, UInt32* aNumDataPackets /* in/out */, 284 AudioBufferList* aData /* in/out */, 285 AudioStreamPacketDescription** aPacketDesc, void* aUserData) { 286 PassthroughUserData* userData = (PassthroughUserData*)aUserData; 287 if (!userData->mDataSize) { 288 *aNumDataPackets = 0; 289 return kNoMoreDataErr; 290 } 291 292 if (aPacketDesc) { 293 userData->mPacket.mStartOffset = 0; 294 userData->mPacket.mVariableFramesInPacket = 0; 295 userData->mPacket.mDataByteSize = userData->mDataSize; 296 *aPacketDesc = &userData->mPacket; 297 } 298 299 aData->mBuffers[0].mNumberChannels = userData->mChannels; 300 aData->mBuffers[0].mDataByteSize = userData->mDataSize; 301 aData->mBuffers[0].mData = const_cast<void*>(userData->mData); 302 303 // No more data to provide following this run. 304 userData->mDataSize = 0; 305 306 return noErr; 307 } 308 309 RefPtr<MediaDataDecoder::DecodePromise> AppleATDecoder::Decode( 310 MediaRawData* aSample) { 311 AUTO_PROFILER_LABEL("AppleATDecoder::Decode", MEDIA_PLAYBACK); 312 MOZ_ASSERT(mThread->IsOnCurrentThread()); 313 LOG("mp4 input sample pts=%s duration=%s %s %llu bytes audio", 314 aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(), 315 aSample->mKeyframe ? " keyframe" : "", 316 (unsigned long long)aSample->Size()); 317 318 MediaResult rv = NS_OK; 319 if (!mConverter) { 320 LOG("Lazily initing the decoder"); 321 rv = SetupDecoder(aSample); 322 if (rv != NS_OK && rv != NS_ERROR_NOT_INITIALIZED) { 323 LOG("Decoder not initialized"); 324 return DecodePromise::CreateAndReject(rv, __func__); 325 } 326 } 327 328 if (mIsADTS) { 329 bool rv = ADTS::StripHeader(aSample); 330 if (!rv) { 331 LOG("Stripping the ADTS header in AppleATDecoder failed"); 332 } 333 } 334 335 mQueuedSamples.AppendElement(aSample); 336 337 if (rv == NS_OK) { 338 for (size_t i = 0; i < mQueuedSamples.Length(); i++) { 339 rv = DecodeSample(mQueuedSamples[i]); 340 if (NS_FAILED(rv)) { 341 LOG("Decoding error"); 342 mErrored = true; 343 return DecodePromise::CreateAndReject(rv, __func__); 344 } 345 } 346 mQueuedSamples.Clear(); 347 } 348 349 DecodedData results = std::move(mDecodedSamples); 350 mDecodedSamples = DecodedData(); 351 return DecodePromise::CreateAndResolve(std::move(results), __func__); 352 } 353 354 MediaResult AppleATDecoder::DecodeSample(MediaRawData* aSample) { 355 MOZ_ASSERT(mThread->IsOnCurrentThread()); 356 357 // Array containing the queued decoded audio frames, about to be output. 358 nsTArray<AudioDataValue> outputData; 359 UInt32 channels = mOutputFormat.mChannelsPerFrame; 360 // Pick a multiple of the frame size close to a power of two 361 // for efficient allocation. We're mainly using this decoder to decode AAC, 362 // that has packets of 1024 audio frames. 363 const uint32_t MAX_AUDIO_FRAMES = 1024; 364 const uint32_t maxDecodedSamples = MAX_AUDIO_FRAMES * channels; 365 366 // Descriptions for _decompressed_ audio packets. ignored. 367 auto packets = MakeUnique<AudioStreamPacketDescription[]>(MAX_AUDIO_FRAMES); 368 369 // This API insists on having packets spoon-fed to it from a callback. 370 // This structure exists only to pass our state. 371 PassthroughUserData userData = {channels, (UInt32)aSample->Size(), 372 aSample->Data()}; 373 374 // Decompressed audio buffer 375 AlignedAudioBuffer decoded(maxDecodedSamples); 376 if (!decoded) { 377 return NS_ERROR_OUT_OF_MEMORY; 378 } 379 380 do { 381 AudioBufferList decBuffer; 382 decBuffer.mNumberBuffers = 1; 383 decBuffer.mBuffers[0].mNumberChannels = channels; 384 decBuffer.mBuffers[0].mDataByteSize = 385 maxDecodedSamples * sizeof(AudioDataValue); 386 decBuffer.mBuffers[0].mData = decoded.get(); 387 388 // in: the max number of packets we can handle from the decoder. 389 // out: the number of packets the decoder is actually returning. 390 UInt32 numFrames = MAX_AUDIO_FRAMES; 391 392 OSStatus rv = AudioConverterFillComplexBuffer( 393 mConverter, _PassthroughInputDataCallback, &userData, 394 &numFrames /* in/out */, &decBuffer, packets.get()); 395 396 if (rv && rv != kNoMoreDataErr) { 397 LOG("Error decoding audio sample: %d\n", static_cast<int>(rv)); 398 return MediaResult( 399 NS_ERROR_DOM_MEDIA_DECODE_ERR, 400 RESULT_DETAIL("Error decoding audio sample: %d @ %s", 401 static_cast<int>(rv), aSample->mTime.ToString().get())); 402 } 403 404 if (numFrames) { 405 AudioDataValue* outputFrames = decoded.get(); 406 outputData.AppendElements(outputFrames, numFrames * channels); 407 } 408 409 if (rv == kNoMoreDataErr) { 410 break; 411 } 412 } while (true); 413 414 if (outputData.IsEmpty()) { 415 return NS_OK; 416 } 417 418 size_t numFrames = outputData.Length() / channels; 419 int rate = AssertedCast<int>(mOutputFormat.mSampleRate); 420 media::TimeUnit duration(numFrames, rate); 421 if (!duration.IsValid()) { 422 NS_WARNING("Invalid count of accumulated audio samples"); 423 return MediaResult( 424 NS_ERROR_DOM_MEDIA_OVERFLOW_ERR, 425 RESULT_DETAIL( 426 "Invalid count of accumulated audio samples: num:%llu rate:%d", 427 uint64_t(numFrames), rate)); 428 } 429 430 LOG("Decoded audio packet [%s, %s] (duration: %s)\n", 431 aSample->mTime.ToString().get(), aSample->GetEndTime().ToString().get(), 432 duration.ToString().get()); 433 434 AudioSampleBuffer data(outputData.Elements(), outputData.Length()); 435 if (!data.Data()) { 436 return NS_ERROR_OUT_OF_MEMORY; 437 } 438 if (mChannelLayout && !mAudioConverter) { 439 AudioConfig in(*mChannelLayout, channels, rate); 440 AudioConfig out(AudioConfig::ChannelLayout::SMPTEDefault(*mChannelLayout), 441 channels, rate); 442 mAudioConverter = MakeUnique<AudioConverter>(in, out); 443 } 444 if (mAudioConverter && mChannelLayout && mChannelLayout->IsValid()) { 445 MOZ_ASSERT(mAudioConverter->CanWorkInPlace()); 446 data = mAudioConverter->Process(std::move(data)); 447 } 448 449 RefPtr<AudioData> audio = new AudioData( 450 aSample->mOffset, aSample->mTime, data.Forget(), channels, rate, 451 mChannelLayout && mChannelLayout->IsValid() 452 ? mChannelLayout->Map() 453 : AudioConfig::ChannelLayout::UNKNOWN_MAP); 454 MOZ_DIAGNOSTIC_ASSERT(duration == audio->mDuration, "must be equal"); 455 mDecodedSamples.AppendElement(std::move(audio)); 456 return NS_OK; 457 } 458 459 MediaResult AppleATDecoder::GetInputAudioDescription( 460 AudioStreamBasicDescription& aDesc, const nsTArray<uint8_t>& aExtraData) { 461 MOZ_ASSERT(mThread->IsOnCurrentThread()); 462 463 // Request the properties from CoreAudio using the codec magic cookie 464 AudioFormatInfo formatInfo; 465 PodZero(&formatInfo.mASBD); 466 formatInfo.mASBD.mFormatID = mFormatID; 467 if (mFormatID == kAudioFormatMPEG4AAC) { 468 formatInfo.mASBD.mFormatFlags = mConfig.mExtendedProfile; 469 } 470 formatInfo.mMagicCookieSize = aExtraData.Length(); 471 formatInfo.mMagicCookie = aExtraData.Elements(); 472 473 UInt32 formatListSize; 474 // Attempt to retrieve the default format using 475 // kAudioFormatProperty_FormatInfo method. 476 // This method only retrieves the FramesPerPacket information required 477 // by the decoder, which depends on the codec type and profile. 478 aDesc.mFormatID = mFormatID; 479 aDesc.mChannelsPerFrame = mConfig.mChannels; 480 aDesc.mSampleRate = mConfig.mRate; 481 UInt32 inputFormatSize = sizeof(aDesc); 482 OSStatus rv; 483 484 if (mFormatID == kAudioFormatMPEGD_USAC && aExtraData.Length() > 0) { 485 // For xHE-AAC/USAC, we need to use the magic cookie to get the format info 486 aDesc.mFormatID = mFormatID; 487 aDesc.mChannelsPerFrame = mConfig.mChannels; 488 aDesc.mSampleRate = mConfig.mRate; 489 490 rv = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 491 aExtraData.Length(), aExtraData.Elements(), 492 &inputFormatSize, &aDesc); 493 } else { 494 rv = AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, 0, nullptr, 495 &inputFormatSize, &aDesc); 496 } 497 498 if (NS_WARN_IF(rv)) { 499 return MediaResult( 500 NS_ERROR_FAILURE, 501 RESULT_DETAIL("Unable to get format info:%d", int32_t(rv))); 502 } 503 504 // If any of the methods below fail, we will return the default format as 505 // created using kAudioFormatProperty_FormatInfo above. 506 rv = AudioFormatGetPropertyInfo(kAudioFormatProperty_FormatList, 507 sizeof(formatInfo), &formatInfo, 508 &formatListSize); 509 if (rv || (formatListSize % sizeof(AudioFormatListItem))) { 510 return NS_OK; 511 } 512 size_t listCount = formatListSize / sizeof(AudioFormatListItem); 513 auto formatList = MakeUnique<AudioFormatListItem[]>(listCount); 514 515 rv = AudioFormatGetProperty(kAudioFormatProperty_FormatList, 516 sizeof(formatInfo), &formatInfo, &formatListSize, 517 formatList.get()); 518 if (rv) { 519 return NS_OK; 520 } 521 LOG("found %zu available audio stream(s)", 522 formatListSize / sizeof(AudioFormatListItem)); 523 // Get the index number of the first playable format. 524 // This index number will be for the highest quality layer the platform 525 // is capable of playing. 526 UInt32 itemIndex; 527 UInt32 indexSize = sizeof(itemIndex); 528 rv = AudioFormatGetProperty(kAudioFormatProperty_FirstPlayableFormatFromList, 529 formatListSize, formatList.get(), &indexSize, 530 &itemIndex); 531 if (rv) { 532 return NS_OK; 533 } 534 535 aDesc = formatList[itemIndex].mASBD; 536 537 return NS_OK; 538 } 539 540 AudioConfig::Channel ConvertChannelLabel(AudioChannelLabel id) { 541 switch (id) { 542 case kAudioChannelLabel_Left: 543 return AudioConfig::CHANNEL_FRONT_LEFT; 544 case kAudioChannelLabel_Right: 545 return AudioConfig::CHANNEL_FRONT_RIGHT; 546 case kAudioChannelLabel_Mono: 547 case kAudioChannelLabel_Center: 548 return AudioConfig::CHANNEL_FRONT_CENTER; 549 case kAudioChannelLabel_LFEScreen: 550 return AudioConfig::CHANNEL_LFE; 551 case kAudioChannelLabel_LeftSurround: 552 return AudioConfig::CHANNEL_SIDE_LEFT; 553 case kAudioChannelLabel_RightSurround: 554 return AudioConfig::CHANNEL_SIDE_RIGHT; 555 case kAudioChannelLabel_CenterSurround: 556 return AudioConfig::CHANNEL_BACK_CENTER; 557 case kAudioChannelLabel_RearSurroundLeft: 558 return AudioConfig::CHANNEL_BACK_LEFT; 559 case kAudioChannelLabel_RearSurroundRight: 560 return AudioConfig::CHANNEL_BACK_RIGHT; 561 default: 562 return AudioConfig::CHANNEL_INVALID; 563 } 564 } 565 566 // Will set mChannelLayout if a channel layout could properly be identified 567 // and is supported. 568 nsresult AppleATDecoder::SetupChannelLayout() { 569 MOZ_ASSERT(mThread->IsOnCurrentThread()); 570 571 // Determine the channel layout. 572 UInt32 propertySize; 573 UInt32 size; 574 OSStatus status = AudioConverterGetPropertyInfo( 575 mConverter, kAudioConverterOutputChannelLayout, &propertySize, nullptr); 576 if (status || !propertySize) { 577 LOG("Couldn't get channel layout property (%s)", FourCC2Str(status)); 578 return NS_ERROR_FAILURE; 579 } 580 581 auto data = MakeUnique<uint8_t[]>(propertySize); 582 size = propertySize; 583 status = AudioConverterGetProperty( 584 mConverter, kAudioConverterInputChannelLayout, &size, data.get()); 585 if (status || size != propertySize) { 586 LOG("Couldn't get channel layout property (%s)", FourCC2Str(status)); 587 return NS_ERROR_FAILURE; 588 } 589 590 AudioChannelLayout* layout = 591 reinterpret_cast<AudioChannelLayout*>(data.get()); 592 AudioChannelLayoutTag tag = layout->mChannelLayoutTag; 593 594 // if tag is kAudioChannelLayoutTag_UseChannelDescriptions then the structure 595 // directly contains the the channel layout mapping. 596 // If tag is kAudioChannelLayoutTag_UseChannelBitmap then the layout will 597 // be defined via the bitmap and can be retrieved using 598 // kAudioFormatProperty_ChannelLayoutForBitmap property. 599 // Otherwise the tag itself describes the layout. 600 if (tag != kAudioChannelLayoutTag_UseChannelDescriptions) { 601 AudioFormatPropertyID property = 602 tag == kAudioChannelLayoutTag_UseChannelBitmap 603 ? kAudioFormatProperty_ChannelLayoutForBitmap 604 : kAudioFormatProperty_ChannelLayoutForTag; 605 606 if (property == kAudioFormatProperty_ChannelLayoutForBitmap) { 607 status = AudioFormatGetPropertyInfo( 608 property, sizeof(UInt32), &layout->mChannelBitmap, &propertySize); 609 } else { 610 status = AudioFormatGetPropertyInfo( 611 property, sizeof(AudioChannelLayoutTag), &tag, &propertySize); 612 } 613 if (status || !propertySize) { 614 LOG("Couldn't get channel layout property info (%s:%s)", 615 FourCC2Str(property), FourCC2Str(status)); 616 return NS_ERROR_FAILURE; 617 } 618 data = MakeUnique<uint8_t[]>(propertySize); 619 layout = reinterpret_cast<AudioChannelLayout*>(data.get()); 620 size = propertySize; 621 622 if (property == kAudioFormatProperty_ChannelLayoutForBitmap) { 623 status = AudioFormatGetProperty(property, sizeof(UInt32), 624 &layout->mChannelBitmap, &size, layout); 625 } else { 626 status = AudioFormatGetProperty(property, sizeof(AudioChannelLayoutTag), 627 &tag, &size, layout); 628 } 629 if (status || size != propertySize) { 630 LOG("Couldn't get channel layout property (%s:%s)", FourCC2Str(property), 631 FourCC2Str(status)); 632 return NS_ERROR_FAILURE; 633 } 634 // We have retrieved the channel layout from the tag or bitmap. 635 // We can now directly use the channel descriptions. 636 layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; 637 } 638 639 if (layout->mNumberChannelDescriptions != mOutputFormat.mChannelsPerFrame) { 640 LOG("Not matching the original channel number"); 641 return NS_ERROR_FAILURE; 642 } 643 644 AutoTArray<AudioConfig::Channel, 8> channels; 645 channels.SetLength(layout->mNumberChannelDescriptions); 646 for (uint32_t i = 0; i < layout->mNumberChannelDescriptions; i++) { 647 AudioChannelLabel id = layout->mChannelDescriptions[i].mChannelLabel; 648 AudioConfig::Channel channel = ConvertChannelLabel(id); 649 channels[i] = channel; 650 } 651 mChannelLayout = MakeUnique<AudioConfig::ChannelLayout>( 652 mOutputFormat.mChannelsPerFrame, channels.Elements()); 653 return NS_OK; 654 } 655 656 MediaResult AppleATDecoder::SetupDecoder(MediaRawData* aSample) { 657 MOZ_ASSERT(mThread->IsOnCurrentThread()); 658 static const uint32_t MAX_FRAMES = 2; 659 660 bool isADTS = 661 ADTS::FrameHeader::MatchesSync(Span{aSample->Data(), aSample->Size()}); 662 663 if (isADTS) { 664 ADTS::FrameParser parser; 665 if (!parser.Parse(0, aSample->Data(), aSample->Data() + aSample->Size())) { 666 LOG("ADTS frame parsing error"); 667 return NS_ERROR_NOT_INITIALIZED; 668 } 669 670 AudioCodecSpecificBinaryBlob blob; 671 ADTS::InitAudioSpecificConfig(parser.FirstFrame(), blob.mBinaryBlob); 672 mConfig.mCodecSpecificConfig = AudioCodecSpecificVariant{std::move(blob)}; 673 mConfig.mProfile = mConfig.mExtendedProfile = 674 parser.FirstFrame().Header().mObjectType; 675 mIsADTS = true; 676 677 if (mFormatID == kAudioFormatMPEG4AAC && 678 mConfig.mExtendedProfile == AUDIO_OBJECT_TYPE_USAC) { 679 LOG("Detected xHE-AAC profile 42 (USAC), switching to " 680 "kAudioFormatMPEGD_USAC"); 681 mFormatID = kAudioFormatMPEGD_USAC; 682 } 683 } 684 685 if (mFormatID == kAudioFormatMPEG4AAC && mConfig.mExtendedProfile == 2 && 686 mParsedFramesForAACMagicCookie < MAX_FRAMES) { 687 LOG("Attempting to get implicit AAC magic cookie"); 688 // Check for implicit SBR signalling if stream is AAC-LC 689 // This will provide us with an updated magic cookie for use with 690 // GetInputAudioDescription. 691 if (NS_SUCCEEDED(GetImplicitAACMagicCookie(aSample)) && 692 !mMagicCookie.Length() && !isADTS) { 693 // nothing found yet, will try again later 694 LOG("Getting implicit AAC magic cookie failed"); 695 mParsedFramesForAACMagicCookie++; 696 LOG("Not initialized -- need magic cookie"); 697 return NS_ERROR_NOT_INITIALIZED; 698 } 699 // An error occurred, fallback to using default stream description 700 } 701 702 LOG("Initializing Apple AudioToolbox decoder"); 703 704 // Should we try and use magic cookie data from the AAC data? We do this if 705 // - We have an AAC config & 706 // - We do not aleady have magic cookie data. 707 // Otherwise we just use the existing cookie (which may be empty). 708 bool shouldUseAacMagicCookie = 709 mConfig.mCodecSpecificConfig.is<AacCodecSpecificData>() && 710 mMagicCookie.IsEmpty(); 711 712 nsTArray<uint8_t>& magicCookie = 713 shouldUseAacMagicCookie 714 ? *mConfig.mCodecSpecificConfig.as<AacCodecSpecificData>() 715 .mEsDescriptorBinaryBlob 716 : mMagicCookie; 717 AudioStreamBasicDescription inputFormat; 718 PodZero(&inputFormat); 719 720 MediaResult rv = GetInputAudioDescription(inputFormat, magicCookie); 721 if (NS_FAILED(rv)) { 722 LOG("GetInputAudioDescription failure"); 723 return rv; 724 } 725 // Fill in the output format manually. 726 PodZero(&mOutputFormat); 727 mOutputFormat.mFormatID = kAudioFormatLinearPCM; 728 mOutputFormat.mSampleRate = inputFormat.mSampleRate; 729 mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame; 730 mOutputFormat.mBitsPerChannel = 32; 731 mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsFloat | 0; 732 // Set up the decoder so it gives us one sample per frame 733 mOutputFormat.mFramesPerPacket = 1; 734 mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame = 735 mOutputFormat.mChannelsPerFrame * mOutputFormat.mBitsPerChannel / 8; 736 737 OSStatus status = 738 AudioConverterNew(&inputFormat, &mOutputFormat, &mConverter); 739 if (status) { 740 LOG("Error %d constructing AudioConverter", int(status)); 741 mConverter = nullptr; 742 return MediaResult( 743 NS_ERROR_FAILURE, 744 RESULT_DETAIL("Error constructing AudioConverter:%d", int32_t(status))); 745 } 746 747 if (magicCookie.Length() && mFormatID == kAudioFormatMPEG4AAC) { 748 status = AudioConverterSetProperty( 749 mConverter, kAudioConverterDecompressionMagicCookie, 750 magicCookie.Length(), magicCookie.Elements()); 751 if (status) { 752 LOG("Error setting AudioConverter AAC cookie:%d", int32_t(status)); 753 ProcessShutdown(); 754 return MediaResult( 755 NS_ERROR_FAILURE, 756 RESULT_DETAIL("Error setting AudioConverter AAC cookie:%d", 757 int32_t(status))); 758 } 759 } else if (magicCookie.Length() && mFormatID == kAudioFormatMPEGD_USAC) { 760 auto maybeEsdsData = CreateEsds(magicCookie); 761 if (maybeEsdsData.isErr()) { 762 return MediaResult(NS_ERROR_FAILURE, 763 RESULT_DETAIL("Couldn't create ESDS data")); 764 } 765 nsTArray<uint8_t> esdsData = maybeEsdsData.unwrap(); 766 status = AudioConverterSetProperty( 767 mConverter, kAudioConverterDecompressionMagicCookie, 768 magicCookie.Length(), magicCookie.Elements()); 769 if (status) { 770 LOG("AudioConvertSetProperty failed: %d", int32_t(status)); 771 return MediaResult(NS_ERROR_FAILURE, 772 RESULT_DETAIL("AudioConverterSetProperty failed: %d", 773 int32_t(status))); 774 } 775 } 776 777 if (NS_FAILED(SetupChannelLayout())) { 778 NS_WARNING("Couldn't retrieve channel layout, will use default layout"); 779 } 780 781 if (mFormatID == kAudioFormatMPEG4AAC && 782 mConfig.mExtendedProfile == AUDIO_OBJECT_TYPE_USAC) { 783 const Float32 kDefaultLoudness = -16.0; 784 status = AudioConverterSetProperty( 785 mConverter, kAudioCodecPropertyProgramTargetLevel, 786 sizeof(kDefaultLoudness), &kDefaultLoudness); 787 if (status != noErr) { 788 LOG("AudioConverterSetProperty() failed to set loudness: %d", 789 int(status)); 790 // Non-fatal error, continue 791 } 792 793 // Dynamic range control setting isn't in the SDK yet 794 // https://developer.apple.com/documentation/http-live-streaming/providing-metadata-for-xhe-aac-video-soundtracks 795 // Values: none=0, night=1, noisy=2, limited=3 796 const UInt32 kDefaultEffectType = 3; 797 status = AudioConverterSetProperty(mConverter, kDynamicRangeControlProperty, 798 sizeof(kDefaultEffectType), 799 &kDefaultEffectType); 800 if (status != noErr) { 801 LOG("AudioConverterSetProperty() failed to set DRC effect type: %d", 802 int(status)); 803 // Non-fatal error, continue 804 } 805 } 806 807 return NS_OK; 808 } 809 810 static void _MetadataCallback(void* aAppleATDecoder, AudioFileStreamID aStream, 811 AudioFileStreamPropertyID aProperty, 812 UInt32* aFlags) { 813 AppleATDecoder* decoder = static_cast<AppleATDecoder*>(aAppleATDecoder); 814 MOZ_RELEASE_ASSERT(decoder->mThread->IsOnCurrentThread()); 815 816 LOG("MetadataCallback receiving: '%s'", FourCC2Str(aProperty)); 817 if (aProperty == kAudioFileStreamProperty_MagicCookieData) { 818 UInt32 size; 819 Boolean writeable; 820 OSStatus rv = 821 AudioFileStreamGetPropertyInfo(aStream, aProperty, &size, &writeable); 822 if (rv) { 823 LOG("Couldn't get property info for '%s' (%s)", FourCC2Str(aProperty), 824 FourCC2Str(rv)); 825 decoder->mFileStreamError = true; 826 return; 827 } 828 auto data = MakeUnique<uint8_t[]>(size); 829 rv = AudioFileStreamGetProperty(aStream, aProperty, &size, data.get()); 830 if (rv) { 831 LOG("Couldn't get property '%s' (%s)", FourCC2Str(aProperty), 832 FourCC2Str(rv)); 833 decoder->mFileStreamError = true; 834 return; 835 } 836 decoder->mMagicCookie.AppendElements(data.get(), size); 837 } 838 } 839 840 static void _SampleCallback(void* aSBR, UInt32 aNumBytes, UInt32 aNumPackets, 841 const void* aData, 842 AudioStreamPacketDescription* aPackets) {} 843 844 nsresult AppleATDecoder::GetImplicitAACMagicCookie(MediaRawData* aSample) { 845 MOZ_ASSERT(mThread->IsOnCurrentThread()); 846 847 bool isADTS = 848 ADTS::FrameHeader::MatchesSync(Span{aSample->Data(), aSample->Size()}); 849 850 RefPtr<MediaRawData> adtssample = aSample; 851 852 if (!isADTS) { 853 // Prepend ADTS header to AAC audio. 854 adtssample = aSample->Clone(); 855 if (!adtssample) { 856 return NS_ERROR_OUT_OF_MEMORY; 857 } 858 auto frequency_index = ADTS::GetFrequencyIndex(mConfig.mRate); 859 860 if (frequency_index.isErr()) { 861 LOG("%d isn't a valid rate for AAC", mConfig.mRate); 862 return NS_ERROR_FAILURE; 863 } 864 865 // Arbitrarily pick main profile if not specified 866 int profile = mConfig.mProfile ? mConfig.mProfile : 1; 867 bool rv = ADTS::ConvertSample(mConfig.mChannels, frequency_index.unwrap(), 868 profile, adtssample); 869 if (!rv) { 870 LOG("Failed to apply ADTS header"); 871 return NS_ERROR_FAILURE; 872 } 873 } 874 if (!mStream) { 875 OSStatus rv = AudioFileStreamOpen(this, _MetadataCallback, _SampleCallback, 876 kAudioFileAAC_ADTSType, &mStream); 877 if (rv) { 878 LOG("Couldn't open AudioFileStream"); 879 return NS_ERROR_FAILURE; 880 } 881 } 882 883 OSStatus status = AudioFileStreamParseBytes( 884 mStream, adtssample->Size(), adtssample->Data(), 0 /* discontinuity */); 885 if (status) { 886 LOG("Couldn't parse sample"); 887 } 888 889 if (status || mFileStreamError || mMagicCookie.Length()) { 890 // We have decoded a magic cookie or an error occurred as such 891 // we won't need the stream any longer. 892 AudioFileStreamClose(mStream); 893 mStream = nullptr; 894 } 895 896 return (mFileStreamError || status) ? NS_ERROR_FAILURE : NS_OK; 897 } 898 899 } // namespace mozilla 900 901 #undef LOG