AudioDecoder.cpp (15915B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "mozilla/dom/AudioDecoder.h" 8 9 #include "DecoderTraits.h" 10 #include "MediaContainerType.h" 11 #include "MediaData.h" 12 #include "VideoUtils.h" 13 #include "mozilla/Assertions.h" 14 #include "mozilla/Logging.h" 15 #include "mozilla/Maybe.h" 16 #include "mozilla/Try.h" 17 #include "mozilla/dom/AudioDataBinding.h" 18 #include "mozilla/dom/AudioDecoderBinding.h" 19 #include "mozilla/dom/EncodedAudioChunk.h" 20 #include "mozilla/dom/EncodedAudioChunkBinding.h" 21 #include "mozilla/dom/ImageUtils.h" 22 #include "mozilla/dom/Promise.h" 23 #include "mozilla/dom/TypedArray.h" 24 #include "mozilla/dom/WebCodecsUtils.h" 25 #include "nsPrintfCString.h" 26 #include "nsReadableUtils.h" 27 28 extern mozilla::LazyLogModule gWebCodecsLog; 29 30 namespace mozilla::dom { 31 32 #ifdef LOG_INTERNAL 33 # undef LOG_INTERNAL 34 #endif // LOG_INTERNAL 35 #define LOG_INTERNAL(level, msg, ...) \ 36 MOZ_LOG(gWebCodecsLog, LogLevel::level, (msg, ##__VA_ARGS__)) 37 38 #ifdef LOG 39 # undef LOG 40 #endif // LOG 41 #define LOG(msg, ...) LOG_INTERNAL(Debug, msg, ##__VA_ARGS__) 42 43 #ifdef LOGW 44 # undef LOGW 45 #endif // LOGW 46 #define LOGW(msg, ...) LOG_INTERNAL(Warning, msg, ##__VA_ARGS__) 47 48 #ifdef LOGE 49 # undef LOGE 50 #endif // LOGE 51 #define LOGE(msg, ...) LOG_INTERNAL(Error, msg, ##__VA_ARGS__) 52 53 #ifdef LOGV 54 # undef LOGV 55 #endif // LOGV 56 #define LOGV(msg, ...) LOG_INTERNAL(Verbose, msg, ##__VA_ARGS__) 57 58 NS_IMPL_CYCLE_COLLECTION_INHERITED(AudioDecoder, DOMEventTargetHelper, 59 mErrorCallback, mOutputCallback) 60 NS_IMPL_ADDREF_INHERITED(AudioDecoder, DOMEventTargetHelper) 61 NS_IMPL_RELEASE_INHERITED(AudioDecoder, DOMEventTargetHelper) 62 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(AudioDecoder) 63 NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) 64 65 /* 66 * Below are helper classes 67 */ 68 69 AudioDecoderConfigInternal::AudioDecoderConfigInternal( 70 const nsAString& aCodec, uint32_t aSampleRate, uint32_t aNumberOfChannels, 71 already_AddRefed<MediaByteBuffer> aDescription) 72 : mCodec(aCodec), 73 mSampleRate(aSampleRate), 74 mNumberOfChannels(aNumberOfChannels), 75 mDescription(aDescription) {} 76 77 /*static*/ 78 RefPtr<AudioDecoderConfigInternal> AudioDecoderConfigInternal::Create( 79 const AudioDecoderConfig& aConfig) { 80 nsCString errorMessage; 81 if (!AudioDecoderTraits::Validate(aConfig, errorMessage)) { 82 LOGE("Failed to create AudioDecoderConfigInternal: %s", errorMessage.get()); 83 return nullptr; 84 } 85 86 RefPtr<MediaByteBuffer> description; 87 if (aConfig.mDescription.WasPassed()) { 88 auto rv = GetExtraDataFromArrayBuffer(aConfig.mDescription.Value()); 89 if (rv.isErr()) { // Invalid description data. 90 nsCString error; 91 GetErrorName(rv.unwrapErr(), error); 92 LOGE( 93 "Failed to create AudioDecoderConfigInternal due to invalid " 94 "description data. Error: %s", 95 error.get()); 96 return nullptr; 97 } 98 description = rv.unwrap(); 99 } 100 101 return MakeRefPtr<AudioDecoderConfigInternal>( 102 aConfig.mCodec, aConfig.mSampleRate, aConfig.mNumberOfChannels, 103 description.forget()); 104 } 105 106 nsCString AudioDecoderConfigInternal::ToString() const { 107 nsCString rv; 108 109 rv.AppendLiteral("AudioDecoderConfigInternal: "); 110 rv.AppendPrintf("%s %" PRIu32 "Hz %" PRIu32 " ch", 111 NS_ConvertUTF16toUTF8(mCodec).get(), mSampleRate, 112 mNumberOfChannels); 113 if (mDescription) { 114 rv.AppendPrintf("(%zu bytes of extradata)", mDescription->Length()); 115 } else { 116 rv.AppendLiteral("(no extradata)"); 117 } 118 119 return rv; 120 } 121 122 /* 123 * The followings are helpers for AudioDecoder methods 124 */ 125 126 // Map between WebCodecs pcm types as strings and codec numbers 127 // All other codecs 128 static nsTArray<nsCString> GuessMIMETypes(const nsAString& aCodec) { 129 nsCString codec = NS_ConvertUTF16toUTF8(aCodec); 130 nsTArray<nsCString> types; 131 for (const nsCString& container : GuessContainers(aCodec)) { 132 codec = ConvertCodecName(container, codec); 133 nsPrintfCString mime("audio/%s; codecs=%s", container.get(), codec.get()); 134 types.AppendElement(mime); 135 } 136 return types; 137 } 138 139 // https://w3c.github.io/webcodecs/#check-configuration-support 140 template <typename Config> 141 static bool CanDecodeAudio(const Config& aConfig) { 142 if (IsOnAndroid() && IsAACCodecString(aConfig.mCodec)) { 143 return false; 144 } 145 if (!IsSupportedAudioCodec(aConfig.mCodec)) { 146 return false; 147 } 148 bool typeSupported = false; 149 // TODO: Instead of calling CanHandleContainerType with the guessed the 150 // containers, DecoderTraits should provide an API to tell if a codec is 151 // decodable or not. 152 for (const nsCString& mime : GuessMIMETypes(aConfig.mCodec)) { 153 if (Maybe<MediaContainerType> containerType = 154 MakeMediaExtendedMIMEType(mime)) { 155 if (DecoderTraits::CanHandleContainerType( 156 *containerType, nullptr /* DecoderDoctorDiagnostics */) != 157 CANPLAY_NO) { 158 typeSupported = true; 159 } 160 } 161 } 162 163 if (!typeSupported) { 164 return false; 165 } 166 167 // Perform additional checks, often codec-specific. 168 // This is to error out only when attempting to `configure(...)` the decoder, 169 // not when calling `isConfigSupported(...)` 170 if constexpr (std::is_same_v<Config, AudioDecoderConfigInternal>) { 171 if (aConfig.mCodec.EqualsLiteral("opus")) { 172 if (aConfig.mNumberOfChannels > 2 && 173 (!aConfig.mDescription || aConfig.mDescription->Length() < 10)) { 174 LOG("Opus needs a description of at least 10 bytes when decoding > 2 " 175 "channels"); 176 return false; 177 } 178 } 179 if (!aConfig.mDescription && (aConfig.mCodec.EqualsLiteral("vorbis") || 180 aConfig.mCodec.EqualsLiteral("flac"))) { 181 LOG("vorbis and flac require a description"); 182 return false; 183 } 184 } 185 186 return true; 187 } 188 189 static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo( 190 const AudioDecoderConfigInternal& aConfig) { 191 // TODO: Instead of calling GetTracksInfo with the guessed containers, 192 // DecoderTraits should provide an API to create the TrackInfo directly. 193 for (const nsCString& mime : GuessMIMETypes(aConfig.mCodec)) { 194 if (Maybe<MediaContainerType> containerType = 195 MakeMediaExtendedMIMEType(mime)) { 196 if (nsTArray<UniquePtr<TrackInfo>> tracks = 197 DecoderTraits::GetTracksInfo(*containerType); 198 !tracks.IsEmpty()) { 199 return tracks; 200 } 201 } 202 } 203 return {}; 204 } 205 206 static Result<Ok, nsresult> CloneConfiguration( 207 RootedDictionary<AudioDecoderConfig>& aDest, JSContext* aCx, 208 const AudioDecoderConfig& aConfig, ErrorResult& aRv) { 209 aDest.mCodec = aConfig.mCodec; 210 if (aConfig.mDescription.WasPassed()) { 211 aDest.mDescription.Construct(); 212 MOZ_TRY(CloneBuffer(aCx, aDest.mDescription.Value(), 213 aConfig.mDescription.Value(), aRv)); 214 } 215 216 aDest.mNumberOfChannels = aConfig.mNumberOfChannels; 217 aDest.mSampleRate = aConfig.mSampleRate; 218 219 return Ok(); 220 } 221 222 // https://w3c.github.io/webcodecs/#create-a-audiodata 223 static RefPtr<AudioData> CreateAudioData(nsIGlobalObject* aGlobalObject, 224 mozilla::AudioData* aData) { 225 MOZ_ASSERT(aGlobalObject); 226 MOZ_ASSERT(aData); 227 228 auto buf = aData->MoveableData(); 229 // TODO: Ensure buf.Length() is a multiple of aData->mChannels and put it into 230 // AssertedCast<uint32_t> (sinze return type of buf.Length() is size_t). 231 uint32_t frames = buf.Length() / aData->mChannels; 232 RefPtr<AudioDataResource> resource = AudioDataResource::Create(Span{ 233 reinterpret_cast<uint8_t*>(buf.Data()), buf.Length() * sizeof(float)}); 234 return MakeRefPtr<AudioData>(aGlobalObject, resource.forget(), 235 aData->mTime.ToMicroseconds(), aData->mChannels, 236 frames, AssertedCast<float>(aData->mRate), 237 mozilla::dom::AudioSampleFormat::F32); 238 } 239 240 /* static */ 241 bool AudioDecoderTraits::IsSupported( 242 const AudioDecoderConfigInternal& aConfig) { 243 return CanDecodeAudio(aConfig); 244 } 245 246 /* static */ 247 Result<UniquePtr<TrackInfo>, nsresult> AudioDecoderTraits::CreateTrackInfo( 248 const AudioDecoderConfigInternal& aConfig) { 249 LOG("Create a AudioInfo from %s config", 250 NS_ConvertUTF16toUTF8(aConfig.mCodec).get()); 251 252 nsTArray<UniquePtr<TrackInfo>> tracks = GetTracksInfo(aConfig); 253 if (tracks.Length() != 1 || tracks[0]->GetType() != TrackInfo::kAudioTrack) { 254 LOGE("Failed to get TrackInfo"); 255 return Err(NS_ERROR_INVALID_ARG); 256 } 257 258 UniquePtr<TrackInfo> track(std::move(tracks[0])); 259 AudioInfo* ai = track->GetAsAudioInfo(); 260 if (!ai) { 261 LOGE("Failed to get AudioInfo"); 262 return Err(NS_ERROR_INVALID_ARG); 263 } 264 265 if (aConfig.mDescription) { 266 if (!aConfig.mDescription->IsEmpty()) { 267 LOG("The given config has %zu bytes of description data", 268 aConfig.mDescription->Length()); 269 ai->mCodecSpecificConfig = AudioCodecSpecificVariant{ 270 AudioCodecSpecificBinaryBlob{aConfig.mDescription}}; 271 } 272 } 273 274 ai->mChannels = aConfig.mNumberOfChannels; 275 ai->mRate = aConfig.mSampleRate; 276 277 LOG("Created AudioInfo %s (%" PRIu32 "ch %" PRIu32 278 "Hz - with extra-data: %s)", 279 NS_ConvertUTF16toUTF8(aConfig.mCodec).get(), ai->mChannels, ai->mRate, 280 aConfig.mDescription && !aConfig.mDescription->IsEmpty() ? "yes" : "no"); 281 282 return track; 283 } 284 285 // https://w3c.github.io/webcodecs/#valid-audiodecoderconfig 286 /* static */ 287 bool AudioDecoderTraits::Validate(const AudioDecoderConfig& aConfig, 288 nsCString& aErrorMessage) { 289 Maybe<nsString> codec = ParseCodecString(aConfig.mCodec); 290 if (!codec || codec->IsEmpty()) { 291 LOGE("Validating AudioDecoderConfig: invalid codec string"); 292 293 aErrorMessage.AppendPrintf("Invalid codec string %s", 294 NS_ConvertUTF16toUTF8(aConfig.mCodec).get()); 295 return false; 296 } 297 298 LOG("Validating AudioDecoderConfig: codec: %s %uch %uHz %s extradata", 299 NS_ConvertUTF16toUTF8(codec.value()).get(), aConfig.mNumberOfChannels, 300 aConfig.mSampleRate, aConfig.mDescription.WasPassed() ? "w/" : "no"); 301 302 if (aConfig.mNumberOfChannels == 0) { 303 aErrorMessage.AppendPrintf("Invalid number of channels of %u", 304 aConfig.mNumberOfChannels); 305 return false; 306 } 307 308 if (aConfig.mSampleRate == 0) { 309 aErrorMessage.AppendPrintf("Invalid sample-rate of %u", 310 aConfig.mNumberOfChannels); 311 return false; 312 } 313 314 bool detached = 315 aConfig.mDescription.WasPassed() && 316 (aConfig.mDescription.Value().IsArrayBuffer() 317 ? JS::ArrayBuffer::fromObject( 318 aConfig.mDescription.Value().GetAsArrayBuffer().Obj()) 319 .isDetached() 320 : JS::ArrayBufferView::fromObject( 321 aConfig.mDescription.Value().GetAsArrayBufferView().Obj()) 322 .isDetached()); 323 324 if (detached) { 325 LOGE("description is detached."); 326 return false; 327 } 328 329 return true; 330 } 331 332 /* static */ 333 RefPtr<AudioDecoderConfigInternal> AudioDecoderTraits::CreateConfigInternal( 334 const AudioDecoderConfig& aConfig) { 335 return AudioDecoderConfigInternal::Create(aConfig); 336 } 337 338 /* static */ 339 bool AudioDecoderTraits::IsKeyChunk(const EncodedAudioChunk& aInput) { 340 return aInput.Type() == EncodedAudioChunkType::Key; 341 } 342 343 /* static */ 344 UniquePtr<EncodedAudioChunkData> AudioDecoderTraits::CreateInputInternal( 345 const EncodedAudioChunk& aInput) { 346 return aInput.Clone(); 347 } 348 349 /* 350 * Below are AudioDecoder implementation 351 */ 352 353 AudioDecoder::AudioDecoder(nsIGlobalObject* aParent, 354 RefPtr<WebCodecsErrorCallback>&& aErrorCallback, 355 RefPtr<AudioDataOutputCallback>&& aOutputCallback) 356 : DecoderTemplate(aParent, std::move(aErrorCallback), 357 std::move(aOutputCallback)) { 358 MOZ_ASSERT(mErrorCallback); 359 MOZ_ASSERT(mOutputCallback); 360 LOG("AudioDecoder %p ctor", this); 361 } 362 363 AudioDecoder::~AudioDecoder() { 364 LOG("AudioDecoder %p dtor", this); 365 (void)ResetInternal(NS_ERROR_DOM_ABORT_ERR); 366 } 367 368 JSObject* AudioDecoder::WrapObject(JSContext* aCx, 369 JS::Handle<JSObject*> aGivenProto) { 370 AssertIsOnOwningThread(); 371 372 return AudioDecoder_Binding::Wrap(aCx, this, aGivenProto); 373 } 374 375 // https://w3c.github.io/webcodecs/#dom-audiodecoder-audiodecoder 376 /* static */ 377 already_AddRefed<AudioDecoder> AudioDecoder::Constructor( 378 const GlobalObject& aGlobal, const AudioDecoderInit& aInit, 379 ErrorResult& aRv) { 380 nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(aGlobal.GetAsSupports()); 381 if (!global) { 382 aRv.Throw(NS_ERROR_FAILURE); 383 return nullptr; 384 } 385 386 return MakeAndAddRef<AudioDecoder>( 387 global.get(), RefPtr<WebCodecsErrorCallback>(aInit.mError), 388 RefPtr<AudioDataOutputCallback>(aInit.mOutput)); 389 } 390 391 // https://w3c.github.io/webcodecs/#dom-audiodecoder-isconfigsupported 392 /* static */ 393 already_AddRefed<Promise> AudioDecoder::IsConfigSupported( 394 const GlobalObject& aGlobal, const AudioDecoderConfig& aConfig, 395 ErrorResult& aRv) { 396 LOG("AudioDecoder::IsConfigSupported, config: %s", 397 NS_ConvertUTF16toUTF8(aConfig.mCodec).get()); 398 399 nsCOMPtr<nsIGlobalObject> global = do_QueryInterface(aGlobal.GetAsSupports()); 400 if (!global) { 401 aRv.Throw(NS_ERROR_FAILURE); 402 return nullptr; 403 } 404 405 RefPtr<Promise> p = Promise::Create(global.get(), aRv); 406 if (NS_WARN_IF(aRv.Failed())) { 407 return p.forget(); 408 } 409 410 nsCString errorMessage; 411 if (!AudioDecoderTraits::Validate(aConfig, errorMessage)) { 412 p->MaybeRejectWithTypeError(errorMessage); 413 return p.forget(); 414 } 415 416 RootedDictionary<AudioDecoderConfig> config(aGlobal.Context()); 417 auto r = CloneConfiguration(config, aGlobal.Context(), aConfig, aRv); 418 if (r.isErr()) { 419 // This can only be an OOM: all members to clone are known to be valid 420 // because this is check by ::Validate above. 421 MOZ_ASSERT(r.inspectErr() == NS_ERROR_OUT_OF_MEMORY && 422 aRv.ErrorCodeIs(NS_ERROR_OUT_OF_MEMORY)); 423 return p.forget(); 424 } 425 426 bool canDecode = CanDecodeAudio(config); 427 RootedDictionary<AudioDecoderSupport> s(aGlobal.Context()); 428 s.mConfig.Construct(std::move(config)); 429 s.mSupported.Construct(canDecode); 430 431 p->MaybeResolve(s); 432 return p.forget(); 433 } 434 435 already_AddRefed<MediaRawData> AudioDecoder::InputDataToMediaRawData( 436 UniquePtr<EncodedAudioChunkData>&& aData, TrackInfo& aInfo, 437 const AudioDecoderConfigInternal& aConfig) { 438 AssertIsOnOwningThread(); 439 MOZ_ASSERT(aInfo.GetAsAudioInfo()); 440 441 if (!aData) { 442 LOGE("No data for conversion"); 443 return nullptr; 444 } 445 446 RefPtr<MediaRawData> sample = aData->TakeData(); 447 if (!sample) { 448 LOGE("Take no data for conversion"); 449 return nullptr; 450 } 451 452 LOGV( 453 "EncodedAudioChunkData %p converted to %zu-byte MediaRawData - time: " 454 "%" PRIi64 "us, timecode: %" PRIi64 "us, duration: %" PRIi64 455 "us, key-frame: %s", 456 aData.get(), sample->Size(), sample->mTime.ToMicroseconds(), 457 sample->mTimecode.ToMicroseconds(), sample->mDuration.ToMicroseconds(), 458 sample->mKeyframe ? "yes" : "no"); 459 460 return sample.forget(); 461 } 462 463 nsTArray<RefPtr<AudioData>> AudioDecoder::DecodedDataToOutputType( 464 nsIGlobalObject* aGlobalObject, const nsTArray<RefPtr<MediaData>>&& aData, 465 const AudioDecoderConfigInternal& aConfig) { 466 AssertIsOnOwningThread(); 467 468 nsTArray<RefPtr<AudioData>> frames; 469 for (const RefPtr<MediaData>& data : aData) { 470 MOZ_RELEASE_ASSERT(data->mType == MediaData::Type::AUDIO_DATA); 471 RefPtr<mozilla::AudioData> d(data->As<mozilla::AudioData>()); 472 frames.AppendElement(CreateAudioData(aGlobalObject, d.get())); 473 } 474 return frames; 475 } 476 477 #undef LOG 478 #undef LOGW 479 #undef LOGE 480 #undef LOGV 481 #undef LOG_INTERNAL 482 483 } // namespace mozilla::dom