WMFAudioMFTManager.cpp (10898B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "WMFAudioMFTManager.h" 8 9 #include "BufferReader.h" 10 #include "MediaInfo.h" 11 #include "TimeUnits.h" 12 #include "VideoUtils.h" 13 #include "WMFUtils.h" 14 #include "mozilla/AbstractThread.h" 15 #include "mozilla/Logging.h" 16 #include "mozilla/ScopeExit.h" 17 #include "nsTArray.h" 18 19 #define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__)) 20 21 namespace mozilla { 22 23 using media::TimeUnit; 24 25 WMFAudioMFTManager::WMFAudioMFTManager(const AudioInfo& aConfig) 26 : mAudioChannels(aConfig.mChannels), 27 mChannelsMap(AudioConfig::ChannelLayout::UNKNOWN_MAP), 28 mAudioRate(aConfig.mRate), 29 mStreamType(GetStreamTypeFromMimeType(aConfig.mMimeType)) { 30 MOZ_COUNT_CTOR(WMFAudioMFTManager); 31 32 if (mStreamType == WMFStreamType::AAC) { 33 const uint8_t* audioSpecConfig; 34 uint32_t configLength; 35 if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) { 36 const AacCodecSpecificData& aacCodecSpecificData = 37 aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>(); 38 audioSpecConfig = 39 aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Elements(); 40 configLength = 41 aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Length(); 42 43 mRemainingEncoderDelay = mEncoderDelay = 44 aacCodecSpecificData.mEncoderDelayFrames; 45 mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount; 46 LOG("AudioMFT decoder: Found AAC decoder delay (%" PRIu32 47 "frames) and total media frames (%" PRIu64 " frames)\n", 48 mEncoderDelay, mTotalMediaFrames); 49 } else { 50 // Gracefully handle failure to cover all codec specific cases above. Once 51 // we're confident there is no fall through from these cases above, we 52 // should remove this code. 53 RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob = 54 GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig); 55 audioSpecConfig = audioCodecSpecificBinaryBlob->Elements(); 56 configLength = audioCodecSpecificBinaryBlob->Length(); 57 } 58 // If no extradata has been provided, assume this is ADTS. Otherwise, 59 // assume raw AAC packets. 60 mIsADTS = !configLength; 61 AACAudioSpecificConfigToUserData(aConfig.mExtendedProfile, audioSpecConfig, 62 configLength, mUserData); 63 } 64 } 65 66 WMFAudioMFTManager::~WMFAudioMFTManager() { 67 MOZ_COUNT_DTOR(WMFAudioMFTManager); 68 } 69 70 const GUID& WMFAudioMFTManager::GetMediaSubtypeGUID() { 71 MOZ_ASSERT(StreamTypeIsAudio(mStreamType)); 72 switch (mStreamType) { 73 case WMFStreamType::AAC: 74 return MFAudioFormat_AAC; 75 case WMFStreamType::MP3: 76 return MFAudioFormat_MP3; 77 default: 78 return GUID_NULL; 79 }; 80 } 81 82 bool WMFAudioMFTManager::Init() { 83 AUTO_PROFILER_LABEL("WMFAudioMFTManager::Init", MEDIA_PLAYBACK); 84 NS_ENSURE_TRUE(StreamTypeIsAudio(mStreamType), false); 85 86 RefPtr<MFTDecoder> decoder(new MFTDecoder()); 87 // Note: MP3 MFT isn't registered as supporting Float output, but it works. 88 // Find PCM output MFTs as this is the common type. 89 HRESULT hr = WMFDecoderModule::CreateMFTDecoder(mStreamType, decoder); 90 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 91 92 // Setup input/output media types 93 RefPtr<IMFMediaType> inputType; 94 95 hr = wmf::MFCreateMediaType(getter_AddRefs(inputType)); 96 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 97 98 hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio); 99 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 100 101 hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID()); 102 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 103 104 hr = inputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, mAudioRate); 105 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 106 107 hr = inputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, mAudioChannels); 108 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 109 110 if (mStreamType == WMFStreamType::AAC) { 111 UINT32 payloadType = mIsADTS ? 1 : 0; 112 hr = inputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, payloadType); 113 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 114 115 hr = inputType->SetBlob(MF_MT_USER_DATA, mUserData.Elements(), 116 mUserData.Length()); 117 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 118 } 119 120 RefPtr<IMFMediaType> outputType; 121 hr = wmf::MFCreateMediaType(getter_AddRefs(outputType)); 122 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 123 124 hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio); 125 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 126 127 hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float); 128 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 129 130 hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 32); 131 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 132 133 hr = decoder->SetMediaTypes(inputType, outputType, MFAudioFormat_Float); 134 NS_ENSURE_TRUE(SUCCEEDED(hr), false); 135 136 mDecoder = decoder; 137 138 return true; 139 } 140 141 HRESULT 142 WMFAudioMFTManager::Input(MediaRawData* aSample) { 143 mLastInputTime = aSample->mTime; 144 return mDecoder->Input(aSample->Data(), uint32_t(aSample->Size()), 145 aSample->mTime.ToMicroseconds(), 146 aSample->mDuration.ToMicroseconds()); 147 } 148 149 nsCString WMFAudioMFTManager::GetCodecName() const { 150 if (mStreamType == WMFStreamType::AAC) { 151 return "aac"_ns; 152 } 153 if (mStreamType == WMFStreamType::MP3) { 154 return "mp3"_ns; 155 } 156 return "unknown"_ns; 157 } 158 159 HRESULT 160 WMFAudioMFTManager::UpdateOutputType() { 161 HRESULT hr; 162 163 RefPtr<IMFMediaType> type; 164 hr = mDecoder->GetOutputMediaType(type); 165 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 166 167 hr = type->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &mAudioRate); 168 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 169 170 hr = type->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &mAudioChannels); 171 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 172 173 uint32_t channelsMap; 174 hr = type->GetUINT32(MF_MT_AUDIO_CHANNEL_MASK, &channelsMap); 175 if (SUCCEEDED(hr)) { 176 mChannelsMap = channelsMap; 177 } else { 178 LOG("Unable to retrieve channel layout. Ignoring"); 179 mChannelsMap = AudioConfig::ChannelLayout::UNKNOWN_MAP; 180 } 181 182 return S_OK; 183 } 184 185 HRESULT 186 WMFAudioMFTManager::Output(int64_t aStreamOffset, RefPtr<MediaData>& aOutput) { 187 aOutput = nullptr; 188 RefPtr<IMFSample> sample; 189 HRESULT hr; 190 int typeChangeCount = 0; 191 const auto oldAudioRate = mAudioRate; 192 while (true) { 193 hr = mDecoder->Output(&sample); 194 if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) { 195 return hr; 196 } 197 if (hr == MF_E_TRANSFORM_STREAM_CHANGE) { 198 hr = mDecoder->FindDecoderOutputType(MFAudioFormat_Float); 199 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 200 hr = UpdateOutputType(); 201 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 202 // Catch infinite loops, but some decoders perform at least 2 stream 203 // changes on consecutive calls, so be permissive. 204 // 100 is arbitrarily > 2. 205 NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE); 206 ++typeChangeCount; 207 continue; 208 } 209 break; 210 } 211 212 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 213 214 if (!sample) { 215 LOG("Audio MFTDecoder returned success but null output."); 216 return E_FAIL; 217 } 218 219 UINT32 discontinuity = false; 220 sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity); 221 if (mFirstFrame || discontinuity) { 222 // Update the output type, in case this segment has a different 223 // rate. This also triggers on the first sample, which can have a 224 // different rate than is advertised in the container, and sometimes we 225 // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes. 226 hr = UpdateOutputType(); 227 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 228 mFirstFrame = false; 229 } 230 231 LONGLONG hns; 232 hr = sample->GetSampleTime(&hns); 233 if (FAILED(hr)) { 234 return E_FAIL; 235 } 236 TimeUnit pts = TimeUnit::FromHns(hns, mAudioRate); 237 NS_ENSURE_TRUE(pts.IsValid(), E_FAIL); 238 239 RefPtr<IMFMediaBuffer> buffer; 240 hr = sample->ConvertToContiguousBuffer(getter_AddRefs(buffer)); 241 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 242 243 BYTE* data = nullptr; // Note: *data will be owned by the IMFMediaBuffer, we 244 // don't need to free it. 245 DWORD maxLength = 0, currentLength = 0; 246 hr = buffer->Lock(&data, &maxLength, ¤tLength); 247 ScopeExit exit([buffer] { buffer->Unlock(); }); 248 NS_ENSURE_TRUE(SUCCEEDED(hr), hr); 249 250 // Output is made of floats. 251 uint32_t numSamples = currentLength / sizeof(float); 252 uint32_t numFrames = numSamples / mAudioChannels; 253 MOZ_ASSERT(numFrames >= 0); 254 MOZ_ASSERT(numSamples >= 0); 255 if (numFrames == 0) { 256 // All data from this chunk stripped, loop back and try to output the next 257 // frame, if possible. 258 return S_OK; 259 } 260 261 if (oldAudioRate != mAudioRate) { 262 LOG("Audio rate changed from %" PRIu32 " to %" PRIu32, oldAudioRate, 263 mAudioRate); 264 } 265 266 AlignedAudioBuffer audioData(numSamples); 267 if (!audioData) { 268 return E_OUTOFMEMORY; 269 } 270 271 float* floatData = reinterpret_cast<float*>(data); 272 PodCopy(audioData.Data(), floatData, numSamples); 273 274 TimeUnit duration(numFrames, mAudioRate); 275 NS_ENSURE_TRUE(duration.IsValid(), E_FAIL); 276 277 const bool isAudioRateChangedToHigher = oldAudioRate < mAudioRate; 278 if (IsPartialOutput(duration, isAudioRateChangedToHigher)) { 279 LOG("Encounter a partial frame?! duration shrinks from %s to %s", 280 mLastOutputDuration.ToString().get(), duration.ToString().get()); 281 return MF_E_TRANSFORM_NEED_MORE_INPUT; 282 } 283 284 aOutput = new AudioData(aStreamOffset, pts, std::move(audioData), 285 mAudioChannels, mAudioRate, mChannelsMap); 286 MOZ_DIAGNOSTIC_ASSERT(duration == aOutput->mDuration, "must be equal"); 287 mLastOutputDuration = aOutput->mDuration; 288 289 #ifdef LOG_SAMPLE_DECODE 290 LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u", 291 pts.ToMicroseconds(), duration.ToMicroseconds(), currentLength); 292 #endif 293 294 return S_OK; 295 } 296 297 bool WMFAudioMFTManager::IsPartialOutput( 298 const media::TimeUnit& aNewOutputDuration, 299 const bool aIsRateChangedToHigher) const { 300 // This issue was found in Windows11, where AAC MFT decoder would incorrectly 301 // output partial output samples to us, even if MS's documentation said it 302 // won't happen [1]. More details are described in bug 1731430 comment 26. 303 // If the audio rate isn't changed to higher, which would result in shorter 304 // duration, but the new output duration is still shorter than the last one, 305 // then new output is possible an incorrect partial output. 306 // [1] 307 // https://docs.microsoft.com/en-us/windows/win32/medfound/mft-message-command-drain 308 if (mStreamType != WMFStreamType::AAC) { 309 return false; 310 } 311 if (mLastOutputDuration > aNewOutputDuration && !aIsRateChangedToHigher) { 312 return true; 313 } 314 return false; 315 } 316 317 void WMFAudioMFTManager::Shutdown() { mDecoder = nullptr; } 318 319 } // namespace mozilla 320 321 #undef LOG