tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

WMFAudioMFTManager.cpp (10898B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "WMFAudioMFTManager.h"
      8 
      9 #include "BufferReader.h"
     10 #include "MediaInfo.h"
     11 #include "TimeUnits.h"
     12 #include "VideoUtils.h"
     13 #include "WMFUtils.h"
     14 #include "mozilla/AbstractThread.h"
     15 #include "mozilla/Logging.h"
     16 #include "mozilla/ScopeExit.h"
     17 #include "nsTArray.h"
     18 
     19 #define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__))
     20 
     21 namespace mozilla {
     22 
     23 using media::TimeUnit;
     24 
     25 WMFAudioMFTManager::WMFAudioMFTManager(const AudioInfo& aConfig)
     26    : mAudioChannels(aConfig.mChannels),
     27      mChannelsMap(AudioConfig::ChannelLayout::UNKNOWN_MAP),
     28      mAudioRate(aConfig.mRate),
     29      mStreamType(GetStreamTypeFromMimeType(aConfig.mMimeType)) {
     30  MOZ_COUNT_CTOR(WMFAudioMFTManager);
     31 
     32  if (mStreamType == WMFStreamType::AAC) {
     33    const uint8_t* audioSpecConfig;
     34    uint32_t configLength;
     35    if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
     36      const AacCodecSpecificData& aacCodecSpecificData =
     37          aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();
     38      audioSpecConfig =
     39          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Elements();
     40      configLength =
     41          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Length();
     42 
     43      mRemainingEncoderDelay = mEncoderDelay =
     44          aacCodecSpecificData.mEncoderDelayFrames;
     45      mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount;
     46      LOG("AudioMFT decoder: Found AAC decoder delay (%" PRIu32
     47          "frames) and total media frames (%" PRIu64 " frames)\n",
     48          mEncoderDelay, mTotalMediaFrames);
     49    } else {
     50      // Gracefully handle failure to cover all codec specific cases above. Once
     51      // we're confident there is no fall through from these cases above, we
     52      // should remove this code.
     53      RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob =
     54          GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig);
     55      audioSpecConfig = audioCodecSpecificBinaryBlob->Elements();
     56      configLength = audioCodecSpecificBinaryBlob->Length();
     57    }
     58    // If no extradata has been provided, assume this is ADTS. Otherwise,
     59    // assume raw AAC packets.
     60    mIsADTS = !configLength;
     61    AACAudioSpecificConfigToUserData(aConfig.mExtendedProfile, audioSpecConfig,
     62                                     configLength, mUserData);
     63  }
     64 }
     65 
     66 WMFAudioMFTManager::~WMFAudioMFTManager() {
     67  MOZ_COUNT_DTOR(WMFAudioMFTManager);
     68 }
     69 
     70 const GUID& WMFAudioMFTManager::GetMediaSubtypeGUID() {
     71  MOZ_ASSERT(StreamTypeIsAudio(mStreamType));
     72  switch (mStreamType) {
     73    case WMFStreamType::AAC:
     74      return MFAudioFormat_AAC;
     75    case WMFStreamType::MP3:
     76      return MFAudioFormat_MP3;
     77    default:
     78      return GUID_NULL;
     79  };
     80 }
     81 
     82 bool WMFAudioMFTManager::Init() {
     83  AUTO_PROFILER_LABEL("WMFAudioMFTManager::Init", MEDIA_PLAYBACK);
     84  NS_ENSURE_TRUE(StreamTypeIsAudio(mStreamType), false);
     85 
     86  RefPtr<MFTDecoder> decoder(new MFTDecoder());
     87  // Note: MP3 MFT isn't registered as supporting Float output, but it works.
     88  // Find PCM output MFTs as this is the common type.
     89  HRESULT hr = WMFDecoderModule::CreateMFTDecoder(mStreamType, decoder);
     90  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
     91 
     92  // Setup input/output media types
     93  RefPtr<IMFMediaType> inputType;
     94 
     95  hr = wmf::MFCreateMediaType(getter_AddRefs(inputType));
     96  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
     97 
     98  hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
     99  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    100 
    101  hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID());
    102  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    103 
    104  hr = inputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, mAudioRate);
    105  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    106 
    107  hr = inputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, mAudioChannels);
    108  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    109 
    110  if (mStreamType == WMFStreamType::AAC) {
    111    UINT32 payloadType = mIsADTS ? 1 : 0;
    112    hr = inputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, payloadType);
    113    NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    114 
    115    hr = inputType->SetBlob(MF_MT_USER_DATA, mUserData.Elements(),
    116                            mUserData.Length());
    117    NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    118  }
    119 
    120  RefPtr<IMFMediaType> outputType;
    121  hr = wmf::MFCreateMediaType(getter_AddRefs(outputType));
    122  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    123 
    124  hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
    125  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    126 
    127  hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
    128  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    129 
    130  hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 32);
    131  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    132 
    133  hr = decoder->SetMediaTypes(inputType, outputType, MFAudioFormat_Float);
    134  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
    135 
    136  mDecoder = decoder;
    137 
    138  return true;
    139 }
    140 
    141 HRESULT
    142 WMFAudioMFTManager::Input(MediaRawData* aSample) {
    143  mLastInputTime = aSample->mTime;
    144  return mDecoder->Input(aSample->Data(), uint32_t(aSample->Size()),
    145                         aSample->mTime.ToMicroseconds(),
    146                         aSample->mDuration.ToMicroseconds());
    147 }
    148 
    149 nsCString WMFAudioMFTManager::GetCodecName() const {
    150  if (mStreamType == WMFStreamType::AAC) {
    151    return "aac"_ns;
    152  }
    153  if (mStreamType == WMFStreamType::MP3) {
    154    return "mp3"_ns;
    155  }
    156  return "unknown"_ns;
    157 }
    158 
    159 HRESULT
    160 WMFAudioMFTManager::UpdateOutputType() {
    161  HRESULT hr;
    162 
    163  RefPtr<IMFMediaType> type;
    164  hr = mDecoder->GetOutputMediaType(type);
    165  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    166 
    167  hr = type->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &mAudioRate);
    168  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    169 
    170  hr = type->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &mAudioChannels);
    171  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    172 
    173  uint32_t channelsMap;
    174  hr = type->GetUINT32(MF_MT_AUDIO_CHANNEL_MASK, &channelsMap);
    175  if (SUCCEEDED(hr)) {
    176    mChannelsMap = channelsMap;
    177  } else {
    178    LOG("Unable to retrieve channel layout. Ignoring");
    179    mChannelsMap = AudioConfig::ChannelLayout::UNKNOWN_MAP;
    180  }
    181 
    182  return S_OK;
    183 }
    184 
    185 HRESULT
    186 WMFAudioMFTManager::Output(int64_t aStreamOffset, RefPtr<MediaData>& aOutput) {
    187  aOutput = nullptr;
    188  RefPtr<IMFSample> sample;
    189  HRESULT hr;
    190  int typeChangeCount = 0;
    191  const auto oldAudioRate = mAudioRate;
    192  while (true) {
    193    hr = mDecoder->Output(&sample);
    194    if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
    195      return hr;
    196    }
    197    if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
    198      hr = mDecoder->FindDecoderOutputType(MFAudioFormat_Float);
    199      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    200      hr = UpdateOutputType();
    201      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    202      // Catch infinite loops, but some decoders perform at least 2 stream
    203      // changes on consecutive calls, so be permissive.
    204      // 100 is arbitrarily > 2.
    205      NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE);
    206      ++typeChangeCount;
    207      continue;
    208    }
    209    break;
    210  }
    211 
    212  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    213 
    214  if (!sample) {
    215    LOG("Audio MFTDecoder returned success but null output.");
    216    return E_FAIL;
    217  }
    218 
    219  UINT32 discontinuity = false;
    220  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
    221  if (mFirstFrame || discontinuity) {
    222    // Update the output type, in case this segment has a different
    223    // rate. This also triggers on the first sample, which can have a
    224    // different rate than is advertised in the container, and sometimes we
    225    // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes.
    226    hr = UpdateOutputType();
    227    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    228    mFirstFrame = false;
    229  }
    230 
    231  LONGLONG hns;
    232  hr = sample->GetSampleTime(&hns);
    233  if (FAILED(hr)) {
    234    return E_FAIL;
    235  }
    236  TimeUnit pts = TimeUnit::FromHns(hns, mAudioRate);
    237  NS_ENSURE_TRUE(pts.IsValid(), E_FAIL);
    238 
    239  RefPtr<IMFMediaBuffer> buffer;
    240  hr = sample->ConvertToContiguousBuffer(getter_AddRefs(buffer));
    241  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    242 
    243  BYTE* data = nullptr;  // Note: *data will be owned by the IMFMediaBuffer, we
    244                         // don't need to free it.
    245  DWORD maxLength = 0, currentLength = 0;
    246  hr = buffer->Lock(&data, &maxLength, &currentLength);
    247  ScopeExit exit([buffer] { buffer->Unlock(); });
    248  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    249 
    250  // Output is made of floats.
    251  uint32_t numSamples = currentLength / sizeof(float);
    252  uint32_t numFrames = numSamples / mAudioChannels;
    253  MOZ_ASSERT(numFrames >= 0);
    254  MOZ_ASSERT(numSamples >= 0);
    255  if (numFrames == 0) {
    256    // All data from this chunk stripped, loop back and try to output the next
    257    // frame, if possible.
    258    return S_OK;
    259  }
    260 
    261  if (oldAudioRate != mAudioRate) {
    262    LOG("Audio rate changed from %" PRIu32 " to %" PRIu32, oldAudioRate,
    263        mAudioRate);
    264  }
    265 
    266  AlignedAudioBuffer audioData(numSamples);
    267  if (!audioData) {
    268    return E_OUTOFMEMORY;
    269  }
    270 
    271  float* floatData = reinterpret_cast<float*>(data);
    272  PodCopy(audioData.Data(), floatData, numSamples);
    273 
    274  TimeUnit duration(numFrames, mAudioRate);
    275  NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);
    276 
    277  const bool isAudioRateChangedToHigher = oldAudioRate < mAudioRate;
    278  if (IsPartialOutput(duration, isAudioRateChangedToHigher)) {
    279    LOG("Encounter a partial frame?! duration shrinks from %s to %s",
    280        mLastOutputDuration.ToString().get(), duration.ToString().get());
    281    return MF_E_TRANSFORM_NEED_MORE_INPUT;
    282  }
    283 
    284  aOutput = new AudioData(aStreamOffset, pts, std::move(audioData),
    285                          mAudioChannels, mAudioRate, mChannelsMap);
    286  MOZ_DIAGNOSTIC_ASSERT(duration == aOutput->mDuration, "must be equal");
    287  mLastOutputDuration = aOutput->mDuration;
    288 
    289 #ifdef LOG_SAMPLE_DECODE
    290  LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
    291      pts.ToMicroseconds(), duration.ToMicroseconds(), currentLength);
    292 #endif
    293 
    294  return S_OK;
    295 }
    296 
    297 bool WMFAudioMFTManager::IsPartialOutput(
    298    const media::TimeUnit& aNewOutputDuration,
    299    const bool aIsRateChangedToHigher) const {
    300  // This issue was found in Windows11, where AAC MFT decoder would incorrectly
    301  // output partial output samples to us, even if MS's documentation said it
    302  // won't happen [1]. More details are described in bug 1731430 comment 26.
    303  // If the audio rate isn't changed to higher, which would result in shorter
    304  // duration, but the new output duration is still shorter than the last one,
    305  // then new output is possible an incorrect partial output.
    306  // [1]
    307  // https://docs.microsoft.com/en-us/windows/win32/medfound/mft-message-command-drain
    308  if (mStreamType != WMFStreamType::AAC) {
    309    return false;
    310  }
    311  if (mLastOutputDuration > aNewOutputDuration && !aIsRateChangedToHigher) {
    312    return true;
    313  }
    314  return false;
    315 }
    316 
    317 void WMFAudioMFTManager::Shutdown() { mDecoder = nullptr; }
    318 
    319 }  // namespace mozilla
    320 
    321 #undef LOG