MFTEncoder.h (10998B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef DOM_MEDIA_PLATFORM_WMF_MFTENCODER_H 8 #define DOM_MEDIA_PLATFORM_WMF_MFTENCODER_H 9 10 #include <wrl.h> 11 12 #include <deque> 13 #include <functional> 14 #include <queue> 15 16 #include "EncoderConfig.h" 17 #include "WMF.h" 18 #include "mozilla/DataMutex.h" 19 #include "mozilla/DefineEnum.h" 20 #include "mozilla/EnumSet.h" 21 #include "mozilla/MozPromise.h" 22 #include "mozilla/RefPtr.h" 23 #include "mozilla/ResultVariant.h" 24 #include "nsDeque.h" 25 #include "nsISupportsImpl.h" 26 #include "nsTArray.h" 27 28 namespace mozilla { 29 30 class MFTEventSource; 31 32 class MFTEncoder final { 33 public: 34 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MFTEncoder) 35 36 struct InputSample { 37 RefPtr<IMFSample> mSample{}; 38 bool mKeyFrameRequested = false; 39 }; 40 using MPEGHeader = nsTArray<UINT8>; 41 struct OutputSample { 42 RefPtr<IMFSample> mSample{}; 43 MPEGHeader mHeader; 44 }; 45 46 using EncodedData = nsTArray<OutputSample>; 47 using EncodePromise = 48 MozPromise<EncodedData, MediaResult, /* IsExclusive = */ true>; 49 50 enum class HWPreference { 51 HardwareOnly, 52 SoftwareOnly, 53 PreferHardware, 54 PreferSoftware 55 }; 56 explicit MFTEncoder(const HWPreference aHWPreference) 57 : mHWPreference(aHWPreference) {} 58 59 HRESULT Create(const GUID& aSubtype, const gfx::IntSize& aFrameSize, 60 const EncoderConfig::CodecSpecific& aCodecSpecific); 61 HRESULT Destroy(); 62 HRESULT SetMediaTypes(IMFMediaType* aInputType, IMFMediaType* aOutputType); 63 HRESULT SetModes(const EncoderConfig& aConfig); 64 HRESULT SetBitrate(UINT32 aBitsPerSec); 65 bool IsHardwareAccelerated() const; 66 67 RefPtr<EncodePromise> Encode(nsTArray<InputSample>&& aInputs); 68 RefPtr<EncodePromise> Drain(); 69 70 HRESULT CreateInputSample(RefPtr<IMFSample>* aSample, size_t aSize); 71 72 Result<MPEGHeader, HRESULT> GetMPEGSequenceHeader(); 73 74 static nsCString GetFriendlyName(const GUID& aSubtype); 75 76 struct Info final { 77 GUID mSubtype; 78 nsCString mName; 79 }; 80 struct Factory { 81 MOZ_DEFINE_ENUM_CLASS_WITH_TOSTRING_AT_CLASS_SCOPE( 82 Provider, (HW_AMD, HW_Intel, HW_NVIDIA, HW_Qualcomm, HW_Unknown, SW)) 83 84 Provider mProvider; 85 Microsoft::WRL::ComPtr<IMFActivate> mActivate; 86 nsCString mName; 87 88 Factory(Provider aProvider, 89 Microsoft::WRL::ComPtr<IMFActivate>&& aActivate); 90 Factory(Factory&& aOther) = default; 91 Factory(const Factory& aOther) = delete; 92 ~Factory(); 93 94 explicit operator bool() const { return mActivate; } 95 96 HRESULT Shutdown(); 97 }; 98 99 private: 100 friend class MFTEventSource; 101 102 ~MFTEncoder() { Destroy(); }; 103 104 static nsTArray<Info>& Infos(); 105 static nsTArray<Info> Enumerate(); 106 static Maybe<Info> GetInfo(const GUID& aSubtype); 107 108 // APIs for synchronous processing model. 109 Result<EncodedData, MediaResult> EncodeSync(nsTArray<InputSample>&& aInputs); 110 Result<EncodedData, MediaResult> DrainSync(); 111 Result<EncodedData, HRESULT> PullOutputs(); 112 113 // APIs for asynchronous processing model for regular usage. 114 Result<EncodedData, MediaResult> EncodeAsync(nsTArray<InputSample>&& aInputs); 115 Result<EncodedData, MediaResult> DrainAsync(); 116 117 MOZ_DEFINE_ENUM_CLASS_WITH_TOSTRING_AT_CLASS_SCOPE( 118 ProcessedResult, (AllAvailableInputsProcessed, InputProcessed, 119 OutputHeaderYielded, OutputDataYielded, DrainComplete)); 120 using ProcessedResults = EnumSet<ProcessedResult>; 121 Result<ProcessedResults, HRESULT> ProcessPendingEvents(); 122 Result<MediaEventType, HRESULT> GetPendingEvent(); 123 124 // For realtime usage in asynchronous processing model only. 125 RefPtr<EncodePromise> EncodeWithAsyncCallback( 126 nsTArray<InputSample>&& aInputs); 127 RefPtr<EncodePromise> DrainWithAsyncCallback(); 128 RefPtr<EncodePromise> PrepareForDrain(); 129 RefPtr<EncodePromise> StartDraining(); 130 void EventHandler(MediaEventType aEventType, HRESULT aStatus); 131 void MaybeResolveOrRejectEncodePromise(); 132 void MaybeResolveOrRejectDrainPromise(); 133 void MaybeResolveOrRejectPreDrainPromise(); 134 void MaybeResolveOrRejectAnyPendingPromise( 135 const MediaResult& aResult = NS_OK); 136 137 // APIs for asynchronous processing model regardless of usages. 138 Result<ProcessedResult, HRESULT> ProcessEvent(MediaEventType aType); 139 Result<ProcessedResult, HRESULT> ProcessInput(); 140 Result<ProcessedResult, HRESULT> ProcessOutput(); 141 Result<ProcessedResult, HRESULT> ProcessDrainComplete(); 142 Result<ProcessedResult, HRESULT> ProcessPendingInputs(); 143 144 // Utilities for both processing models. 145 class OutputResult { 146 public: 147 explicit OutputResult(already_AddRefed<IMFSample> aSample) 148 : mSample(aSample), mHeader() {} 149 explicit OutputResult(MPEGHeader&& aHeader) 150 : mSample(nullptr), mHeader(std::move(aHeader)) {} 151 bool IsSample() const { return mSample != nullptr; } 152 bool IsHeader() const { return !IsSample(); } 153 already_AddRefed<IMFSample> TakeSample() { 154 MOZ_ASSERT(IsSample()); 155 return mSample.forget(); 156 } 157 MPEGHeader TakeHeader() { 158 MOZ_ASSERT(IsHeader()); 159 return std::move(mHeader); 160 } 161 162 private: 163 RefPtr<IMFSample> mSample; 164 MPEGHeader mHeader; 165 }; 166 Result<OutputResult, HRESULT> GetOutputOrNewHeader(); 167 // Set the output type to the first available type found for the output 168 // stream. 169 HRESULT UpdateOutputType(); 170 HRESULT ProcessOutput(RefPtr<IMFSample>& aSample, DWORD& aOutputStatus, 171 DWORD& aBufferStatus); 172 HRESULT ProcessInput(InputSample&& aInput); 173 174 bool IsAsync() const { return mAsyncEventSource; } 175 176 // Return true when successfully enabled, false for MFT that doesn't support 177 // async processing model, and error otherwise. 178 using AsyncMFTResult = Result<bool, HRESULT>; 179 AsyncMFTResult AttemptEnableAsync(); 180 HRESULT GetStreamIDs(); 181 GUID MatchInputSubtype(IMFMediaType* aInputType); 182 HRESULT SendMFTMessage(MFT_MESSAGE_TYPE aMsg, ULONG_PTR aData); 183 184 MOZ_DEFINE_ENUM_CLASS_WITH_TOSTRING_AT_CLASS_SCOPE( 185 State, 186 (Uninited, Initializing, Inited, Encoding, PreDraining, Draining, Error)); 187 void SetState(State aState); 188 189 const HWPreference mHWPreference; 190 RefPtr<IMFTransform> mEncoder; 191 // For MFT object creation. See 192 // https://docs.microsoft.com/en-us/windows/win32/medfound/activation-objects 193 Maybe<Factory> mFactory; 194 // For encoder configuration. See 195 // https://docs.microsoft.com/en-us/windows/win32/directshow/encoder-api 196 RefPtr<ICodecAPI> mConfig; 197 198 DWORD mInputStreamID; 199 DWORD mOutputStreamID; 200 MFT_INPUT_STREAM_INFO mInputStreamInfo; 201 MFT_OUTPUT_STREAM_INFO mOutputStreamInfo; 202 bool mOutputStreamProvidesSample; 203 204 State mState = State::Uninited; 205 bool mIsRealtime = false; 206 207 // The following members are used only for asynchronous processing model 208 size_t mNumNeedInput; 209 std::deque<InputSample> mPendingInputs; 210 211 nsTArray<OutputSample> mOutputs; 212 // Holds a temporary MPEGSequenceHeader to be attached to the first output 213 // packet after format renegotiation. 214 MPEGHeader mOutputHeader; 215 216 RefPtr<MFTEventSource> mAsyncEventSource; 217 218 // The following members are used only for realtime asynchronous processing 219 // model. 220 MediaResult mPendingError; 221 MozPromiseHolder<EncodePromise> mEncodePromise; 222 MozPromiseHolder<EncodePromise> mDrainPromise; 223 MozPromiseHolder<EncodePromise> mPreDrainPromise; 224 // Use to resolve the encode promise if mAsyncEventSource doesn't response in 225 // time. 226 nsCOMPtr<nsITimer> mTimer; 227 }; 228 229 class MFTEventSource final : public IMFAsyncCallback { 230 public: 231 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MFTEventSource) 232 233 // A basic IMFMediaEventGenerator wrapper that does not support retrieving 234 // events from asynchronous callbacks when constructed this way. Events should 235 // instead be obtained by calling GetEvent(). 236 explicit MFTEventSource( 237 already_AddRefed<IMFMediaEventGenerator> aEventGenerator) 238 : MFTEventSource(GetCurrentSerialEventTarget(), nullptr, 239 std::move(aEventGenerator)) {} 240 // This constructor creates an MFTEventSource that forwards events from 241 // asynchronous callbacks directly to the MFTEncoder's event handler. In this 242 // usage, GetEvent() should not be called, as events are handled 243 // automatically. 244 MFTEventSource(MFTEncoder* aEncoder, 245 already_AddRefed<IMFMediaEventGenerator> aEventGenerator) 246 : MFTEventSource(GetCurrentSerialEventTarget(), aEncoder, 247 std::move(aEventGenerator)) {} 248 249 bool CanForwardEvents() const { return mEncoder; } 250 Result<MediaEventType, HRESULT> GetEvent(DWORD aFlags); 251 252 HRESULT BeginEventListening(); 253 254 // IMFAsyncCallback implementations: 255 STDMETHODIMP GetParameters(DWORD* aFlags, DWORD* aQueue) override; 256 // Invoke() can be called on any thread by the OS, but it will forward the 257 // event to the MFTEncoder's working thread. 258 STDMETHODIMP Invoke(IMFAsyncResult* aAsyncResult) override; 259 STDMETHODIMP QueryInterface(REFIID aIID, void** aPPV) override; 260 // AddRef() and Release() are implemented by 261 // NS_INLINE_DECL_THREADSAFE_REFCOUNTING. 262 263 using Id = size_t; 264 const Id mId; 265 266 private: 267 MFTEventSource(nsISerialEventTarget* aEncoderThread, MFTEncoder* aEncoder, 268 already_AddRefed<IMFMediaEventGenerator> aEventGenerator); 269 ~MFTEventSource(); 270 271 static Id GenerateId() { 272 static Id sNextId = 0; 273 return sNextId++; 274 } 275 276 // The following members are used to forwards events from any OS thread to the 277 // MFTEncoder's working thread. 278 const nsCOMPtr<nsISerialEventTarget> mEncoderThread; 279 const RefPtr<MFTEncoder> mEncoder; 280 // When acting as a simple wrapper for IMFMediaEventGenerator, mEventGenerator 281 // is always accessed from a single thread, making locking effectively 282 // cost-free. In scenarios where MFTEventSource forwards events to MFTEncoder, 283 // mEventGenerator will be accessed from multiple threads: event requests are 284 // made on the MFTEncoder's working thread (via BeginEventListening()), while 285 // event delivery occurs on the OS thread (via Invoke()). Since these 286 // operations do not happen concurrently, the overhead of DataMutex locking is 287 // negligible. DataMutex is used here to clarify that event requests and 288 // deliveries are performed on separate threads. Furthermore, because 289 // MFTEncoder might release MFTEventSource while waiting for an event—and the 290 // Windows Media Foundation documentation does not specify whether releasing 291 // IMFMediaEventGenerator cancels pending event waits—we release 292 // mEventGenerator in the MFTEventSource destructor to ensure all pending 293 // events are properly handled, rather than resetting it when MFTEncoder 294 // releases MFTEventSource. 295 DataMutex<RefPtr<IMFMediaEventGenerator>> mEventGenerator; 296 }; 297 298 } // namespace mozilla 299 300 #endif // DOM_MEDIA_PLATFORM_WMF_MFTENCODER_H