FFmpegVideoDecoder.h (13938B)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef __FFmpegVideoDecoder_h__
#define __FFmpegVideoDecoder_h__

#include <atomic>

#include "AndroidSurfaceTexture.h"
#include "FFmpegDataDecoder.h"
#include "FFmpegLibWrapper.h"
#include "ImageContainer.h"
#include "PerformanceRecorder.h"
#include "SimpleMap.h"
#include "nsTHashSet.h"
#if LIBAVCODEC_VERSION_MAJOR >= 57 && LIBAVUTIL_VERSION_MAJOR >= 56
#  include "mozilla/layers/TextureClient.h"
#endif
#if defined(MOZ_USE_HWDECODE) && defined(MOZ_WIDGET_GTK)
#  include "FFmpegVideoFramePool.h"
#endif
#include "libavutil/pixfmt.h"
#if LIBAVCODEC_VERSION_MAJOR < 54
// Old libavcodec spelled the enum "PixelFormat"; alias it so the rest of this
// header can use the modern name unconditionally.
#  define AVPixelFormat PixelFormat
#endif

#ifdef MOZ_WIDGET_ANDROID
#  include "mozilla/java/GeckoSurfaceWrappers.h"
#endif

// The side-table mapping input timestamps to per-sample info (see InputInfo
// below) is only needed on old libavcodec (< 58) or on Android.
#if LIBAVCODEC_VERSION_MAJOR < 58 || defined(MOZ_WIDGET_ANDROID)
#  define MOZ_FFMPEG_USE_INPUT_INFO_MAP
#endif

// Forward-declared here so we don't have to pull in VA-API headers.
struct _VADRMPRIMESurfaceDescriptor;
typedef struct _VADRMPRIMESurfaceDescriptor VADRMPRIMESurfaceDescriptor;

namespace mozilla {
namespace layers {
class BufferRecycleBin;
}

class ImageBufferWrapper;

#ifdef MOZ_ENABLE_D3D11VA
class DXVA2Manager;
#endif

// Primary template is an empty stub; only the LIBAV_VER specialization below
// provides the real implementation.
template <int V>
class FFmpegVideoDecoder : public FFmpegDataDecoder<V> {};

template <>
class FFmpegVideoDecoder<LIBAV_VER>;
DDLoggedTypeNameAndBase(FFmpegVideoDecoder<LIBAV_VER>,
                        FFmpegDataDecoder<LIBAV_VER>);

// Video decoder backed by ffmpeg/ffvpx (via FFmpegLibWrapper). Supports
// optional hardware decoding backends selected at compile time: D3D11VA
// (Windows), MediaCodec (Android), VAAPI/V4L2 (Linux).
template <>
class FFmpegVideoDecoder<LIBAV_VER>
    : public FFmpegDataDecoder<LIBAV_VER>,
      public DecoderDoctorLifeLogger<FFmpegVideoDecoder<LIBAV_VER>> {
  typedef mozilla::layers::Image Image;
  typedef mozilla::layers::ImageContainer ImageContainer;
  typedef mozilla::layers::KnowsCompositor KnowsCompositor;

 public:
  FFmpegVideoDecoder(const FFmpegLibWrapper* aLib, const VideoInfo& aConfig,
                     KnowsCompositor* aAllocator,
                     ImageContainer* aImageContainer, bool aLowLatency,
                     bool aDisableHardwareDecoding, bool a8BitOutput,
                     Maybe<TrackingId> aTrackingId, PRemoteCDMActor* aCDM);

  ~FFmpegVideoDecoder();

  RefPtr<InitPromise> Init() override;
  void InitCodecContext() MOZ_REQUIRES(sMutex) override;
  nsCString GetDescriptionName() const override {
#ifdef USING_MOZFFVPX
    return "ffvpx video decoder"_ns;
#else
    return "ffmpeg video decoder"_ns;
#endif
  }
  nsCString GetCodecName() const override;
  // Tells the caller which bitstream format this decoder expects:
  // Annex B for H.264/HEVC on Android (MediaCodec), otherwise AVCC/HVCC
  // length-prefixed samples for H.264/HEVC, and no conversion for
  // everything else.
  ConversionRequired NeedsConversion() const override {
#ifdef MOZ_WIDGET_ANDROID
    return mCodecID == AV_CODEC_ID_H264 || mCodecID == AV_CODEC_ID_HEVC
               ? ConversionRequired::kNeedAnnexB
               : ConversionRequired::kNeedNone;
#else
#  if LIBAVCODEC_VERSION_MAJOR >= 55
    if (mCodecID == AV_CODEC_ID_HEVC) {
      return ConversionRequired::kNeedHVCC;
    }
#  endif
    return mCodecID == AV_CODEC_ID_H264 ? ConversionRequired::kNeedAVCC
                                        : ConversionRequired::kNeedNone;
#endif
  }

#ifdef MOZ_WIDGET_ANDROID
  Maybe<MediaDataDecoder::PropertyValue> GetDecodeProperty(
      MediaDataDecoder::PropertyName aName) const override;
#endif

  // Maps a MIME type to the ffmpeg codec id, or AV_CODEC_ID_NONE-style
  // failure for unsupported types (see implementation).
  static AVCodecID GetCodecId(const nsACString& aMimeType);

#if LIBAVCODEC_VERSION_MAJOR >= 57 && LIBAVUTIL_VERSION_MAJOR >= 56
  // Custom get_buffer2() callback: allocates decode buffers backed by our
  // own images/texture clients so decoded frames can be handed to the
  // compositor without a copy.
  int GetVideoBuffer(struct AVCodecContext* aCodecContext, AVFrame* aFrame,
                     int aFlags);
  // Fallback path: defers to ffmpeg's default buffer allocator and records
  // that shmem buffers are not being used for this decode.
  int GetVideoBufferDefault(struct AVCodecContext* aCodecContext,
                            AVFrame* aFrame, int aFlags) {
    mIsUsingShmemBufferForDecode = Some(false);
    return mLib->avcodec_default_get_buffer2(aCodecContext, aFrame, aFlags);
  }
  // Called by ImageBufferWrapper when ffmpeg drops its reference to a
  // custom-allocated image; removes it from the leak-tracking set.
  void ReleaseAllocatedImage(ImageBufferWrapper* aImage) {
    mAllocatedImages.Remove(aImage);
  }
#endif
  // Convenience overload that discards the failure reason.
  bool IsHardwareAccelerated() const {
    nsAutoCString dummy;
    return IsHardwareAccelerated(dummy);
  }

 private:
  RefPtr<FlushPromise> ProcessFlush() override;
  void ProcessShutdown() override;
  MediaResult DoDecode(MediaRawData* aSample, uint8_t* aData, int aSize,
                       bool* aGotFrame, DecodedData& aResults) override;
  void OutputDelayedFrames();
  // Old libavcodec (< 58) needs an explicit AVParser for VP8/VP9 input;
  // newer versions handle this internally.
  bool NeedParser() const override {
    return
#if LIBAVCODEC_VERSION_MAJOR >= 58
        false;
#else
#  if LIBAVCODEC_VERSION_MAJOR >= 55
        mCodecID == AV_CODEC_ID_VP9 ||
#  endif
        mCodecID == AV_CODEC_ID_VP8;
#endif
  }
  gfx::ColorDepth GetColorDepth(const AVPixelFormat& aFormat) const;
  gfx::YUVColorSpace GetFrameColorSpace() const;
  gfx::ColorSpace2 GetFrameColorPrimaries() const;
  gfx::ColorRange GetFrameColorRange() const;
  gfx::SurfaceFormat GetSurfaceFormat() const;

  // Wraps the current decoded AVFrame into a MediaData appended to aResults.
  MediaResult CreateImage(int64_t aOffset, int64_t aPts, int64_t aDuration,
                          MediaDataDecoder::DecodedData& aResults);

  bool IsHardwareAccelerated(nsACString& aFailureReason) const override;

#if LIBAVCODEC_VERSION_MAJOR >= 57 && LIBAVUTIL_VERSION_MAJOR >= 56
  layers::TextureClient* AllocateTextureClientForImage(
      struct AVCodecContext* aCodecContext, layers::PlanarYCbCrImage* aImage);

  // Returns aWidth/aHeight padded to the alignment ffmpeg requires for
  // direct-to-texture decode buffers.
  gfx::IntSize GetAlignmentVideoFrameSize(struct AVCodecContext* aCodecContext,
                                          int32_t aWidth,
                                          int32_t aHeight) const;
#endif

  RefPtr<KnowsCompositor> mImageAllocator;
  RefPtr<ImageContainer> mImageContainer;
  VideoInfo mInfo;

#ifdef MOZ_USE_HWDECODE
 public:
  // Finds a hardware-capable AVCodec for aCodec, optionally restricted to a
  // specific AVHWDeviceType (AV_HWDEVICE_TYPE_NONE = any).
  static AVCodec* FindVideoHardwareAVCodec(
      const FFmpegLibWrapper* aLib, AVCodecID aCodec,
      AVHWDeviceType aDeviceType = AV_HWDEVICE_TYPE_NONE);

 private:
  // This will be called inside the ctor.
  void InitHWDecoderIfAllowed();

  // Which hardware backend to configure the codec context for.
  enum class ContextType {
    D3D11VA,     // Windows
    MediaCodec,  // Android
    VAAPI,       // Linux Desktop
    V4L2,        // Linux embedded
  };
  void InitHWCodecContext(ContextType aType);

  bool ShouldDisableHWDecoding(bool aDisableHardwareDecoding) const;

  // True if hardware decoding is disabled explicitly.
  const bool mHardwareDecodingDisabled;
#endif

#ifdef MOZ_ENABLE_D3D11VA
  MediaResult InitD3D11VADecoder();

  MediaResult CreateImageD3D11(int64_t aOffset, int64_t aPts, int64_t aDuration,
                               MediaDataDecoder::DecodedData& aResults);
  bool CanUseZeroCopyVideoFrame() const;

  AVBufferRef* mD3D11VADeviceContext = nullptr;
  RefPtr<ID3D11Device> mDevice;
  UniquePtr<DXVA2Manager> mDXVA2Manager;
  // Number of HW Textures are already in use by Gecko
  std::atomic<uint8_t> mNumOfHWTexturesInUse{0};
#endif

#ifdef MOZ_WIDGET_ANDROID
  MediaResult InitMediaCodecDecoder();
  MediaResult CreateImageMediaCodec(int64_t aOffset, int64_t aPts,
                                    int64_t aTimecode, int64_t aDuration,
                                    MediaDataDecoder::DecodedData& aResults);
  int32_t mTextureAlignment;
  AVBufferRef* mMediaCodecDeviceContext = nullptr;
  java::GeckoSurface::GlobalRef mSurface;
  AndroidSurfaceTextureHandle mSurfaceHandle{};
#endif

#if defined(MOZ_USE_HWDECODE) && defined(MOZ_WIDGET_GTK)
  bool UploadSWDecodeToDMABuf() const;
  bool IsLinuxHDR() const;
  MediaResult InitVAAPIDecoder();
  MediaResult InitV4L2Decoder();
  bool CreateVAAPIDeviceContext();
  bool GetVAAPISurfaceDescriptor(VADRMPRIMESurfaceDescriptor* aVaDesc);
  void AddAcceleratedFormats(nsTArray<AVCodecID>& aCodecList,
                             AVCodecID aCodecID, AVVAAPIHWConfig* hwconfig);
  nsTArray<AVCodecID> GetAcceleratedFormats();
  bool IsFormatAccelerated(AVCodecID aCodecID) const;

  MediaResult CreateImageVAAPI(int64_t aOffset, int64_t aPts, int64_t aDuration,
                               MediaDataDecoder::DecodedData& aResults);
  MediaResult CreateImageV4L2(int64_t aOffset, int64_t aPts, int64_t aDuration,
                              MediaDataDecoder::DecodedData& aResults);
  void AdjustHWDecodeLogging();

  AVBufferRef* mVAAPIDeviceContext = nullptr;
  bool mUsingV4L2 = false;
  // If video overlay is used we want to upload SW decoded frames to
  // DMABuf and present it as a external texture to rendering pipeline.
  bool mUploadSWDecodeToDMABuf = false;
  VADisplay mDisplay = nullptr;
  UniquePtr<VideoFramePool<LIBAV_VER>> mVideoFramePool;
  static nsTArray<AVCodecID> mAcceleratedFormats;
#endif

#if LIBAVCODEC_VERSION_MAJOR >= 58
  // Tracks per-frame decode times vs. frame durations to detect when the
  // decoder consistently falls behind real time.
  class DecodeStats {
   public:
    void DecodeStart();
    void UpdateDecodeTimes(int64_t aDuration);
    bool IsDecodingSlow() const;

   private:
    uint32_t mDecodedFrames = 0;

    float mAverageFrameDecodeTime = 0;
    float mAverageFrameDuration = 0;

    // Number of delayed frames until we consider decoding as slow.
    const uint32_t mMaxLateDecodedFrames = 15;
    // How many frames is decoded behind its pts time, i.e. video decode lags.
    uint32_t mDecodedFramesLate = 0;

    // Reset mDecodedFramesLate every 3 seconds of correct playback.
    // NOTE(review): presumably milliseconds — confirm in the implementation.
    const uint32_t mDelayedFrameReset = 3000;

    uint32_t mLastDelayedFrameNum = 0;

    TimeStamp mDecodeStart;
  };

  DecodeStats mDecodeStats;
#endif

#if LIBAVCODEC_VERSION_MAJOR >= 58
  bool mHasSentDrainPacket = false;
#endif

#if LIBAVCODEC_VERSION_MAJOR < 58
  // Heuristic PTS reconstruction for old libavcodec that may report faulty
  // pts/dts values (see GuessCorrectPts in the implementation).
  class PtsCorrectionContext {
   public:
    PtsCorrectionContext();
    int64_t GuessCorrectPts(int64_t aPts, int64_t aDts);
    void Reset();
    int64_t LastDts() const { return mLastDts; }

   private:
    int64_t mNumFaultyPts;  /// Number of incorrect PTS values so far
    int64_t mNumFaultyDts;  /// Number of incorrect DTS values so far
    int64_t mLastPts;       /// PTS of the last frame
    int64_t mLastDts;       /// DTS of the last frame
  };

  PtsCorrectionContext mPtsContext;
#endif

#ifdef MOZ_FFMPEG_USE_INPUT_INFO_MAP
  // Per-sample metadata we must carry across the decoder because ffmpeg does
  // not return it with the decoded frame (see InsertInputInfo below).
  struct InputInfo {
    explicit InputInfo(const MediaRawData* aSample)
        : mDuration(aSample->mDuration.ToMicroseconds())
#  ifdef MOZ_WIDGET_ANDROID
          ,
          mTimecode(aSample->mTimecode.ToMicroseconds())
#  endif
    {
    }

    int64_t mDuration;
#  ifdef MOZ_WIDGET_ANDROID
    int64_t mTimecode;
#  endif
  };

  SimpleMap<int64_t, InputInfo, ThreadSafePolicy> mInputInfo;

  // Key used when inserting: presentation time on Android (MediaCodec keys
  // output frames by pts), decode timestamp elsewhere.
  static int64_t GetSampleInputKey(const MediaRawData* aSample) {
#  ifdef MOZ_WIDGET_ANDROID
    return aSample->mTime.ToMicroseconds();
#  else
    return aSample->mTimecode.ToMicroseconds();
#  endif
  }

  // Matching key recovered from the decoded AVFrame; must agree with
  // GetSampleInputKey above.
  static int64_t GetFrameInputKey(const AVFrame* aFrame) {
#  ifdef MOZ_WIDGET_ANDROID
    return aFrame->pts;
#  else
    return aFrame->pkt_dts;
#  endif
  }

  void InsertInputInfo(const MediaRawData* aSample) {
    // LibAV provides no API to retrieve the decoded sample's duration.
    // (FFmpeg >= 1.0 provides av_frame_get_pkt_duration)
    // Additionally some platforms (e.g. Android) do not supply a valid duration
    // after decoding. As such we instead use a map using the given ts as key
    // that we will retrieve later. The map will have a typical size of 16
    // entry.
    mInputInfo.Insert(GetSampleInputKey(aSample), InputInfo(aSample));
  }

  void TakeInputInfo(const AVFrame* aFrame, InputInfo& aEntry) {
    // Retrieve duration from the given ts.
    // We use the first entry found matching this ts (this is done to
    // handle damaged file with multiple frames with the same ts)
    if (!mInputInfo.Find(GetFrameInputKey(aFrame), aEntry)) {
      NS_WARNING("Unable to retrieve input info from map");
      // dts are probably incorrectly reported ; so clear the map as we're
      // unlikely to find them in the future anyway. This also guards
      // against the map becoming extremely big.
      mInputInfo.Clear();
    }
  }
#endif

  const bool mLowLatency;
  const Maybe<TrackingId> mTrackingId;

  // Feeds decode-stage telemetry into mPerformanceRecorder.
  void RecordFrame(const MediaRawData* aSample, const MediaData* aData);

  PerformanceRecorderMulti<DecodeStage> mPerformanceRecorder;

  bool MaybeQueueDrain(const MediaDataDecoder::DecodedData& aData);
#ifdef MOZ_WIDGET_ANDROID
  void QueueResumeDrain();
  void ResumeDrain();

  Atomic<bool> mShouldResumeDrain{false};
#endif

  // True if we're allocating shmem for ffmpeg decode buffer.
  // Nothing() until the first buffer allocation decides the path.
  Maybe<Atomic<bool>> mIsUsingShmemBufferForDecode;

#if LIBAVCODEC_VERSION_MAJOR >= 57 && LIBAVUTIL_VERSION_MAJOR >= 56
  // These images are buffers for ffmpeg in order to store decoded data when
  // using custom allocator for decoding. We want to explictly track all images
  // we allocate to ensure that we won't leak any of them.
  //
  // All images tracked by mAllocatedImages are used by ffmpeg,
  // i.e. ffmpeg holds a reference to them and uses them in
  // its internal decoding queue.
  //
  // When an image is removed from mAllocatedImages it's recycled
  // for a new frame by AllocateTextureClientForImage() in
  // FFmpegVideoDecoder::GetVideoBuffer().
  nsTHashSet<RefPtr<ImageBufferWrapper>> mAllocatedImages;
#endif

  // Convert dav1d output to 8-bit when GPU doesn't support higher bit images.
  // See bug 1970771 for details.
  Atomic<bool> m8BitOutput;
  RefPtr<layers::BufferRecycleBin> m8BitRecycleBin;
};

#if LIBAVCODEC_VERSION_MAJOR >= 57 && LIBAVUTIL_VERSION_MAJOR >= 56
// Ref-counted wrapper handed to ffmpeg as the opaque owner of a decode
// buffer's backing Image. When ffmpeg releases the buffer, ReleaseBuffer()
// notifies the decoder so the image can be untracked/recycled. Holds the
// decoder via a raw non-owning pointer: the decoder must outlive all
// wrappers it allocates.
class ImageBufferWrapper final {
 public:
  typedef mozilla::layers::Image Image;
  typedef mozilla::layers::PlanarYCbCrImage PlanarYCbCrImage;

  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(ImageBufferWrapper)

  ImageBufferWrapper(Image* aImage, void* aDecoder)
      : mImage(aImage), mDecoder(aDecoder) {
    MOZ_ASSERT(aImage);
    MOZ_ASSERT(mDecoder);
  }

  Image* AsImage() { return mImage; }

  void ReleaseBuffer() {
    auto* decoder = static_cast<FFmpegVideoDecoder<LIBAV_VER>*>(mDecoder);
    decoder->ReleaseAllocatedImage(this);
  }

 private:
  ~ImageBufferWrapper() = default;
  const RefPtr<Image> mImage;
  void* const MOZ_NON_OWNING_REF mDecoder;
};
#endif

}  // namespace mozilla

#endif  // __FFmpegVideoDecoder_h__