tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MediaDecoderStateMachine.h (21332B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 #if !defined(MediaDecoderStateMachine_h__)
      7 #  define MediaDecoderStateMachine_h__
      8 
      9 #  include "AudioDeviceInfo.h"
     10 #  include "ImageContainer.h"
     11 #  include "MediaDecoder.h"
     12 #  include "MediaDecoderOwner.h"
     13 #  include "MediaDecoderStateMachineBase.h"
     14 #  include "MediaFormatReader.h"
     15 #  include "MediaQueue.h"
     16 #  include "MediaSink.h"
     17 #  include "MediaTimer.h"
     18 #  include "SeekJob.h"
     19 #  include "mozilla/Atomics.h"
     20 #  include "mozilla/ReentrantMonitor.h"
     21 #  include "mozilla/StateMirroring.h"
     22 #  include "nsThreadUtils.h"
     23 
     24 namespace mozilla {
     25 
     26 class AbstractThread;
     27 class AudioSegment;
     28 class DecodedStream;
     29 class DOMMediaStream;
     30 class ReaderProxy;
     31 class TaskQueue;
     32 
     33 extern LazyLogModule gMediaDecoderLog;
     34 
     35 DDLoggedTypeDeclName(MediaDecoderStateMachine);
     36 
     37 /*
     38 
     39 Each media element for a media file has one thread called the "audio thread".
     40 
     41 The audio thread writes the decoded audio data to the audio
     42 hardware. This is done in a separate thread to ensure that the
     43 audio hardware gets a constant stream of data without
     44 interruption due to decoding or display. At some point
     45 AudioStream will be refactored to have a callback interface
     46 where it asks for data and this thread will no longer be
     47 needed.
     48 
     49 The element/state machine also has a TaskQueue which runs in a
     50 SharedThreadPool that is shared with all other elements/decoders. The state
     51 machine dispatches tasks to this to call into the MediaDecoderReader to
     52 request decoded audio or video data. The Reader will callback with decoded
     53 samples when it has them available, and the state machine places the decoded
     54 samples into its queues for the consuming threads to pull from.
     55 
     56 The MediaDecoderReader can choose to decode asynchronously, or synchronously
     57 and return requested samples synchronously inside its Request*Data()
     58 functions via callback. Asynchronous decoding is preferred, and should be
     59 used for any new readers.
     60 
     61 Synchronisation of state between the threads is done via a monitor owned
     62 by MediaDecoder.
     63 
     64 The lifetime of the audio thread is controlled by the state machine when
     65 it runs on the shared state machine thread. When playback needs to occur
     66 the audio thread is created and an event dispatched to run it. The audio
     67 thread exits when audio playback is completed or no longer required.
     68 
     69 A/V synchronisation is handled by the state machine. It examines the audio
     70 playback time and compares this to the next frame in the queue of video
     71 frames. If it is time to play the video frame it is then displayed, otherwise
     72 it schedules the state machine to run again at the time of the next frame.
     73 
     74 Frame skipping is done in the following ways:
     75 
     76  1) The state machine will skip all frames in the video queue whose
     77     display time is less than the current audio time. This ensures
     78     the correct frame for the current time is always displayed.
     79 
     80  2) The decode tasks will stop decoding interframes and read to the
     81     next keyframe if it determines that decoding the remaining
     82     interframes will cause playback issues. It detects this by:
     83       a) If the amount of audio data in the audio queue drops
     84          below a threshold whereby audio may start to skip.
     85       b) If the video queue drops below a threshold where it
     86          will be decoding video data that won't be displayed due
     87          to the decode thread dropping the frame immediately.
     88     TODO: In future we should only do this when the Reader is decoding
     89           synchronously.
     90 
     91 When hardware accelerated graphics is not available, YCbCr conversion
     92 is done on the decode task queue when video frames are decoded.
     93 
     94 The decode task queue pushes decoded audio and video frames into two
     95 separate queues - one for audio and one for video. These are kept
     96 separate to make it easy to constantly feed audio data to the audio
     97 hardware while allowing frame skipping of video data. These queues are
     98 threadsafe, and neither the decode, audio, or state machine should
     99 be able to monopolize them, and cause starvation of the other threads.
    100 
    101 Both queues are bounded by a maximum size. When this size is reached
    102 the decode tasks will no longer request video or audio depending on the
    103 queue that has reached the threshold. If both queues are full, no more
    104 decode tasks will be dispatched to the decode task queue, so other
    105 decoders will have an opportunity to run.
    106 
    107 During playback the audio thread will be idle (via a Wait() on the
    108 monitor) if the audio queue is empty. Otherwise it constantly pops
    109 audio data off the queue and plays it with a blocking write to the audio
    110 hardware (via AudioStream).
    111 
    112 */
    113 class MediaDecoderStateMachine
    114    : public MediaDecoderStateMachineBase,
    115      public DecoderDoctorLifeLogger<MediaDecoderStateMachine> {
    116  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaDecoderStateMachine, override)
    117 
    118  using TrackSet = MediaFormatReader::TrackSet;
    119 
    120 public:
    121  using FrameID = mozilla::layers::ImageContainer::FrameID;
    122  MediaDecoderStateMachine(MediaDecoder* aDecoder, MediaFormatReader* aReader);
    123 
    124  nsresult Init(MediaDecoder* aDecoder) override;
    125 
    126  // Enumeration for the valid decoding states
    127  enum State {
    128    DECODER_STATE_DECODING_METADATA,
    129    DECODER_STATE_DORMANT,
    130    DECODER_STATE_DECODING_FIRSTFRAME,
    131    DECODER_STATE_DECODING,
    132    DECODER_STATE_LOOPING_DECODING,
    133    DECODER_STATE_SEEKING_ACCURATE,
    134    DECODER_STATE_SEEKING_FROMDORMANT,
    135    DECODER_STATE_SEEKING_NEXTFRAMESEEKING,
    136    DECODER_STATE_SEEKING_VIDEOONLY,
    137    DECODER_STATE_BUFFERING,
    138    DECODER_STATE_COMPLETED,
    139    DECODER_STATE_SHUTDOWN
    140  };
    141 
    142  RefPtr<GenericPromise> RequestDebugInfo(
    143      dom::MediaDecoderStateMachineDebugInfo& aInfo) override;
    144 
    145  size_t SizeOfVideoQueue() const override;
    146 
    147  size_t SizeOfAudioQueue() const override;
    148 
    149  // Sets the video decode mode. Used by the suspend-video-decoder feature.
    150  void SetVideoDecodeMode(VideoDecodeMode aMode) override;
    151 
    152  RefPtr<GenericPromise> InvokeSetSink(
    153      const RefPtr<AudioDeviceInfo>& aSink) override;
    154 
    155  void InvokeSuspendMediaSink() override;
    156  void InvokeResumeMediaSink() override;
    157 
    158  nsresult IsCDMProxySupported(CDMProxy* aProxy) override;
    159 
    160  RefPtr<SetCDMPromise> SetCDMProxy(CDMProxy* aProxy) override;
    161 
    162 private:
    163  class StateObject;
    164  class DecodeMetadataState;
    165  class DormantState;
    166  class DecodingFirstFrameState;
    167  class DecodingState;
    168  class LoopingDecodingState;
    169  class SeekingState;
    170  class AccurateSeekingState;
    171  class NextFrameSeekingState;
    172  class NextFrameSeekingFromDormantState;
    173  class VideoOnlySeekingState;
    174  class BufferingState;
    175  class CompletedState;
    176  class ShutdownState;
    177 
    178  static const char* ToStateStr(State aState);
    179  const char* ToStateStr();
    180 
    181  void GetDebugInfo(dom::MediaDecoderStateMachineDebugInfo& aInfo);
    182 
    183  // Initialization that needs to happen on the task queue. This is the first
    184  // task that gets run on the task queue, and is dispatched from the MDSM
    185  // constructor immediately after the task queue is created.
    186  void InitializationTask(MediaDecoder* aDecoder) override;
    187 
    188  RefPtr<MediaDecoder::SeekPromise> Seek(const SeekTarget& aTarget) override;
    189 
    190  RefPtr<ShutdownPromise> Shutdown() override;
    191 
    192  RefPtr<ShutdownPromise> FinishShutdown();
    193 
    194  // Update the playback position. This can result in a timeupdate event
    195  // and an invalidate of the frame being dispatched asynchronously if
    196  // there is no such event currently queued.
    197  // Only called on the decoder thread. Must be called with
    198  // the decode monitor held.
    199  void UpdatePlaybackPosition(const media::TimeUnit& aTime);
    200 
    201  // Schedules the shared state machine thread to run the state machine.
    202  void ScheduleStateMachine();
    203 
    204  // Invokes ScheduleStateMachine to run in |aTime|,
    205  // unless it's already scheduled to run earlier, in which case the
    206  // request is discarded.
    207  void ScheduleStateMachineIn(const media::TimeUnit& aTime);
    208 
    209  bool HaveEnoughDecodedAudio() const;
    210  bool HaveEnoughDecodedVideo() const;
    211 
    212  // The check is used to store more video frames than usual when playing 4K+
    213  // video.
    214  bool IsVideoDataEnoughComparedWithAudio() const;
    215 
    216  // Returns true if we're currently playing. The decoder monitor must
    217  // be held.
    218  bool IsPlaying() const;
    219 
    220  // Sets mMediaSeekable to false.
    221  void SetMediaNotSeekable();
    222 
    223  // Resets all states related to decoding and aborts all pending requests
    224  // to the decoders.
    225  void ResetDecode(const TrackSet& aTracks = TrackSet(TrackInfo::kAudioTrack,
    226                                                      TrackInfo::kVideoTrack));
    227 
    228  void SetVideoDecodeModeInternal(VideoDecodeMode aMode);
    229 
    230  RefPtr<GenericPromise> SetSink(RefPtr<AudioDeviceInfo> aDevice);
    231 
    232  // Shutdown MediaSink on suspend to clean up resources.
    233  void SuspendMediaSink();
    234  // Create a new MediaSink, it must have been stopped first.
    235  void ResumeMediaSink();
    236 
    237 protected:
    238  virtual ~MediaDecoderStateMachine();
    239 
    240  void BufferedRangeUpdated() override;
    241  void VolumeChanged() override;
    242  void PreservesPitchChanged() override;
    243  void PlayStateChanged() override;
    244  void LoopingChanged() override;
    245  void UpdateSecondaryVideoContainer() override;
    246 
    247  void ReaderSuspendedChanged();
    248 
    249  // Inserts a sample into the Audio/Video queue.
    250  // aSample must not be null.
    251  void PushAudio(AudioData* aSample);
    252  void PushVideo(VideoData* aSample);
    253 
    254  void OnAudioPopped(const RefPtr<AudioData>& aSample);
    255  void OnVideoPopped(const RefPtr<VideoData>& aSample);
    256 
    257  void AudioAudibleChanged(bool aAudible);
    258 
    259  void SetPlaybackRate(double aPlaybackRate) override;
    260  void SetCanPlayThrough(bool aCanPlayThrough) override {
    261    mCanPlayThrough = aCanPlayThrough;
    262  }
    263  void SetFragmentEndTime(const media::TimeUnit& aEndTime) override {
    264    // A negative number means we don't have a fragment end time at all.
    265    mFragmentEndTime = aEndTime >= media::TimeUnit::Zero()
    266                           ? aEndTime
    267                           : media::TimeUnit::Invalid();
    268  }
    269 
    270  void StreamNameChanged();
    271  void UpdateOutputCaptured();
    272  void OutputPrincipalChanged();
    273 
    274  MediaQueue<AudioData>& AudioQueue() { return mAudioQueue; }
    275  MediaQueue<VideoData>& VideoQueue() { return mVideoQueue; }
    276 
    277  const MediaQueue<AudioData>& AudioQueue() const { return mAudioQueue; }
    278  const MediaQueue<VideoData>& VideoQueue() const { return mVideoQueue; }
    279 
    280  // True if we are low in decoded audio/video data.
    281  // May not be invoked when mReader->UseBufferingHeuristics() is false.
    282  bool HasLowDecodedData();
    283 
    284  bool HasLowDecodedAudio();
    285 
    286  bool HasLowDecodedVideo();
    287 
    288  bool OutOfDecodedAudio();
    289 
    290  bool OutOfDecodedVideo() {
    291    MOZ_ASSERT(OnTaskQueue());
    292    return IsVideoDecoding() && VideoQueue().GetSize() <= 1;
    293  }
    294 
    295  // Returns true if we're running low on buffered data.
    296  bool HasLowBufferedData();
    297 
    298  // Returns true if we have less than aThreshold of buffered data available.
    299  bool HasLowBufferedData(const media::TimeUnit& aThreshold);
    300 
    301  // Return the current time, either the audio clock if available (if the media
    302  // has audio, and the playback is possible), or a clock for the video.
    303  // Called on the state machine thread.
    304  // If aTimeStamp is non-null, set *aTimeStamp to the TimeStamp corresponding
    305  // to the returned stream time.
    306  media::TimeUnit GetClock(TimeStamp* aTimeStamp = nullptr) const;
    307 
    308  // Update only the state machine's current playback position (and duration,
    309  // if unknown).  Does not update the playback position on the decoder or
    310  // media element -- use UpdatePlaybackPosition for that.  Called on the state
    311  // machine thread, caller must hold the decoder lock.
    312  void UpdatePlaybackPositionInternal(const media::TimeUnit& aTime);
    313 
    314  // Update playback position and trigger next update by default time period.
    315  // Called on the state machine thread.
    316  void UpdatePlaybackPositionPeriodically();
    317 
    318  MediaSink* CreateAudioSink();
    319 
    320  // Always create mediasink which contains an AudioSink or DecodedStream
    321  // inside.
    322  already_AddRefed<MediaSink> CreateMediaSink();
    323 
    324  // Stops the media sink and shut it down.
    325  // The decoder monitor must be held with exactly one lock count.
    326  // Called on the state machine thread.
    327  void StopMediaSink();
    328 
    329  // Create and start the media sink.
    330  // The decoder monitor must be held with exactly one lock count.
    331  // Called on the state machine thread.
    332  // If start fails an NS_ERROR_FAILURE is returned.
    333  nsresult StartMediaSink();
    334 
    335  // Notification method invoked when mIsVisible changes.
    336  void VisibilityChanged();
    337 
    338  // Sets internal state which causes playback of media to pause.
    339  // The decoder monitor must be held.
    340  void StopPlayback();
    341 
    342  // If the conditions are right, sets internal state which causes playback
    343  // of media to begin or resume.
    344  // Must be called with the decode monitor held.
    345  void MaybeStartPlayback();
    346 
    347  void EnqueueFirstFrameLoadedEvent();
    348 
    349  // Start a task to decode audio.
    350  void RequestAudioData();
    351 
    352  // Start a task to decode video.
    353  // @param aRequestNextKeyFrame
    354  // If aRequestNextKeyFrame is true, will request data for the next keyframe
    355  // after aCurrentTime.
    356  void RequestVideoData(const media::TimeUnit& aCurrentTime,
    357                        bool aRequestNextKeyFrame = false);
    358 
    359  void WaitForData(MediaData::Type aType);
    360 
    361  // Returns the "current playback position" in HTML5, which is in the range
    362  // [0,duration].  The first frame of the media resource corresponds to 0
    363  // regardless of any codec-specific internal time code.
    364  media::TimeUnit GetMediaTime() const {
    365    MOZ_ASSERT(OnTaskQueue());
    366    return mCurrentPosition;
    367  }
    368 
    369  // Returns an upper bound on the number of microseconds of audio that is
    370  // decoded and playable. This is the sum of the number of usecs of audio which
    371  // is decoded and in the reader's audio queue, and the usecs of unplayed audio
    372  // which has been pushed to the audio hardware for playback. Note that after
    373  // calling this, the audio hardware may play some of the audio pushed to
    374  // hardware, so this can only be used as an upper bound. The decoder monitor
    375  // must be held when calling this. Called on the decode thread.
    376  media::TimeUnit GetDecodedAudioDuration() const;
    377 
    378  void FinishDecodeFirstFrame();
    379 
    380  // Performs one "cycle" of the state machine.
    381  void RunStateMachine();
    382 
    383  bool IsStateMachineScheduled() const;
    384 
    385  // These return true if the respective stream's decode has not yet reached
    386  // the end of stream.
    387  bool IsAudioDecoding();
    388  bool IsVideoDecoding();
    389 
    390 private:
    391  // Resolved by the MediaSink to signal that all audio/video outstanding
    392  // work is complete and identify which part (a/v) of the sink is shutting down.
    393  void OnMediaSinkAudioComplete();
    394  void OnMediaSinkVideoComplete();
    395 
    396  // Rejected by the MediaSink to signal errors for audio/video.
    397  void OnMediaSinkAudioError(nsresult aResult);
    398  void OnMediaSinkVideoError();
    399 
    400  // State-watching manager.
    401  WatchManager<MediaDecoderStateMachine> mWatchManager;
    402 
    403  // True if we've dispatched a task to run the state machine but the task has
    404  // yet to run.
    405  bool mDispatchedStateMachine;
    406 
    407  // Used to dispatch another round schedule with specific target time.
    408  DelayedScheduler<TimeStamp> mDelayedScheduler;
    409 
    410  // Queue of audio frames. This queue is threadsafe, and is accessed from
    411  // the audio, decoder, state machine, and main threads.
    412  MediaQueue<AudioData> mAudioQueue;
    413  // Queue of video frames. This queue is threadsafe, and is accessed from
    414  // the decoder, state machine, and main threads.
    415  MediaQueue<VideoData> mVideoQueue;
    416 
    417  UniquePtr<StateObject> mStateObj;
    418 
    419  media::TimeUnit Duration() const {
    420    MOZ_ASSERT(OnTaskQueue());
    421    return mDuration.Ref().ref();
    422  }
    423 
    424  // FrameID which increments every time a frame is pushed to our queue.
    425  FrameID mCurrentFrameID;
    426 
    427  // Media Fragment end time.
    428  media::TimeUnit mFragmentEndTime = media::TimeUnit::Invalid();
    429 
    430  // The media sink resource.  Used on the state machine thread.
    431  RefPtr<MediaSink> mMediaSink;
    432 
    433  // The end time of the last audio frame that's been pushed onto the media sink
    434  // in microseconds. This will approximately be the end time
    435  // of the audio stream, unless another frame is pushed to the hardware.
    436  media::TimeUnit AudioEndTime() const;
    437 
    438  // The end time of the last rendered video frame that's been sent to
    439  // compositor.
    440  media::TimeUnit VideoEndTime() const;
    441 
    442  // The end time of the last decoded audio frame. This signifies the end of
    443  // decoded audio data. Used to check if we are low in decoded data.
    444  media::TimeUnit mDecodedAudioEndTime;
    445 
    446  // The end time of the last decoded video frame. Used to check if we are low
    447  // on decoded video data.
    448  media::TimeUnit mDecodedVideoEndTime;
    449 
    450  // If we've got more than this number of decoded video frames waiting in
    451  // the video queue, we will not decode any more video frames until some have
    452  // been consumed by the play state machine thread.
    453  // Must hold monitor.
    454  uint32_t GetAmpleVideoFrames() const;
    455 
    456  // Our "ample" audio threshold. Once we have this much audio decoded, we
    457  // pause decoding.
    458  media::TimeUnit mAmpleAudioThreshold;
    459 
    460  const char* AudioRequestStatus() const;
    461  const char* VideoRequestStatus() const;
    462 
    463  void OnSuspendTimerResolved();
    464  void CancelSuspendTimer();
    465 
    466  bool IsInSeamlessLooping() const;
    467 
    468  bool mCanPlayThrough = false;
    469 
    470  // True if all audio frames are already rendered.
    471  bool mAudioCompleted = false;
    472 
    473  // True if all video frames are already rendered.
    474  bool mVideoCompleted = false;
    475 
    476  // True if video decoding is suspended.
    477  bool mVideoDecodeSuspended;
    478 
    479  // Track enabling video decode suspension via timer
    480  DelayedScheduler<TimeStamp> mVideoDecodeSuspendTimer;
    481 
    482  // Track the current video decode mode.
    483  VideoDecodeMode mVideoDecodeMode;
    484 
    485  // Track the complete & error for audio/video separately
    486  MozPromiseRequestHolder<MediaSink::EndedPromise> mMediaSinkAudioEndedPromise;
    487  MozPromiseRequestHolder<MediaSink::EndedPromise> mMediaSinkVideoEndedPromise;
    488 
    489  MediaEventListener mAudioQueueListener;
    490  MediaEventListener mVideoQueueListener;
    491  MediaEventListener mAudibleListener;
    492  MediaEventListener mOnMediaNotSeekable;
    493 
    494  const bool mIsMSE;
    495 
    496  const bool mShouldResistFingerprinting;
    497 
    498  bool mSeamlessLoopingAllowed;
    499 
    500  // If media was in looping and had reached the end before, then we need
    501  // to adjust sample time from clock time to media time.
    502  void AdjustByLooping(media::TimeUnit& aTime) const;
    503 
    504  // These are used for seamless looping. When looping has been enabled at least
    505  // once, `mOriginalDecodedDuration` would be set to the larger duration
    506  // between two tracks.
    507  media::TimeUnit mOriginalDecodedDuration;
    508  Maybe<media::TimeUnit> mAudioTrackDecodedDuration;
    509  Maybe<media::TimeUnit> mVideoTrackDecodedDuration;
    510 
    511  bool HasLastDecodedData(MediaData::Type aType);
    512 
    513  // Current playback position in the stream in bytes.
    514  int64_t mPlaybackOffset = 0;
    515 
    516  // For seamless looping video, we don't want to trigger skip-to-next-keyframe
    517  // after reaching video EOS. Because we've reset the demuxer to 0, and are
    518  // going to request data from start. If playback hasn't looped back, the media
    519  // time would still be too large, which makes the reader think the playback is
    520  // way behind and performs unnecessary skipping. Eg. Media is 10s long,
    521  // reaching EOS at 8s, requesting data at 9s. Assume media's keyframe interval
    522  // is 3s, which means keyframes will appear on 0s, 3s, 6s and 9s. If we use
    523  // current time as a threshold, the reader sees the next key frame is 3s but
    524  // the threshold is 9s, which usually happens when the decoding is too slow.
    525  // But that is not the case for us, we should bypass the skip-to-next-keyframe
    526  // logic until the media loops back.
    527  bool mBypassingSkipToNextKeyFrameCheck = false;
    528 
    529  // The total amount of time we've spent on the buffering state.
    530  TimeDuration mTotalBufferingDuration;
    531 
    532 private:
    533  // Audio stream name
    534  Mirror<nsAutoString> mStreamName;
    535 
    536  // The device used with SetSink, or nullptr if no explicit device has been
    537  // set.
    538  Mirror<RefPtr<AudioDeviceInfo>> mSinkDevice;
    539 
    540  // Whether all output should be captured into mOutputTracks, halted, or not
    541  // captured.
    542  Mirror<MediaDecoder::OutputCaptureState> mOutputCaptureState;
    543 
    544  // A dummy track used to access the right MediaTrackGraph instance. Needed
    545  // since there's no guarantee that output tracks are present.
    546  Mirror<nsMainThreadPtrHandle<SharedDummyTrack>> mOutputDummyTrack;
    547 
    548  // Tracks to capture data into.
    549  Mirror<CopyableTArray<RefPtr<ProcessedMediaTrack>>> mOutputTracks;
    550 
    551  // PrincipalHandle to feed with data captured into mOutputTracks.
    552  Mirror<PrincipalHandle> mOutputPrincipal;
    553 
    554  Canonical<PrincipalHandle> mCanonicalOutputPrincipal;
    555 
    556  // Track when MediaSink is suspended. When that happens some actions are
    557  // restricted like starting the sink or changing sink id. The flag is valid
    558  // after Initialization. TaskQueue thread only.
    559  bool mIsMediaSinkSuspended = false;
    560 
    561  Atomic<bool> mShuttingDown;
    562 
    563  Atomic<bool> mInitialized;
    564 
    565 public:
    566  AbstractCanonical<PrincipalHandle>* CanonicalOutputPrincipal() {
    567    return &mCanonicalOutputPrincipal;
    568  }
    569 };
    570 
    571 }  // namespace mozilla
    572 
    573 #endif