commit 5df6a99098033eac5a05134437d4393200c8652b
parent 953d9a7d100b92c2094dc296962d3f3c5e02f40e
Author: Andreas Pehrson <apehrson@mozilla.com>
Date: Tue, 11 Nov 2025 08:21:04 +0000
Bug 1931328 - Account for playback rate when setting frame timestamps in DecodedStream. r=padenot
Differential Revision: https://phabricator.services.mozilla.com/D238765
Diffstat:
3 files changed, 115 insertions(+), 6 deletions(-)
diff --git a/dom/media/gtest/TestDecodedStream.cpp b/dom/media/gtest/TestDecodedStream.cpp
@@ -6,16 +6,22 @@
#include "BlankDecoderModule.h"
#include "DecodedStream.h"
+#include "ImageContainer.h"
#include "MediaData.h"
#include "MediaQueue.h"
#include "MediaTrackGraphImpl.h"
#include "MediaTrackListener.h"
#include "MockCubeb.h"
+#include "VideoSegment.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "mozilla/gtest/WaitFor.h"
#include "nsJSEnvironment.h"
+using mozilla::layers::ImageContainer;
+using mozilla::layers::ImageUsageType;
using mozilla::media::TimeUnit;
+using testing::ElementsAre;
using testing::Test;
namespace mozilla {
@@ -71,6 +77,17 @@ class OnFallbackListener : public MediaTrackListener {
}
};
+template <typename Segment>
+class CapturingListener : public MediaTrackListener {
+ public:
+ Segment mSegment;
+
+ void NotifyQueuedChanges(MediaTrackGraph* aGraph, TrackTime aTrackOffset,
+ const MediaSegment& aQueuedMedia) {
+ mSegment.AppendSlice(aQueuedMedia, 0, aQueuedMedia.GetDuration());
+ }
+};
+
class TestableDecodedStream : public DecodedStream {
public:
TestableDecodedStream(
@@ -87,6 +104,7 @@ class TestableDecodedStream : public DecodedStream {
using DecodedStream::GetPositionImpl;
using DecodedStream::LastOutputSystemTime;
+ using DecodedStream::LastVideoTimeStamp;
};
template <MediaType Type>
@@ -202,6 +220,8 @@ class TestDecodedStream : public Test {
}
MediaInfo CreateMediaInfo() { return mozilla::CreateMediaInfo<Type>(); }
+
+ void TestVideoTimestampsWithPlaybackRate(double aPlaybackRate);
};
using TestDecodedStreamA = TestDecodedStream<Audio>;
@@ -278,4 +298,85 @@ TEST_F(TestDecodedStreamA, InterpolatedPosition) {
mDecodedStream->Stop();
}
+
+template <MediaType Type>
+void TestDecodedStream<Type>::TestVideoTimestampsWithPlaybackRate(
+ double aPlaybackRate) {
+ static_assert(Type == MediaType::Video);
+
+ auto imageContainer = MakeRefPtr<ImageContainer>(ImageUsageType::Webrtc,
+ ImageContainer::SYNCHRONOUS);
+  // Capture the output into a dedicated segment that the graph will not
+  // prune, unlike the output track's mSegment.
+ RefPtr capturingListener = new CapturingListener<VideoSegment>();
+ mOutputTracks[0]->AddListener(capturingListener);
+ VideoSegment* segment = &capturingListener->mSegment;
+
+ {
+    // Add 4 video frames of 100ms each. Later we'll check timestamps of 3. We
+    // add 4 here to make the 3rd frame's duration deterministic.
+ BlankVideoDataCreator creator(640, 480, imageContainer);
+ TimeUnit t = TimeUnit::Zero();
+ for (size_t i = 0; i < 4; ++i) {
+ constexpr TimeUnit kDuration = TimeUnit(kRate / 10, kRate);
+ auto raw = MakeRefPtr<MediaRawData>();
+ raw->mTime = t;
+ raw->mDuration = kDuration;
+ t += kDuration;
+ mVideoQueue.Push(RefPtr(creator.Create(raw))->template As<VideoData>());
+ }
+ }
+
+ mDecodedStream->SetPlaybackRate(aPlaybackRate);
+ mDecodedStream->Start(TimeUnit::Zero(), CreateMediaInfo());
+ mDecodedStream->SetPlaying(true);
+ NS_ProcessPendingEvents(nullptr);
+ mMockCubebStream->ManualDataCallback(0);
+
+ // Advance time enough to extract all 3 video frames.
+ long duration = 0;
+ while (duration < static_cast<long>((static_cast<double>(kRate) / 10) * 3 /
+ aPlaybackRate)) {
+ constexpr long kChunk = 512;
+ mMockCubebStream->ManualDataCallback(kChunk);
+ NS_ProcessPendingEvents(nullptr);
+ duration += kChunk;
+ }
+ EXPECT_EQ(segment->GetDuration(), duration);
+
+ // Calculate the expected timestamp of the first frame. At this point all
+ // frames in the VideoQueue have been sent, so LastVideoTimeStamp() matches
+ // the timestamp of frame 4.
+ const auto frameGap =
+ TimeDuration::FromMilliseconds(100).MultDouble(1 / aPlaybackRate);
+ TimeStamp videoStartOffset =
+ mDecodedStream->LastVideoTimeStamp() - frameGap * 3;
+
+  // Check durations and timestamps of the first 3 frames.
+ AutoTArray<TrackTime, 3> durations;
+ AutoTArray<TimeDuration, 3> timestamps;
+ for (VideoSegment::ConstChunkIterator i(*segment);
+ durations.Length() < 3 && !i.IsEnded(); i.Next()) {
+ durations.AppendElement(i->GetDuration());
+ timestamps.AppendElement(i->mTimeStamp - videoStartOffset);
+ }
+ const TrackTime d =
+ static_cast<TrackTime>(static_cast<double>(kRate) / 10 / aPlaybackRate);
+ EXPECT_THAT(durations, ElementsAre(d, d, d));
+ EXPECT_THAT(timestamps,
+ ElementsAre(frameGap * 0, frameGap * 1, frameGap * 2));
+
+ mOutputTracks[0]->RemoveListener(capturingListener);
+ mDecodedStream->Stop();
+}
+
+TEST_F(TestDecodedStreamV, VideoTimeStamps) {
+ TestVideoTimestampsWithPlaybackRate(1.0);
+}
+TEST_F(TestDecodedStreamV, VideoTimeStampsFaster) {
+ TestVideoTimestampsWithPlaybackRate(2.0);
+}
+TEST_F(TestDecodedStreamV, VideoTimeStampsSlower) {
+ TestVideoTimestampsWithPlaybackRate(0.5);
+}
} // namespace mozilla
diff --git a/dom/media/mediasink/DecodedStream.cpp b/dom/media/mediasink/DecodedStream.cpp
@@ -994,9 +994,10 @@ void DecodedStream::SendVideo(const PrincipalHandle& aPrincipalHandle) {
// video frame). E.g. if we have a video frame that is 30 sec long
// and capture happens at 15 sec, we'll have to append a black frame
// that is 15 sec long.
- TimeStamp t =
- std::max(mData->mLastVideoTimeStamp,
- currentTime + (lastEnd - currentPosition).ToTimeDuration());
+ TimeStamp t = std::max(mData->mLastVideoTimeStamp,
+ currentTime + (lastEnd - currentPosition)
+ .ToTimeDuration()
+ .MultDouble(1 / mPlaybackRate));
mData->WriteVideoToSegment(mData->mLastVideoImage, lastEnd, v->mTime,
mData->mLastVideoImageDisplaySize, t, &output,
aPrincipalHandle, mPlaybackRate);
@@ -1008,9 +1009,10 @@ void DecodedStream::SendVideo(const PrincipalHandle& aPrincipalHandle) {
// before the last frame's end time for some videos. This only matters for
// the track's lifetime in the MTG, as rendering is based on timestamps,
// aka frame start times.
- TimeStamp t =
- std::max(mData->mLastVideoTimeStamp,
- currentTime + (lastEnd - currentPosition).ToTimeDuration());
+ TimeStamp t = std::max(mData->mLastVideoTimeStamp,
+ currentTime + (lastEnd - currentPosition)
+ .ToTimeDuration()
+ .MultDouble(1 / mPlaybackRate));
TimeUnit end = std::max(
v->GetEndTime(),
lastEnd + TimeUnit::FromMicroseconds(
@@ -1145,6 +1147,11 @@ AwakeTimeStamp DecodedStream::LastOutputSystemTime() const {
return *mLastOutputSystemTime;
}
+TimeStamp DecodedStream::LastVideoTimeStamp() const {
+ AssertOwnerThread();
+ return mData->mLastVideoTimeStamp;
+}
+
void DecodedStream::NotifyOutput(int64_t aTime, TimeStamp aSystemTime,
AwakeTimeStamp aAwakeSystemTime) {
AssertOwnerThread();
diff --git a/dom/media/mediasink/DecodedStream.h b/dom/media/mediasink/DecodedStream.h
@@ -82,6 +82,7 @@ class DecodedStream : public MediaSink {
media::TimeUnit GetPositionImpl(TimeStamp aNow, AwakeTimeStamp aAwakeNow,
TimeStamp* aTimeStamp = nullptr);
AwakeTimeStamp LastOutputSystemTime() const;
+ TimeStamp LastVideoTimeStamp() const;
private:
void DestroyData(UniquePtr<DecodedStreamData>&& aData);