WebMDemuxer.cpp (51513B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "MediaResource.h" 8 #include "nsError.h" 9 #ifdef MOZ_AV1 10 # include "AOMDecoder.h" 11 #endif 12 #include <opus/opus.h> 13 #include <stdint.h> 14 15 #include <algorithm> 16 #include <numeric> 17 18 #include "MediaDataDemuxer.h" 19 #include "NesteggPacketHolder.h" 20 #include "VPXDecoder.h" 21 #include "VideoUtils.h" 22 #include "WebMBufferedParser.h" 23 #include "WebMDemuxer.h" 24 #include "XiphExtradata.h" 25 #include "gfx2DGlue.h" 26 #include "gfxUtils.h" 27 #include "mozilla/IntegerPrintfMacros.h" 28 #include "mozilla/Maybe.h" 29 #include "mozilla/SharedThreadPool.h" 30 #include "mozilla/Sprintf.h" 31 #include "nsAutoRef.h" 32 #include "prprf.h" // leaving it for PR_vsnprintf() 33 34 #define WEBM_DEBUG(arg, ...) \ 35 DDMOZ_LOG(gMediaDemuxerLog, mozilla::LogLevel::Debug, "::%s: " arg, \ 36 __func__, ##__VA_ARGS__) 37 extern mozilla::LazyLogModule gMediaDemuxerLog; 38 39 namespace mozilla { 40 41 using namespace gfx; 42 using media::TimeUnit; 43 44 LazyLogModule gNesteggLog("Nestegg"); 45 46 #define NSECS_PER_USEC 1000 47 48 // How far ahead will we look when searching future keyframe. In microseconds. 49 // This value is based on what appears to be a reasonable value as most webm 50 // files encountered appear to have keyframes located < 4s. 51 #define MAX_LOOK_AHEAD 10000000 52 53 // Functions for reading and seeking using WebMDemuxer required for 54 // nestegg_io. The 'user data' passed to these functions is the 55 // demuxer's context. 
// nestegg_io read callback. Fills aBuffer with up to aLength bytes from the
// demuxer's MediaResourceIndex. Returns 1 on a complete read, 0 on EOF
// (short read), -1 on I/O failure. The nsresult of the read is stashed in
// mLastIORV so DemuxPacket() can distinguish "waiting for more data" from a
// hard error after nestegg reports a failure.
static int webmdemux_read(void* aBuffer, size_t aLength, void* aUserData) {
  MOZ_ASSERT(aUserData);
  // Read() takes a 32-bit count; the assert documents that nestegg never
  // requests reads that large.
  MOZ_ASSERT(aLength < UINT32_MAX);
  WebMDemuxer::NestEggContext* context =
      reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData);
  uint32_t bytes = 0;
  context->mLastIORV = context->GetResource()->Read(static_cast<char*>(aBuffer),
                                                    aLength, &bytes);
  // A short read means we have reached the end of the available data.
  bool eof = bytes < aLength;
  return NS_FAILED(context->mLastIORV) ? -1 : eof ? 0 : 1;
}

// nestegg_io seek callback. aWhence is a SEEK_SET/SEEK_CUR/SEEK_END style
// value. Returns 0 on success, -1 on failure.
static int webmdemux_seek(int64_t aOffset, int aWhence, void* aUserData) {
  MOZ_ASSERT(aUserData);
  WebMDemuxer::NestEggContext* context =
      reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData);
  context->mLastIORV = context->GetResource()->Seek(aWhence, aOffset);
  return NS_SUCCEEDED(context->mLastIORV) ? 0 : -1;
}

// nestegg_io tell callback: current read offset within the resource.
static int64_t webmdemux_tell(void* aUserData) {
  MOZ_ASSERT(aUserData);
  WebMDemuxer::NestEggContext* context =
      reinterpret_cast<WebMDemuxer::NestEggContext*>(aUserData);
  return context->GetResource()->Tell();
}

// nestegg logging hook: forwards libnestegg's printf-style log messages to
// the gNesteggLog module at Debug level, prefixed with the context pointer
// and a three-letter severity tag.
static void webmdemux_log(nestegg* aContext, unsigned int aSeverity,
                          char const* aFormat, ...)
{
  // Bail out early so the formatting work is only done when logging is on.
  if (!MOZ_LOG_TEST(gNesteggLog, LogLevel::Debug)) {
    return;
  }

  va_list args;
  char msg[256];
  const char* sevStr;

  switch (aSeverity) {
    case NESTEGG_LOG_DEBUG:
      sevStr = "DBG";
      break;
    case NESTEGG_LOG_INFO:
      sevStr = "INF";
      break;
    case NESTEGG_LOG_WARNING:
      sevStr = "WRN";
      break;
    case NESTEGG_LOG_ERROR:
      sevStr = "ERR";
      break;
    case NESTEGG_LOG_CRITICAL:
      sevStr = "CRT";
      break;
    default:
      sevStr = "UNK";
      break;
  }

  va_start(args, aFormat);

  SprintfLiteral(msg, "%p [Nestegg-%s] ", aContext, sevStr);
  // Append the formatted message after the prefix; PR_vsnprintf truncates
  // safely within the fixed 256-byte buffer.
  PR_vsnprintf(msg + strlen(msg), sizeof(msg) - strlen(msg), aFormat, args);
  MOZ_LOG(gNesteggLog, LogLevel::Debug, ("%s", msg));

  va_end(args);
}

WebMDemuxer::NestEggContext::~NestEggContext() {
  // mContext is null if Init() was never called or nestegg_init failed.
  if (mContext) {
    nestegg_destroy(mContext);
  }
}

// Creates the nestegg parser context wired up to the callbacks above.
// Returns nestegg_init's result: 0 on success, -1 on failure.
int WebMDemuxer::NestEggContext::Init() {
  nestegg_io io;
  io.read = webmdemux_read;
  io.seek = webmdemux_seek;
  io.tell = webmdemux_tell;
  io.userdata = this;

  return nestegg_init(
      &mContext, io, &webmdemux_log,
      // nestegg_init() would return an error, from ne_parse(), if a resource
      // read were to fail.
      // For MediaSource, TrackBuffersManager::InitializationSegmentReceived()
      // calls WebMDemuxer::Init() while the resource has cached only the
      // bytes of the initialization segment. max_offset is passed so that no
      // read will fail.
      mParent->IsMediaSource() ?
mResource.GetResource()->GetCachedDataEnd(0)
                               : -1);
}

// Convenience constructor for non-MediaSource (file/network) playback.
WebMDemuxer::WebMDemuxer(MediaResource* aResource)
    : WebMDemuxer(aResource, false) {}

// aFrameEndTimeBeforeRecreateDemuxer carries the last video frame end time
// across a MediaSource demuxer re-creation so frame durations can still be
// estimated; it is only meaningful when aIsMediaSource is true.
WebMDemuxer::WebMDemuxer(
    MediaResource* aResource, bool aIsMediaSource,
    Maybe<media::TimeUnit> aFrameEndTimeBeforeRecreateDemuxer)
    : mVideoContext(this, aResource),
      mAudioContext(this, aResource),
      mBufferedState(nullptr),
      mInitData(nullptr),
      mVideoTrack(0),
      mAudioTrack(0),
      mSeekPreroll(0),
      // -1 marks "no codec detected yet" for both codec ids.
      mAudioCodec(-1),
      mVideoCodec(-1),
      mHasVideo(false),
      mHasAudio(false),
      mNeedReIndex(true),
      mIsMediaSource(aIsMediaSource) {
  DDLINKCHILD("resource", aResource);
  // Audio/video contexts hold a MediaResourceIndex.
  DDLINKCHILD("video context", mVideoContext.GetResource());
  DDLINKCHILD("audio context", mAudioContext.GetResource());

  MOZ_ASSERT_IF(!aIsMediaSource,
                aFrameEndTimeBeforeRecreateDemuxer.isNothing());
  if (aIsMediaSource && aFrameEndTimeBeforeRecreateDemuxer) {
    mVideoFrameEndTimeBeforeReset = aFrameEndTimeBeforeRecreateDemuxer;
    WEBM_DEBUG("Set mVideoFrameEndTimeBeforeReset=%" PRId64,
               mVideoFrameEndTimeBeforeReset->ToMicroseconds());
  }
}

WebMDemuxer::~WebMDemuxer() {
  // Drop any packets still queued for either track.
  Reset(TrackInfo::kVideoTrack);
  Reset(TrackInfo::kAudioTrack);
}

// MediaDataDemuxer entry point: parses the file metadata and resolves once
// at least one playable track has been found, rejecting with a metadata
// error otherwise.
RefPtr<WebMDemuxer::InitPromise> WebMDemuxer::Init() {
  InitBufferedState();

  if (NS_FAILED(ReadMetadata())) {
    return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR,
                                        __func__);
  }

  if (!GetNumberTracks(TrackInfo::kAudioTrack) &&
      !GetNumberTracks(TrackInfo::kVideoTrack)) {
    return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR,
                                        __func__);
  }

  return InitPromise::CreateAndResolve(NS_OK, __func__);
}

void WebMDemuxer::InitBufferedState() {
  MOZ_ASSERT(!mBufferedState);
  mBufferedState = new WebMBufferedState;
}

// WebM exposes at most one audio and one video track through this demuxer,
// so the count is 0 or 1 per type.
uint32_t WebMDemuxer::GetNumberTracks(TrackInfo::TrackType aType) const {
  switch (aType) {
    case TrackInfo::kAudioTrack:
      return mHasAudio ? 1 : 0;
    case TrackInfo::kVideoTrack:
      return mHasVideo ? 1 : 0;
    default:
      return 0;
  }
}

// aTrackNumber is ignored: there is a single track per type (see above).
UniquePtr<TrackInfo> WebMDemuxer::GetTrackInfo(TrackInfo::TrackType aType,
                                               size_t aTrackNumber) const {
  switch (aType) {
    case TrackInfo::kAudioTrack:
      return mInfo.mAudio.Clone();
    case TrackInfo::kVideoTrack:
      return mInfo.mVideo.Clone();
    default:
      return nullptr;
  }
}

already_AddRefed<MediaTrackDemuxer> WebMDemuxer::GetTrackDemuxer(
    TrackInfo::TrackType aType, uint32_t aTrackNumber) {
  if (GetNumberTracks(aType) <= aTrackNumber) {
    return nullptr;
  }
  RefPtr<WebMTrackDemuxer> e = new WebMTrackDemuxer(this, aType, aTrackNumber);
  DDLINKCHILD("track demuxer", e.get());
  // Keep a reference so the demuxer can notify its track demuxers.
  mDemuxers.AppendElement(e);

  return e.forget();
}

// Discards the queued (pre-demuxed) packets for the given track type.
void WebMDemuxer::Reset(TrackInfo::TrackType aType) {
  mProcessedDiscardPadding = false;
  if (aType == TrackInfo::kVideoTrack) {
    mVideoPackets.Reset();
  } else {
    mAudioPackets.Reset();
  }
}

// Returns the track's DefaultDuration in microseconds, floored to a multiple
// of the segment's TimecodeScale, or -1 if the track has no DefaultDuration.
int64_t WebMDemuxer::FloorDefaultDurationToTimecodeScale(
    nestegg* aContext, unsigned aTrackNumber) {
  uint64_t durationNanoSecs;
  // https://www.webmproject.org/docs/container/#DefaultDuration
  if (0 != nestegg_track_default_duration(aContext, aTrackNumber,
                                          &durationNanoSecs)) {
    return -1;
  }

  // https://www.webmproject.org/docs/container/#TimecodeScale
  uint64_t timecodeScale = 0;
  nestegg_tstamp_scale(aContext, &timecodeScale);
  if (timecodeScale == 0) {
    // Zero TimecodeScale would make timestamps all zero.
    // The Segment should have triggered an error before now, but use the
    // specified default if that has not happened.
268 // https://www.ietf.org/archive/id/draft-ietf-cellar-matroska-21.html#name-timestampscale-element 269 WEBM_DEBUG("Zero timecode scale"); 270 timecodeScale = PR_NSEC_PER_MSEC; 271 } 272 // Round down to nearest multiple of TimecodeScale. 273 // Round down again to microseconds. 274 // This avoids having block end times unintentionally overlap subsequent 275 // frame start times, which would cause subsequent frames to be removed from 276 // MediaSource buffers. 277 return AssertedCast<int64_t>(durationNanoSecs / timecodeScale * 278 timecodeScale / NSECS_PER_USEC); 279 } 280 281 nsresult WebMDemuxer::SetVideoCodecInfo(nestegg* aContext, int aTrackId) { 282 mVideoCodec = nestegg_track_codec_id(aContext, aTrackId); 283 switch (mVideoCodec) { 284 case NESTEGG_CODEC_VP8: 285 mInfo.mVideo.mMimeType = "video/vp8"; 286 break; 287 case NESTEGG_CODEC_VP9: 288 mInfo.mVideo.mMimeType = "video/vp9"; 289 break; 290 case NESTEGG_CODEC_AV1: 291 mInfo.mVideo.mMimeType = "video/av1"; 292 break; 293 default: 294 NS_WARNING("Unknown WebM video codec"); 295 return NS_ERROR_FAILURE; 296 } 297 return NS_OK; 298 } 299 300 nsresult WebMDemuxer::SetAudioCodecInfo(nestegg* aContext, int aTrackId, 301 const nestegg_audio_params& aParams) { 302 mAudioCodec = nestegg_track_codec_id(aContext, aTrackId); 303 switch (mAudioCodec) { 304 case NESTEGG_CODEC_VORBIS: { 305 mInfo.mAudio.mCodecSpecificConfig = 306 AudioCodecSpecificVariant{VorbisCodecSpecificData{}}; 307 mInfo.mAudio.mMimeType = "audio/vorbis"; 308 break; 309 } 310 case NESTEGG_CODEC_OPUS: { 311 uint64_t codecDelayUs = aParams.codec_delay / NSECS_PER_USEC; 312 mInfo.mAudio.mMimeType = "audio/opus"; 313 OpusCodecSpecificData opusCodecSpecificData; 314 opusCodecSpecificData.mContainerCodecDelayFrames = 315 AssertedCast<int64_t>(USECS_PER_S * codecDelayUs / 48000); 316 WEBM_DEBUG("Preroll for Opus: %" PRIu64 " frames", 317 opusCodecSpecificData.mContainerCodecDelayFrames); 318 mInfo.mAudio.mCodecSpecificConfig = 319 
AudioCodecSpecificVariant{std::move(opusCodecSpecificData)}; 320 break; 321 } 322 default: 323 NS_WARNING("Unknown WebM audio codec"); 324 return NS_ERROR_DOM_MEDIA_METADATA_ERR; 325 } 326 327 AutoTArray<const unsigned char*, 4> headers; 328 AutoTArray<size_t, 4> headerLens; 329 nsresult rv = GetCodecPrivateData(aContext, aTrackId, &headers, &headerLens); 330 if (NS_FAILED(rv)) { 331 WEBM_DEBUG("GetCodecPrivateData error for WebM"); 332 return rv; 333 } 334 335 // Vorbis has 3 headers, convert to Xiph extradata format to send them to 336 // the demuxer. 337 // TODO: This is already the format WebM stores them in. Would be nice 338 // to avoid having libnestegg split them only for us to pack them again, 339 // but libnestegg does not give us an API to access this data directly. 340 RefPtr<MediaByteBuffer> audioCodecSpecificBlob = 341 GetAudioCodecSpecificBlob(mInfo.mAudio.mCodecSpecificConfig); 342 if (headers.Length() > 1) { 343 if (!XiphHeadersToExtradata(audioCodecSpecificBlob, headers, headerLens)) { 344 WEBM_DEBUG("Couldn't parse Xiph headers"); 345 return NS_ERROR_FAILURE; 346 } 347 } else { 348 audioCodecSpecificBlob->AppendElements(headers[0], headerLens[0]); 349 } 350 351 return NS_OK; 352 } 353 354 nsresult WebMDemuxer::GetCodecPrivateData( 355 nestegg* aContext, int aTrackId, nsTArray<const unsigned char*>* aHeaders, 356 nsTArray<size_t>* aHeaderLens) { 357 unsigned int nheaders = 0; 358 int r = nestegg_track_codec_data_count(aContext, aTrackId, &nheaders); 359 if (r == -1) { 360 WEBM_DEBUG("nestegg_track_codec_data_count error"); 361 return NS_ERROR_FAILURE; 362 } 363 364 for (uint32_t header = 0; header < nheaders; ++header) { 365 unsigned char* data = 0; 366 size_t length = 0; 367 r = nestegg_track_codec_data(aContext, aTrackId, header, &data, &length); 368 if (r == -1) { 369 WEBM_DEBUG("nestegg_track_codec_data error"); 370 return NS_ERROR_FAILURE; 371 } 372 aHeaders->AppendElement(data); 373 aHeaderLens->AppendElement(length); 374 } 375 return NS_OK; 
}

// Parses the WebM initialization segment: sets up both nestegg contexts,
// records the init data for re-indexing, and populates mInfo with the first
// supported audio and video tracks found. Returns a failure/metadata error
// if parsing fails or no supported track exists.
nsresult WebMDemuxer::ReadMetadata() {
  int r = mVideoContext.Init();
  if (r == -1) {
    WEBM_DEBUG("mVideoContext::Init failure");
    return NS_ERROR_FAILURE;
  }
  if (mAudioContext.Init() == -1) {
    WEBM_DEBUG("mAudioContext::Init failure");
    return NS_ERROR_FAILURE;
  }

  // Both contexts have the metadata; the video context is used here.
  MediaResourceIndex& resource = Resource(TrackInfo::kVideoTrack);
  nestegg* context = Context(TrackInfo::kVideoTrack);

  {
    // Check how much data nestegg read and force feed it to BufferedState.
    RefPtr<MediaByteBuffer> buffer = resource.MediaReadAt(0, resource.Tell());
    if (!buffer) {
      WEBM_DEBUG("resource.MediaReadAt error");
      return NS_ERROR_FAILURE;
    }
    mBufferedState->NotifyDataArrived(buffer->Elements(), buffer->Length(), 0);
    if (mBufferedState->GetInitEndOffset() < 0) {
      WEBM_DEBUG("Couldn't find init end");
      return NS_ERROR_FAILURE;
    }
    MOZ_ASSERT(mBufferedState->GetInitEndOffset() <= resource.Tell());
  }
  // Keep a copy of the init segment so NotifyDataRemoved() can re-prime the
  // buffered-state parser after an eviction.
  mInitData = resource.MediaReadAt(0, mBufferedState->GetInitEndOffset());
  if (!mInitData ||
      mInitData->Length() != size_t(mBufferedState->GetInitEndOffset())) {
    WEBM_DEBUG("Couldn't read init data");
    return NS_ERROR_FAILURE;
  }

  unsigned int ntracks = 0;
  r = nestegg_track_count(context, &ntracks);
  if (r == -1) {
    WEBM_DEBUG("nestegg_track_count error");
    return NS_ERROR_FAILURE;
  }

  // Only the first supported track of each type is used; later tracks of the
  // same type are ignored (see the !mHasVideo / !mHasAudio guards).
  for (unsigned int track = 0; track < ntracks; ++track) {
    int id = nestegg_track_codec_id(context, track);
    if (id == -1) {
      WEBM_DEBUG("nestegg_track_codec_id error");
      return NS_ERROR_FAILURE;
    }

    WEBM_DEBUG("Read metadata, track %u, codec id %d", track, id);
    int type = nestegg_track_type(context, track);
    if (type == NESTEGG_TRACK_VIDEO && !mHasVideo) {
      nestegg_video_params params;
      r = nestegg_track_video_params(context, track, &params);
      if (r == -1) {
        WEBM_DEBUG("nestegg_track_video_params error");
        return NS_ERROR_FAILURE;
      }
      mVideoDefaultDuration =
          FloorDefaultDurationToTimecodeScale(context, track);
      nsresult rv = SetVideoCodecInfo(context, track);
      if (NS_FAILED(rv)) {
        WEBM_DEBUG("Set video codec info error, ignoring track");
        continue;
      }
      mInfo.mVideo.mColorPrimaries = gfxUtils::CicpToColorPrimaries(
          static_cast<gfx::CICP::ColourPrimaries>(params.primaries),
          gMediaDemuxerLog);

      // For VPX, this is our only chance to capture the transfer
      // characteristics, which we can't get from a VPX bitstream later.
      // We only need this value if the video is using the BT2020
      // colorspace, which will be determined on a per-frame basis later.
      mInfo.mVideo.mTransferFunction = gfxUtils::CicpToTransferFunction(
          static_cast<gfx::CICP::TransferCharacteristics>(
              params.transfer_characteristics));

      // Picture region, taking into account cropping, before scaling
      // to the display size.
      unsigned int cropH = params.crop_right + params.crop_left;
      unsigned int cropV = params.crop_bottom + params.crop_top;
      gfx::IntRect pictureRect(params.crop_left, params.crop_top,
                               params.width - cropH, params.height - cropV);

      // If the cropping data appears invalid then use the frame data
      if (pictureRect.width <= 0 || pictureRect.height <= 0 ||
          pictureRect.x < 0 || pictureRect.y < 0) {
        pictureRect.x = 0;
        pictureRect.y = 0;
        pictureRect.width = params.width;
        pictureRect.height = params.height;
      }

      // Validate the container-reported frame and pictureRect sizes. This
      // ensures that our video frame creation code doesn't overflow.
      gfx::IntSize displaySize(params.display_width, params.display_height);
      gfx::IntSize frameSize(params.width, params.height);
      if (!IsValidVideoRegion(frameSize, pictureRect, displaySize)) {
        // Video track's frame sizes will overflow. Ignore the video track.
        continue;
      }

      mVideoTrack = track;
      mHasVideo = true;

      mInfo.mVideo.mDisplay = displaySize;
      mInfo.mVideo.mImage = frameSize;
      mInfo.mVideo.SetImageRect(pictureRect);
      mInfo.mVideo.SetAlpha(params.alpha_mode);

      switch (params.stereo_mode) {
        case NESTEGG_VIDEO_MONO:
          mInfo.mVideo.mStereoMode = StereoMode::MONO;
          break;
        case NESTEGG_VIDEO_STEREO_LEFT_RIGHT:
          mInfo.mVideo.mStereoMode = StereoMode::LEFT_RIGHT;
          break;
        case NESTEGG_VIDEO_STEREO_BOTTOM_TOP:
          mInfo.mVideo.mStereoMode = StereoMode::BOTTOM_TOP;
          break;
        case NESTEGG_VIDEO_STEREO_TOP_BOTTOM:
          mInfo.mVideo.mStereoMode = StereoMode::TOP_BOTTOM;
          break;
        case NESTEGG_VIDEO_STEREO_RIGHT_LEFT:
          mInfo.mVideo.mStereoMode = StereoMode::RIGHT_LEFT;
          break;
      }
      uint64_t duration = 0;
      // nestegg_duration returns 0 on success.
      r = nestegg_duration(context, &duration);
      if (!r) {
        mInfo.mVideo.mDuration = TimeUnit::FromNanoseconds(duration);
      }
      WEBM_DEBUG("stream duration: %lf\n", mInfo.mVideo.mDuration.ToSeconds());
      mInfo.mVideo.mCrypto = GetTrackCrypto(TrackInfo::kVideoTrack, track);
      if (mInfo.mVideo.mCrypto.IsEncrypted()) {
        MOZ_ASSERT(mInfo.mVideo.mCrypto.mCryptoScheme == CryptoScheme::Cenc,
                   "WebM should only use cenc scheme");
        mCrypto.AddInitData(u"webm"_ns, mInfo.mVideo.mCrypto.mKeyId);
      }
    } else if (type == NESTEGG_TRACK_AUDIO && !mHasAudio) {
      nestegg_audio_params params;
      r = nestegg_track_audio_params(context, track, &params);
      if (r == -1) {
        WEBM_DEBUG("nestegg_track_audio_params error");
        return NS_ERROR_FAILURE;
      }

      // Reject nonsensical sample rates / channel counts before they reach
      // the decoders.
      const uint32_t rate = AssertedCast<uint32_t>(std::max(0., params.rate));
      if (rate > AudioInfo::MAX_RATE || rate == 0 ||
          params.channels > AudioConfig::ChannelLayout::MAX_CHANNELS) {
        WEBM_DEBUG("Invalid audio param rate: %lf channel count: %d",
                   params.rate, params.channels);
        return NS_ERROR_DOM_MEDIA_METADATA_ERR;
      }
      params.rate = rate;

      nsresult rv = SetAudioCodecInfo(context, track, params);
      if (NS_FAILED(rv)) {
        WEBM_DEBUG("Set audio codec info error, ignoring track");
        continue;
      }

      mAudioTrack = track;
      mHasAudio = true;
      mAudioDefaultDuration =
          FloorDefaultDurationToTimecodeScale(context, track);
      mSeekPreroll = params.seek_preroll;
      mInfo.mAudio.mRate = rate;
      mInfo.mAudio.mChannels = params.channels;

      uint64_t duration = 0;
      r = nestegg_duration(context, &duration);
      if (!r) {
        mInfo.mAudio.mDuration = TimeUnit::FromNanoseconds(duration);
        WEBM_DEBUG("audio track duration: %lf",
                   mInfo.mAudio.mDuration.ToSeconds());
      }
      mInfo.mAudio.mCrypto = GetTrackCrypto(TrackInfo::kAudioTrack, track);
      if (mInfo.mAudio.mCrypto.IsEncrypted()) {
        MOZ_ASSERT(mInfo.mAudio.mCrypto.mCryptoScheme == CryptoScheme::Cenc,
                   "WebM should only use cenc scheme");
        mCrypto.AddInitData(u"webm"_ns, mInfo.mAudio.mCrypto.mKeyId);
      }
    }
  }

  if (!mHasVideo && !mHasAudio) {
    WEBM_DEBUG("No supported track!");
    return NS_ERROR_DOM_MEDIA_METADATA_ERR;
  }
  WEBM_DEBUG("Read metadata OK");
  return NS_OK;
}

// A WebM is seekable anywhere only when it has a Cues (index) element.
bool WebMDemuxer::IsSeekable() const {
  return Context(TrackInfo::kVideoTrack) &&
         nestegg_has_cues(Context(TrackInfo::kVideoTrack));
}

// Without Cues we can only seek within data that has already been buffered
// and indexed by WebMBufferedState.
bool WebMDemuxer::IsSeekableOnlyInBufferedRanges() const {
  return Context(TrackInfo::kVideoTrack) &&
         !nestegg_has_cues(Context(TrackInfo::kVideoTrack));
}

// Lazily re-indexes the cached byte ranges after new data arrived
// (mNeedReIndex is set by NotifyDataArrived/NotifyDataRemoved).
void WebMDemuxer::EnsureUpToDateIndex() {
  if (!mNeedReIndex || !mInitData) {
    return;
  }
  AutoPinned<MediaResource> resource(
      Resource(TrackInfo::kVideoTrack).GetResource());
  MediaByteRangeSet byteRanges;
  nsresult rv = resource->GetCachedRanges(byteRanges);
  if (NS_FAILED(rv) || byteRanges.IsEmpty()) {
    return;
  }
  mBufferedState->UpdateIndex(byteRanges, resource);

  mNeedReIndex = false;
}

// Marks the index stale; the actual re-index happens in
// EnsureUpToDateIndex() on the next use.
void WebMDemuxer::NotifyDataArrived() {
WEBM_DEBUG("");
  mNeedReIndex = true;
}

// Data was evicted from the cache: rebuild the buffered-state parser from
// the saved init segment and mark the index stale.
void WebMDemuxer::NotifyDataRemoved() {
  mBufferedState->Reset();
  if (mInitData) {
    mBufferedState->NotifyDataArrived(mInitData->Elements(),
                                      mInitData->Length(), 0);
  }
  mNeedReIndex = true;
}

// Returns the accumulated EME init data, or nullptr for unencrypted media.
UniquePtr<EncryptionInfo> WebMDemuxer::GetCrypto() {
  return mCrypto.IsEncrypted() ? MakeUnique<EncryptionInfo>(mCrypto) : nullptr;
}

// Builds the CryptoTrack (scheme, IV size, key id) for a track. Returns a
// default (unencrypted) CryptoTrack when the track has no ContentEncKeyID.
CryptoTrack WebMDemuxer::GetTrackCrypto(TrackInfo::TrackType aType,
                                        size_t aTrackNumber) {
  const int WEBM_IV_SIZE = 16;
  const unsigned char* contentEncKeyId;
  size_t contentEncKeyIdLength;
  CryptoTrack crypto;
  nestegg* context = Context(aType);

  int r = nestegg_track_content_enc_key_id(
      context, aTrackNumber, &contentEncKeyId, &contentEncKeyIdLength);

  if (r == -1) {
    WEBM_DEBUG("nestegg_track_content_enc_key_id failed r=%d", r);
    return crypto;
  }

  uint32_t i;
  nsTArray<uint8_t> initData;
  for (i = 0; i < contentEncKeyIdLength; i++) {
    initData.AppendElement(contentEncKeyId[i]);
  }

  if (!initData.IsEmpty()) {
    // Webm only uses a cenc style scheme.
    crypto.mCryptoScheme = CryptoScheme::Cenc;
    crypto.mIVSize = WEBM_IV_SIZE;
    crypto.mKeyId = std::move(initData);
  }

  return crypto;
}

// Determines keyframe-ness by inspecting the codec bitstream directly; used
// for unencrypted packets where the payload can be parsed.
bool WebMDemuxer::CheckKeyFrameByExamineByteStream(
    const MediaRawData* aSample) {
  switch (mVideoCodec) {
    case NESTEGG_CODEC_VP8:
      return VPXDecoder::IsKeyframe(*aSample, VPXDecoder::Codec::VP8);
    case NESTEGG_CODEC_VP9:
      return VPXDecoder::IsKeyframe(*aSample, VPXDecoder::Codec::VP9);
#ifdef MOZ_AV1
    case NESTEGG_CODEC_AV1:
      return AOMDecoder::IsKeyframe(*aSample);
#endif
    default:
      MOZ_ASSERT_UNREACHABLE(
          "Cannot detect keyframes in unknown WebM video codec");
      return false;
  }
}

// Demuxes the next packet for aType into MediaRawData samples appended to
// aSamples, filling in timing, keyframe, Opus discard-padding and encryption
// metadata. The end time of a sample is derived from the following packet's
// timestamp (or a fallback estimate) since WebM blocks don't carry explicit
// durations for every frame.
nsresult WebMDemuxer::GetNextPacket(TrackInfo::TrackType aType,
                                    MediaRawDataQueue* aSamples) {
  auto result = NextPacket(aType);
  if (result.isErr()) {
    return result.unwrapErr();
  }
  RefPtr<NesteggPacketHolder> holder = result.unwrap();

  int r = 0;
  unsigned int count = 0;
  r = nestegg_packet_count(holder->Packet(), &count);
  if (r == -1) {
    WEBM_DEBUG("nestegg_packet_count: error");
    return NS_ERROR_DOM_MEDIA_DEMUXER_ERR;
  }
  int64_t tstamp = holder->Timestamp();
  int64_t duration = holder->Duration();
  if (aType == TrackInfo::TrackType::kVideoTrack) {
    WEBM_DEBUG("video: tstamp=%" PRId64 ", duration=%" PRId64
               ", mVideoDefaultDuration=%" PRId64,
               tstamp, duration, mVideoDefaultDuration);
  }

  // The end time of this frame is the start time of the next frame.
  // Attempt to fetch the timestamp of the next packet for this track.
result = NextPacket(aType);
  if (result.isErr()) {
    nsresult rv = result.inspectErr();
    if (rv != NS_ERROR_DOM_MEDIA_END_OF_STREAM &&
        // Gecko has historically estimated a duration for the last frame
        // available in a SourceBuffer, if possible, even though this might
        // result in a different frame duration from that which would be
        // calculated if the frame were not parsed until the next frame
        // becomes available.
        rv != NS_ERROR_DOM_MEDIA_WAITING_FOR_DATA) {
      WEBM_DEBUG("NextPacket: error");
      return rv;
    }
  }
  RefPtr<NesteggPacketHolder> next_holder = result.unwrapOr(nullptr);

  // INT64_MIN marks "could not determine an end time".
  int64_t next_tstamp = INT64_MIN;
  // Computes next_tstamp (the current packet's end time) using, in order of
  // preference: the next packet's timestamp (pushing that packet back on the
  // queue), the block duration, the track's DefaultDuration, the distance to
  // the previous frame, the saved pre-reset video end time, or (file
  // playback) the track end time.
  auto calculateNextTimestamp = [&](auto pushPacket,
                                    Maybe<int64_t>* lastFrameTime,
                                    int64_t defaultDuration,
                                    int64_t trackEndTime) {
    MOZ_ASSERT(lastFrameTime);
    if (next_holder) {
      next_tstamp = next_holder->Timestamp();
      (this->*pushPacket)(next_holder);
    } else if (duration >= 0) {
      next_tstamp = tstamp + duration;
    } else if (defaultDuration >= 0) {
      next_tstamp = tstamp + defaultDuration;
    } else if (lastFrameTime->isSome()) {
      // This is a poor estimate, and overestimation overlaps the subsequent
      // block, which can cause cause removal of subsequent frames from
      // MediaSource buffers.
      next_tstamp = tstamp + (tstamp - lastFrameTime->value());
    } else if (mVideoFrameEndTimeBeforeReset) {
      WEBM_DEBUG("Setting next timestamp to be %" PRId64 " us",
                 mVideoFrameEndTimeBeforeReset->ToMicroseconds());
      next_tstamp = mVideoFrameEndTimeBeforeReset->ToMicroseconds();
    } else if (mIsMediaSource) {
      // MSE: we must wait for more data; push the packet back for retry.
      (this->*pushPacket)(holder);
    } else {
      // If we can't get frame's duration, it means either we need to wait for
      // more data for MSE case or this is the last frame for file resource
      // case.
      if (tstamp > trackEndTime) {
        // This shouldn't happen, but some muxers give incorrect durations to
        // segments, then have samples appear beyond those durations.
        WEBM_DEBUG("Found tstamp=%" PRIi64 " > trackEndTime=%" PRIi64
                   " while calculating next timestamp! Indicates a bad mux! "
                   "Will use tstamp value.",
                   tstamp, trackEndTime);
      }
      next_tstamp = std::max<int64_t>(tstamp, trackEndTime);
    }
    *lastFrameTime = Some(tstamp);
  };

  if (aType == TrackInfo::kAudioTrack) {
    calculateNextTimestamp(&WebMDemuxer::PushAudioPacket, &mLastAudioFrameTime,
                           mAudioDefaultDuration,
                           mInfo.mAudio.mDuration.ToMicroseconds());
  } else {
    WEBM_DEBUG("next_holder %c mLastVideoFrameTime %c", next_holder ? 'Y' : 'N',
               mLastVideoFrameTime ? 'Y' : 'N');
    calculateNextTimestamp(&WebMDemuxer::PushVideoPacket, &mLastVideoFrameTime,
                           mVideoDefaultDuration,
                           mInfo.mVideo.mDuration.ToMicroseconds());
  }

  if (mIsMediaSource && next_tstamp == INT64_MIN) {
    WEBM_DEBUG("WebM is a media source, and next timestamp computation filed.");
    return result.unwrapErr();
  }

  // Opus DiscardPadding (ns) trims trailing samples of the last packet.
  int64_t discardPadding = 0;
  if (aType == TrackInfo::kAudioTrack) {
    (void)nestegg_packet_discard_padding(holder->Packet(), &discardPadding);
  }

  int packetEncryption = nestegg_packet_encryption(holder->Packet());

  for (uint32_t i = 0; i < count; ++i) {
    unsigned char* data = nullptr;
    size_t length;
    r = nestegg_packet_data(holder->Packet(), i, &data, &length);
    if (r == -1) {
      WEBM_DEBUG("nestegg_packet_data failed r=%d", r);
      return NS_ERROR_DOM_MEDIA_DEMUXER_ERR;
    }
    unsigned char* alphaData = nullptr;
    size_t alphaLength = 0;
    // Check packets for alpha information if file has declared alpha frames
    // may be present.
    if (mInfo.mVideo.HasAlpha()) {
      r = nestegg_packet_additional_data(holder->Packet(), 1, &alphaData,
                                         &alphaLength);
      if (r == -1) {
        WEBM_DEBUG(
            "nestegg_packet_additional_data failed to retrieve alpha data r=%d",
            r);
      }
    }

    RefPtr<MediaRawData> sample;
    if (mInfo.mVideo.HasAlpha() && alphaLength != 0) {
      sample = new MediaRawData(data, length, alphaData, alphaLength);
      if ((length && !sample->Data()) ||
          (alphaLength && !sample->AlphaData())) {
        WEBM_DEBUG("Couldn't allocate MediaRawData: OOM");
        return NS_ERROR_OUT_OF_MEMORY;
      }
    } else {
      sample = new MediaRawData(data, length);
      if (length && !sample->Data()) {
        WEBM_DEBUG("Couldn't allocate MediaRawData: OOM");
        return NS_ERROR_OUT_OF_MEMORY;
      }
    }
    sample->mTimecode = TimeUnit::FromMicroseconds(tstamp);
    sample->mTime = TimeUnit::FromMicroseconds(tstamp);
    if (next_tstamp > tstamp) {
      sample->mDuration = TimeUnit::FromMicroseconds(next_tstamp - tstamp);
    } else {
      WEBM_DEBUG("tstamp >= next_tstamp");
    }
    sample->mOffset = holder->Offset();
    // Determine if the sample should be a key frame
    if (aType == TrackInfo::kAudioTrack) {
      sample->mKeyframe = true;
    } else {
      sample->mExtraData = mInfo.mVideo.mExtraData;
      if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED ||
          packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_PARTITIONED) {
        // Packet is encrypted, can't peek, use packet info
        sample->mKeyframe = nestegg_packet_has_keyframe(holder->Packet()) ==
                            NESTEGG_PACKET_HAS_KEYFRAME_TRUE;
      } else {
        MOZ_ASSERT(
            packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_UNENCRYPTED ||
                packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_FALSE,
            "Unencrypted packet expected");
        sample->mKeyframe = CheckKeyFrameByExamineByteStream(sample);
      }
    }
    WEBM_DEBUG("push sample tstamp: %" PRId64 " next_tstamp: %" PRId64
               " length: %zu kf: %d",
               tstamp, next_tstamp, length, sample->mKeyframe);

    // DiscardPadding applies to the last frame of the packet only.
    if (discardPadding && i == count - 1) {
      sample->mOriginalPresentationWindow =
          Some(media::TimeInterval{sample->mTime, sample->GetEndTime()});
      if (discardPadding < 0) {
        // This will ensure decoding will error out, and the file is rejected.
        sample->mDuration = TimeUnit::Invalid();
      } else {
        TimeUnit padding = TimeUnit::FromNanoseconds(discardPadding);
        const int samples = opus_packet_get_nb_samples(
            sample->Data(), AssertedCast<int32_t>(sample->Size()),
            AssertedCast<int32_t>(mInfo.mAudio.mRate));
        if (samples <= 0) {
          WEBM_DEBUG(
              "Invalid number of samples, flagging packet for error (padding: "
              "%s, samples: %d, already processed: %s, error: %s)",
              padding.ToString().get(), samples,
              mProcessedDiscardPadding ? "true" : "false",
              (samples == OPUS_BAD_ARG) ? "OPUS_BAD_ARG"
              : (samples == OPUS_INVALID_PACKET) ? "OPUS_INVALID_PACKET"
                                                 : "Undefined Error");
          sample->mDuration = TimeUnit::Invalid();
        } else {
          TimeUnit packetDuration = TimeUnit(samples, mInfo.mAudio.mRate);
          // Padding longer than the packet, or padding seen twice, is a
          // malformed stream: flag the sample so decode rejects it.
          if (padding > packetDuration || mProcessedDiscardPadding) {
            WEBM_DEBUG(
                "Padding frames larger than packet size, flagging packet for "
                "error (padding: %s, duration: %s, already processed: %s)",
                padding.ToString().get(), packetDuration.ToString().get(),
                mProcessedDiscardPadding ? "true" : "false");
            sample->mDuration = TimeUnit::Invalid();
          } else {
            sample->mDuration = packetDuration - padding;
          }
        }
      }
      mProcessedDiscardPadding = true;
    }

    if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED ||
        packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_PARTITIONED) {
      UniquePtr<MediaRawDataWriter> writer(sample->CreateWriter());
      unsigned char const* iv;
      size_t ivLength;
      nestegg_packet_iv(holder->Packet(), &iv, &ivLength);
      writer->mCrypto.mCryptoScheme = CryptoScheme::Cenc;
      writer->mCrypto.mIVSize = ivLength;
      if (ivLength == 0) {
        // Frame is not encrypted. This shouldn't happen as it means the
        // encryption bit is set on a frame with no IV, but we gracefully
        // handle incase.
        MOZ_ASSERT_UNREACHABLE(
            "Unencrypted packets should not have the encryption bit set!");
        WEBM_DEBUG("Unencrypted packet with encryption bit set");
        writer->mCrypto.mPlainSizes.AppendElement(length);
        writer->mCrypto.mEncryptedSizes.AppendElement(0);
      } else {
        // Frame is encrypted
        writer->mCrypto.mIV.AppendElements(iv, 8);
        // Iv from a sample is 64 bits, must be padded with 64 bits more 0s
        // in compliance with spec
        for (uint32_t i = 0; i < 8; i++) {
          writer->mCrypto.mIV.AppendElement(0);
        }

        if (packetEncryption == NESTEGG_PACKET_HAS_SIGNAL_BYTE_ENCRYPTED) {
          writer->mCrypto.mPlainSizes.AppendElement(0);
          writer->mCrypto.mEncryptedSizes.AppendElement(length);
        } else if (packetEncryption ==
                   NESTEGG_PACKET_HAS_SIGNAL_BYTE_PARTITIONED) {
          uint8_t numPartitions = 0;
          const uint32_t* partitions = NULL;
          nestegg_packet_offsets(holder->Packet(), &partitions, &numPartitions);

          // WebM stores a list of 'partitions' in the data, which alternate
          // clear, encrypted. The data in the first partition is always clear.
          // So, and sample might look as follows:
          // 00|XXXX|000|XX, where | represents a partition, 0 a clear byte and
          // X an encrypted byte. If the first bytes in sample are unencrypted,
          // the first partition will be at zero |XXXX|000|XX.
          //
          // As GMP expects the lengths of the clear and encrypted chunks of
          // data, we calculate these from the difference between the last two
          // partitions.
          uint32_t lastOffset = 0;
          bool encrypted = false;

          for (uint8_t i = 0; i < numPartitions; i++) {
            uint32_t partition = partitions[i];
            uint32_t currentLength = partition - lastOffset;

            if (encrypted) {
              writer->mCrypto.mEncryptedSizes.AppendElement(currentLength);
            } else {
              writer->mCrypto.mPlainSizes.AppendElement(currentLength);
            }

            encrypted = !encrypted;
            lastOffset = partition;

            MOZ_ASSERT(lastOffset <= length);
          }

          // Add the data between the last offset and the end of the data.
          // 000|XXX|000
          //         ^---^
          if (encrypted) {
            writer->mCrypto.mEncryptedSizes.AppendElement(length - lastOffset);
          } else {
            writer->mCrypto.mPlainSizes.AppendElement(length - lastOffset);
          }

          // Make sure we have an equal number of encrypted and plain sizes (GMP
          // expects this). This simple check is sufficient as there are two
          // possible cases at this point:
          // 1. The number of samples are even (so we don't need to do anything)
          // 2. There is one more clear sample than encrypted samples, so add a
          // zero length encrypted chunk.
          // There can never be more encrypted partitions than clear partitions
          // due to the alternating structure of the WebM samples and the
          // restriction that the first chunk is always clear.
          if (numPartitions % 2 == 0) {
            writer->mCrypto.mEncryptedSizes.AppendElement(0);
          }

          // Assert that the lengths of the encrypted and plain samples add to
          // the length of the data.
          MOZ_ASSERT(
              ((size_t)(std::accumulate(writer->mCrypto.mPlainSizes.begin(),
                                        writer->mCrypto.mPlainSizes.end(), 0) +
                        std::accumulate(writer->mCrypto.mEncryptedSizes.begin(),
                                        writer->mCrypto.mEncryptedSizes.end(),
                                        0)) == length));
        }
      }
    }
    aSamples->Push(sample);
  }
  return NS_OK;
}

// Pops (or demuxes) the next packet belonging to the requested track,
// discarding packets from other tracks. Returns an error when the track
// type is absent or demuxing fails.
Result<RefPtr<NesteggPacketHolder>, nsresult> WebMDemuxer::NextPacket(
    TrackInfo::TrackType aType) {
  bool isVideo = aType == TrackInfo::kVideoTrack;

  // Flag to indicate that we do need to playback these types of
  // packets.
  bool hasType = isVideo ? mHasVideo : mHasAudio;

  if (!hasType) {
    WEBM_DEBUG("No media type found");
    return Err(NS_ERROR_DOM_MEDIA_DEMUXER_ERR);
  }

  // The packet queue for the type that we are interested in.
  WebMPacketQueue& packets = isVideo ? mVideoPackets : mAudioPackets;

  // Serve a previously pushed-back packet before demuxing new ones.
  if (packets.GetSize() > 0) {
    return packets.PopFront();
  }

  // Track we are interested in
  uint32_t ourTrack = isVideo ? mVideoTrack : mAudioTrack;

  do {
    auto result = DemuxPacket(aType);
    if (result.isErr()) {
      return result.propagateErr();
    }

    RefPtr<NesteggPacketHolder> holder = result.unwrap();
    if (ourTrack == holder->Track()) {
      return holder;
    }
  } while (true);
}

Result<RefPtr<NesteggPacketHolder>, nsresult> WebMDemuxer::DemuxPacket(
    TrackInfo::TrackType aType) {
  nestegg_packet* packet;
  const NestEggContext& context = CallbackContext(aType);
  int r = nestegg_read_packet(context.mContext, &packet);
  if (r <= 0) {
    // Preserve the I/O status before resetting nestegg's read state, so we
    // can tell a read failure apart from a clean end of stream.
    nsresult rv = context.mLastIORV;
    nestegg_read_reset(context.mContext);
    if (r == 0) {
      WEBM_DEBUG("EOS");
      return Err(NS_ERROR_DOM_MEDIA_END_OF_STREAM);
    } else if (r < 0) {
      WEBM_DEBUG("nestegg_read_packet: error");
      return Err(NS_FAILED(rv) ?
rv : NS_ERROR_DOM_MEDIA_DEMUXER_ERR); 1030 } 1031 } 1032 1033 unsigned int track = 0; 1034 r = nestegg_packet_track(packet, &track); 1035 if (r == -1) { 1036 WEBM_DEBUG("nestegg_packet_track: error"); 1037 return Err(NS_ERROR_DOM_MEDIA_DEMUXER_ERR); 1038 } 1039 1040 int64_t offset = Resource(aType).Tell(); 1041 RefPtr<NesteggPacketHolder> holder = new NesteggPacketHolder(); 1042 if (!holder->Init(packet, offset, track, false)) { 1043 WEBM_DEBUG("NesteggPacketHolder::Init: error"); 1044 return Err(NS_ERROR_DOM_MEDIA_DEMUXER_ERR); 1045 } 1046 1047 return holder; 1048 } 1049 1050 void WebMDemuxer::PushAudioPacket(NesteggPacketHolder* aItem) { 1051 mAudioPackets.PushFront(aItem); 1052 } 1053 1054 void WebMDemuxer::PushVideoPacket(NesteggPacketHolder* aItem) { 1055 mVideoPackets.PushFront(aItem); 1056 } 1057 1058 nsresult WebMDemuxer::SeekInternal(TrackInfo::TrackType aType, 1059 const TimeUnit& aTarget) { 1060 EnsureUpToDateIndex(); 1061 uint32_t trackToSeek = mHasVideo ? mVideoTrack : mAudioTrack; 1062 MOZ_ASSERT(aTarget.ToNanoseconds() >= 0, "Seek time can't be negative"); 1063 uint64_t target = static_cast<uint64_t>(aTarget.ToNanoseconds()); 1064 WEBM_DEBUG("Seeking to %lf", aTarget.ToSeconds()); 1065 1066 Reset(aType); 1067 1068 if (mSeekPreroll) { 1069 uint64_t startTime = 0; 1070 if (!mBufferedState->GetStartTime(&startTime)) { 1071 startTime = 0; 1072 } 1073 WEBM_DEBUG("Seek Target: %f", 1074 TimeUnit::FromNanoseconds(target).ToSeconds()); 1075 if (target < mSeekPreroll || target - mSeekPreroll < startTime) { 1076 target = startTime; 1077 } else { 1078 target -= mSeekPreroll; 1079 } 1080 WEBM_DEBUG("SeekPreroll: %f StartTime: %f Adjusted Target: %f", 1081 TimeUnit::FromNanoseconds(mSeekPreroll).ToSeconds(), 1082 TimeUnit::FromNanoseconds(startTime).ToSeconds(), 1083 TimeUnit::FromNanoseconds(target).ToSeconds()); 1084 } 1085 int r = nestegg_track_seek(Context(aType), trackToSeek, target); 1086 if (r == -1) { 1087 WEBM_DEBUG("track_seek for track %u to %f failed, 
r=%d", trackToSeek, 1088 TimeUnit::FromNanoseconds(target).ToSeconds(), r); 1089 // Try seeking directly based on cluster information in memory. 1090 int64_t offset = 0; 1091 bool rv = mBufferedState->GetOffsetForTime(target, &offset); 1092 if (!rv) { 1093 WEBM_DEBUG("mBufferedState->GetOffsetForTime failed too"); 1094 return NS_ERROR_FAILURE; 1095 } 1096 1097 if (offset < 0) { 1098 WEBM_DEBUG("Unknow byte offset time for seek target %" PRIu64 "ns", 1099 target); 1100 return NS_ERROR_FAILURE; 1101 } 1102 1103 r = nestegg_offset_seek(Context(aType), static_cast<uint64_t>(offset)); 1104 if (r == -1) { 1105 WEBM_DEBUG("and nestegg_offset_seek to %" PRIu64 " failed", offset); 1106 return NS_ERROR_FAILURE; 1107 } 1108 WEBM_DEBUG("got offset from buffered state: %" PRIu64 "", offset); 1109 } 1110 1111 if (aType == TrackInfo::kAudioTrack) { 1112 mLastAudioFrameTime.reset(); 1113 } else { 1114 mLastVideoFrameTime.reset(); 1115 } 1116 1117 return NS_OK; 1118 } 1119 1120 bool WebMDemuxer::IsBufferedIntervalValid(uint64_t start, uint64_t end) { 1121 if (start > end) { 1122 // Buffered ranges are clamped to the media's start time and duration. Any 1123 // frames with timestamps outside that range are ignored, see bug 1697641 1124 // for more info. 1125 WEBM_DEBUG("Ignoring range %" PRIu64 "-%" PRIu64 1126 ", due to invalid interval (start > end).", 1127 start, end); 1128 return false; 1129 } 1130 1131 auto startTime = TimeUnit::FromNanoseconds(start); 1132 auto endTime = TimeUnit::FromNanoseconds(end); 1133 1134 if (startTime.IsNegative() || endTime.IsNegative()) { 1135 // We can get timestamps that are conceptually valid, but become 1136 // negative due to uint64 -> int64 conversion from TimeUnit. We should 1137 // not get negative timestamps, so guard against them. 
    WEBM_DEBUG(
        "Invalid range %f-%f, likely result of uint64 -> int64 conversion.",
        startTime.ToSeconds(), endTime.ToSeconds());
    return false;
  }

  return true;
}

// Computes the currently buffered time ranges by mapping the resource's
// cached byte ranges to timestamps through the buffered-state index.
media::TimeIntervals WebMDemuxer::GetBuffered() {
  EnsureUpToDateIndex();
  AutoPinned<MediaResource> resource(
      Resource(TrackInfo::kVideoTrack).GetResource());

  media::TimeIntervals buffered;

  MediaByteRangeSet ranges;
  nsresult rv = resource->GetCachedRanges(ranges);
  if (NS_FAILED(rv)) {
    return media::TimeIntervals();
  }
  uint64_t duration = 0;
  uint64_t startOffset = 0;
  // nestegg_duration: 0 means success here (result used when the call
  // does NOT fail).
  if (!nestegg_duration(Context(TrackInfo::kVideoTrack), &duration)) {
    if (mBufferedState->GetStartTime(&startOffset)) {
      // Media need not start at t=0; express duration as an absolute end
      // time so the clamping below compares like with like.
      duration += startOffset;
    }
    WEBM_DEBUG("Duration: %f StartTime: %f",
               TimeUnit::FromNanoseconds(duration).ToSeconds(),
               TimeUnit::FromNanoseconds(startOffset).ToSeconds());
  }
  for (uint32_t index = 0; index < ranges.Length(); index++) {
    uint64_t start, end;
    // Map a cached byte range to a [start, end] time range in nanoseconds.
    bool rv = mBufferedState->CalculateBufferedForRange(
        ranges[index].mStart, ranges[index].mEnd, &start, &end);
    if (rv) {
      NS_ASSERTION(startOffset <= start,
                   "startOffset negative or larger than start time");

      if (duration && end > duration) {
        WEBM_DEBUG("limit range to duration, end: %f duration: %f",
                   TimeUnit::FromNanoseconds(end).ToSeconds(),
                   TimeUnit::FromNanoseconds(duration).ToSeconds());
        end = duration;
      }

      // Stop accumulating at the first invalid interval.
      if (!IsBufferedIntervalValid(start, end)) {
        WEBM_DEBUG("Invalid interval, bailing");
        break;
      }

      auto startTime = TimeUnit::FromNanoseconds(start);
      auto endTime = TimeUnit::FromNanoseconds(end);

      WEBM_DEBUG("add range %f-%f", startTime.ToSeconds(), endTime.ToSeconds());
      buffered += media::TimeInterval(startTime, endTime);
    }
  }
  return buffered;
}

// Maps a time (nanoseconds) to a byte offset via the buffered-state index.
// Returns false when no index is available or the time is not indexed.
bool WebMDemuxer::GetOffsetForTime(uint64_t aTime, int64_t* aOffset) {
  EnsureUpToDateIndex();
  return mBufferedState && mBufferedState->GetOffsetForTime(aTime, aOffset);
}

// WebMTrackDemuxer
WebMTrackDemuxer::WebMTrackDemuxer(WebMDemuxer* aParent,
                                   TrackInfo::TrackType aType,
                                   uint32_t aTrackNumber)
    : mParent(aParent), mType(aType), mNeedKeyframe(true) {
  mInfo = mParent->GetTrackInfo(aType, aTrackNumber);
  MOZ_ASSERT(mInfo);
}

WebMTrackDemuxer::~WebMTrackDemuxer() { mSamples.Reset(); }

// Returns a copy of this track's metadata.
UniquePtr<TrackInfo> WebMTrackDemuxer::GetInfo() const {
  return mInfo->Clone();
}

RefPtr<WebMTrackDemuxer::SeekPromise> WebMTrackDemuxer::Seek(
    const TimeUnit& aTime) {
  // Seeks to aTime. Upon success, SeekPromise will be resolved with the
  // actual time seeked to. Typically the random access point time

  auto seekTime = aTime;
  bool keyframe = false;

  mNeedKeyframe = true;

  do {
    mSamples.Reset();
    // NOTE(review): SeekInternal's nsresult is ignored here; presumably any
    // failure surfaces through the GetNextPacket call below -- confirm.
    mParent->SeekInternal(mType, seekTime);
    nsresult rv = mParent->GetNextPacket(mType, &mSamples);
    if (NS_FAILED(rv)) {
      if (rv == NS_ERROR_DOM_MEDIA_END_OF_STREAM) {
        // Ignore the error for now, the next GetSample will be rejected with
        // EOS.
        return SeekPromise::CreateAndResolve(TimeUnit::Zero(), __func__);
      }
      return SeekPromise::CreateAndReject(rv, __func__);
    }

    // Check what time we actually seeked to.
    if (mSamples.GetSize() == 0) {
      // We can't determine if the seek succeeded at this stage, so break the
      // loop.
      break;
    }

    // Track the landing time; stop at the first keyframe if present.
    for (const auto& sample : mSamples) {
      seekTime = sample->mTime;
      keyframe = sample->mKeyframe;
      if (keyframe) {
        break;
      }
    }
    if (mType == TrackInfo::kVideoTrack &&
        !mInfo->GetAsVideoInfo()->HasAlpha()) {
      // We only perform a search for a keyframe on videos with alpha layer to
      // prevent potential regression for normal video (even though invalid)
      break;
    }
    if (!keyframe) {
      // We didn't find any keyframe, attempt to seek to the previous cluster.
      seekTime = mSamples.First()->mTime - TimeUnit::FromMicroseconds(1);
    }
  } while (!keyframe && seekTime >= TimeUnit::Zero());

  SetNextKeyFrameTime();

  return SeekPromise::CreateAndResolve(seekTime, __func__);
}

// Pops the next demuxed sample, demuxing further packets as needed. Returns
// the last GetNextPacket error (or EOS) when no sample can be produced.
nsresult WebMTrackDemuxer::NextSample(RefPtr<MediaRawData>& aData) {
  nsresult rv = NS_ERROR_DOM_MEDIA_END_OF_STREAM;
  while (mSamples.GetSize() < 1 &&
         NS_SUCCEEDED((rv = mParent->GetNextPacket(mType, &mSamples)))) {
  }
  if (mSamples.GetSize()) {
    aData = mSamples.PopFront();
    return NS_OK;
  }
  WEBM_DEBUG("WebMTrackDemuxer::NextSample: error");
  return rv;
}

// Collects up to aNumSamples demuxed samples, skipping empty samples and
// leading non-keyframes after a seek/reset.
RefPtr<WebMTrackDemuxer::SamplesPromise> WebMTrackDemuxer::GetSamples(
    int32_t aNumSamples) {
  RefPtr<SamplesHolder> samples = new SamplesHolder;
  MOZ_ASSERT(aNumSamples);

  while (aNumSamples) {
    RefPtr<MediaRawData> sample;
    nsresult rv = NextSample(sample);
    if (NS_FAILED(rv)) {
      // Reject only on hard errors or when nothing was collected; EOS /
      // waiting-for-data with partial results resolves with what we have.
      if ((rv != NS_ERROR_DOM_MEDIA_END_OF_STREAM &&
           rv != NS_ERROR_DOM_MEDIA_WAITING_FOR_DATA) ||
          samples->GetSamples().IsEmpty()) {
        return SamplesPromise::CreateAndReject(rv, __func__);
      }
      break;
    }
    // Ignore empty samples.
    if (sample->Size() == 0) {
      WEBM_DEBUG(
          "0 sized sample encountered while getting samples, skipping it");
      continue;
    }
    // Drop non-keyframes until the first keyframe after a seek/reset.
    if (mNeedKeyframe && !sample->mKeyframe) {
      continue;
    }
    if (!sample->HasValidTime()) {
      return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR,
                                             __func__);
    }
    mNeedKeyframe = false;
    samples->AppendSample(std::move(sample));
    aNumSamples--;
  }

  UpdateSamples(samples->GetSamples());
  return SamplesPromise::CreateAndResolve(samples, __func__);
}

// Looks ahead (bounded by MAX_LOOK_AHEAD of timecode) for the next keyframe
// and records its time in mNextKeyframeTime. All samples demuxed during the
// scan are pushed back onto mSamples in their original order.
void WebMTrackDemuxer::SetNextKeyFrameTime() {
  if (mType != TrackInfo::kVideoTrack || mParent->IsMediaSource()) {
    return;
  }

  auto frameTime = TimeUnit::Invalid();

  mNextKeyframeTime.reset();

  // First scan the samples already queued.
  MediaRawDataQueue skipSamplesQueue;
  bool foundKeyframe = false;
  while (!foundKeyframe && mSamples.GetSize()) {
    RefPtr<MediaRawData> sample = mSamples.PopFront();
    if (sample->mKeyframe) {
      frameTime = sample->mTime;
      foundKeyframe = true;
    }
    skipSamplesQueue.Push(sample.forget());
  }
  // Baseline timecode for the look-ahead limit below.
  Maybe<int64_t> startTime;
  if (skipSamplesQueue.GetSize()) {
    const RefPtr<MediaRawData>& sample = skipSamplesQueue.First();
    startTime.emplace(sample->mTimecode.ToMicroseconds());
  }
  // Demux and buffer frames until we find a keyframe.
  RefPtr<MediaRawData> sample;
  nsresult rv = NS_OK;
  while (!foundKeyframe && NS_SUCCEEDED((rv = NextSample(sample)))) {
    if (sample->mKeyframe) {
      frameTime = sample->mTime;
      foundKeyframe = true;
    }
    int64_t sampleTimecode = sample->mTimecode.ToMicroseconds();
    skipSamplesQueue.Push(sample.forget());
    if (!startTime) {
      startTime.emplace(sampleTimecode);
    } else if (!foundKeyframe &&
               sampleTimecode > startTime.ref() + MAX_LOOK_AHEAD) {
      WEBM_DEBUG("Couldn't find keyframe in a reasonable time, aborting");
      break;
    }
  }
  // We may have demuxed more than intended, so ensure that all frames are kept
  // in the right order.
  mSamples.PushFront(std::move(skipSamplesQueue));

  if (frameTime.IsValid()) {
    mNextKeyframeTime.emplace(frameTime);
    WEBM_DEBUG(
        "Next Keyframe %f (%u queued %.02fs)",
        mNextKeyframeTime.value().ToSeconds(), uint32_t(mSamples.GetSize()),
        (mSamples.Last()->mTimecode - mSamples.First()->mTimecode).ToSeconds());
  } else {
    WEBM_DEBUG("Couldn't determine next keyframe time (%u queued)",
               uint32_t(mSamples.GetSize()));
  }
}

// Clears demuxed state and reseeks to the start of the buffered range, then
// recomputes the next keyframe time.
void WebMTrackDemuxer::Reset() {
  mSamples.Reset();
  media::TimeIntervals buffered = GetBuffered();
  mNeedKeyframe = true;
  if (!buffered.IsEmpty()) {
    WEBM_DEBUG("Seek to start point: %f", buffered.Start(0).ToSeconds());
    mParent->SeekInternal(mType, buffered.Start(0));
    SetNextKeyFrameTime();
  } else {
    mNextKeyframeTime.reset();
  }
}

// Copies track-level crypto info (IV size, key ID) onto each encrypted
// sample, and refreshes the next-keyframe estimate once we've reached it.
void WebMTrackDemuxer::UpdateSamples(
    const nsTArray<RefPtr<MediaRawData>>& aSamples) {
  for (const auto& sample : aSamples) {
    if (sample->mCrypto.IsEncrypted()) {
      UniquePtr<MediaRawDataWriter> writer(sample->CreateWriter());
      writer->mCrypto.mIVSize = mInfo->mCrypto.mIVSize;
      writer->mCrypto.mKeyId.AppendElements(mInfo->mCrypto.mKeyId);
    }
  }
  // NOTE(review): LastElement() assumes aSamples is non-empty; the caller
  // visible here (GetSamples) only passes non-empty arrays -- verify any
  // new callers uphold this.
  if (mNextKeyframeTime.isNothing() ||
      aSamples.LastElement()->mTime >= mNextKeyframeTime.value()) {
    SetNextKeyFrameTime();
  }
}

// Reports the time of the next random access point (keyframe), or infinity
// when none is known.
nsresult WebMTrackDemuxer::GetNextRandomAccessPoint(TimeUnit* aTime) {
  if (mNextKeyframeTime.isNothing()) {
    // There's no next key frame.
    *aTime = TimeUnit::FromInfinity();
  } else {
    *aTime = mNextKeyframeTime.ref();
  }
  return NS_OK;
}

// Discards samples until a keyframe at or after aTimeThreshold is found; the
// keyframe itself is requeued. Resolves with the number of frames skipped,
// or rejects with EOS (carrying the skip count) when none is found.
RefPtr<WebMTrackDemuxer::SkipAccessPointPromise>
WebMTrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) {
  uint32_t parsed = 0;
  bool found = false;
  RefPtr<MediaRawData> sample;
  nsresult rv = NS_OK;

  WEBM_DEBUG("TimeThreshold: %f", aTimeThreshold.ToSeconds());
  while (!found && NS_SUCCEEDED((rv = NextSample(sample)))) {
    parsed++;
    if (sample->mKeyframe && sample->mTime >= aTimeThreshold) {
      WEBM_DEBUG("next sample: %f (parsed: %d)", sample->mTime.ToSeconds(),
                 parsed);
      found = true;
      mSamples.Reset();
      // Requeue the keyframe so the next read returns it.
      mSamples.PushFront(sample.forget());
    }
  }
  if (NS_SUCCEEDED(rv)) {
    SetNextKeyFrameTime();
  }
  if (found) {
    return SkipAccessPointPromise::CreateAndResolve(parsed, __func__);
  } else {
    SkipFailureHolder failure(NS_ERROR_DOM_MEDIA_END_OF_STREAM, parsed);
    return SkipAccessPointPromise::CreateAndReject(std::move(failure),
                                                   __func__);
  }
}

// Buffered ranges are computed at the demuxer (file) level.
media::TimeIntervals WebMTrackDemuxer::GetBuffered() {
  return mParent->GetBuffered();
}

// Drops the parent reference to break the ownership cycle at shutdown.
void WebMTrackDemuxer::BreakCycles() { mParent = nullptr; }

// Returns the byte offset corresponding to aTime, or 0 when it cannot be
// determined (negative time or unindexed position).
int64_t WebMTrackDemuxer::GetEvictionOffset(const TimeUnit& aTime) {
  int64_t offset;
  int64_t nanos = aTime.ToNanoseconds();
  if (nanos < 0 ||
      !mParent->GetOffsetForTime(static_cast<uint64_t>(nanos), &offset)) {
    return 0;
  }

  return offset;
}
}  // namespace mozilla

#undef WEBM_DEBUG