MP3Demuxer.cpp (31932B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "MP3Demuxer.h" 8 9 #include <inttypes.h> 10 11 #include <algorithm> 12 #include <limits> 13 14 #include "ByteWriter.h" 15 #include "TimeUnits.h" 16 #include "VideoUtils.h" 17 #include "mozilla/Assertions.h" 18 19 #define MP3LOG(msg, ...) \ 20 DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Debug, msg, ##__VA_ARGS__) 21 #define MP3LOGV(msg, ...) \ 22 DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Verbose, msg, ##__VA_ARGS__) 23 24 using mozilla::media::TimeInterval; 25 using mozilla::media::TimeIntervals; 26 using mozilla::media::TimeUnit; 27 28 namespace mozilla { 29 30 // MP3Demuxer 31 32 MP3Demuxer::MP3Demuxer(MediaResource* aSource) : mSource(aSource) { 33 DDLINKCHILD("source", aSource); 34 } 35 36 bool MP3Demuxer::InitInternal() { 37 if (!mTrackDemuxer) { 38 mTrackDemuxer = new MP3TrackDemuxer(mSource); 39 DDLINKCHILD("track demuxer", mTrackDemuxer.get()); 40 } 41 return mTrackDemuxer->Init(); 42 } 43 44 RefPtr<MP3Demuxer::InitPromise> MP3Demuxer::Init() { 45 if (!InitInternal()) { 46 MP3LOG("MP3Demuxer::Init() failure: waiting for data"); 47 48 return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR, 49 __func__); 50 } 51 52 MP3LOG("MP3Demuxer::Init() successful"); 53 return InitPromise::CreateAndResolve(NS_OK, __func__); 54 } 55 56 uint32_t MP3Demuxer::GetNumberTracks(TrackInfo::TrackType aType) const { 57 return aType == TrackInfo::kAudioTrack ? 1u : 0u; 58 } 59 60 already_AddRefed<MediaTrackDemuxer> MP3Demuxer::GetTrackDemuxer( 61 TrackInfo::TrackType aType, uint32_t aTrackNumber) { 62 if (!mTrackDemuxer) { 63 return nullptr; 64 } 65 return RefPtr<MP3TrackDemuxer>(mTrackDemuxer).forget(); 66 } 67 68 bool MP3Demuxer::IsSeekable() const { return true; } 69 70 void MP3Demuxer::NotifyDataArrived() { 71 // TODO: bug 1169485. 72 NS_WARNING("Unimplemented function NotifyDataArrived"); 73 MP3LOGV("NotifyDataArrived()"); 74 } 75 76 void MP3Demuxer::NotifyDataRemoved() { 77 // TODO: bug 1169485. 78 NS_WARNING("Unimplemented function NotifyDataRemoved"); 79 MP3LOGV("NotifyDataRemoved()"); 80 } 81 82 // MP3TrackDemuxer 83 84 MP3TrackDemuxer::MP3TrackDemuxer(MediaResource* aSource) 85 : mSource(aSource), 86 mFrameLock(false), 87 mOffset(0), 88 mFirstFrameOffset(0), 89 mNumParsedFrames(0), 90 mFrameIndex(0), 91 mTotalFrameLen(0), 92 mSamplesPerFrame(0), 93 mSamplesPerSecond(0), 94 mChannels(0) { 95 DDLINKCHILD("source", aSource); 96 Reset(); 97 } 98 99 bool MP3TrackDemuxer::Init() { 100 Reset(); 101 FastSeek(TimeUnit()); 102 // Read the first frame to fetch sample rate and other meta data. 103 RefPtr<MediaRawData> frame(GetNextFrame(FindFirstFrame())); 104 105 MP3LOG("Init StreamLength()=%" PRId64 " first-frame-found=%d", StreamLength(), 106 !!frame); 107 108 if (!frame) { 109 return false; 110 } 111 112 // Rewind back to the stream begin to avoid dropping the first frame. 113 FastSeek(TimeUnit()); 114 115 if (!mInfo) { 116 mInfo = MakeUnique<AudioInfo>(); 117 } 118 119 mInfo->mRate = mSamplesPerSecond; 120 mInfo->mChannels = mChannels; 121 mInfo->mBitDepth = 16; 122 mInfo->mMimeType = "audio/mpeg"; 123 mInfo->mDuration = Duration().valueOr(TimeUnit::FromInfinity()); 124 125 MP3LOG("Init mInfo={mRate=%d mChannels=%d mBitDepth=%d mDuration=%s (%lfs)}", 126 mInfo->mRate, mInfo->mChannels, mInfo->mBitDepth, 127 mInfo->mDuration.ToString().get(), mInfo->mDuration.ToSeconds()); 128 129 return mSamplesPerSecond && mChannels; 130 } 131 132 media::TimeUnit MP3TrackDemuxer::SeekPosition() const { 133 TimeUnit pos = Duration(mFrameIndex); 134 auto duration = Duration(); 135 if (duration) { 136 pos = std::min(*duration, pos); 137 } 138 return pos; 139 } 140 141 const FrameParser::Frame& MP3TrackDemuxer::LastFrame() const { 142 return mParser.PrevFrame(); 143 } 144 145 RefPtr<MediaRawData> MP3TrackDemuxer::DemuxSample() { 146 return GetNextFrame(FindNextFrame()); 147 } 148 149 const ID3Parser::ID3Header& MP3TrackDemuxer::ID3Header() const { 150 return mParser.ID3Header(); 151 } 152 153 const FrameParser::VBRHeader& MP3TrackDemuxer::VBRInfo() const { 154 return mParser.VBRInfo(); 155 } 156 157 UniquePtr<TrackInfo> MP3TrackDemuxer::GetInfo() const { return mInfo->Clone(); } 158 159 RefPtr<MP3TrackDemuxer::SeekPromise> MP3TrackDemuxer::Seek( 160 const TimeUnit& aTime) { 161 mRemainingEncoderPadding = AssertedCast<int32_t>(mEncoderPadding); 162 // Efficiently seek to the position. 163 FastSeek(aTime); 164 // Correct seek position by scanning the next frames. 165 const TimeUnit seekTime = ScanUntil(aTime); 166 167 return SeekPromise::CreateAndResolve(seekTime, __func__); 168 } 169 170 TimeUnit MP3TrackDemuxer::FastSeek(const TimeUnit& aTime) { 171 MP3LOG("FastSeek(%" PRId64 ") avgFrameLen=%f mNumParsedFrames=%" PRIu64 172 " mFrameIndex=%" PRId64 " mOffset=%" PRIu64, 173 aTime.ToMicroseconds(), AverageFrameLength(), mNumParsedFrames, 174 mFrameIndex, mOffset); 175 176 const auto& vbr = mParser.VBRInfo(); 177 if (aTime.IsZero()) { 178 // Quick seek to the beginning of the stream. 179 mFrameIndex = 0; 180 } else if (vbr.IsTOCPresent() && Duration() && 181 *Duration() != TimeUnit::Zero()) { 182 // Use TOC for more precise seeking. 183 mFrameIndex = FrameIndexFromOffset(vbr.Offset(aTime, Duration().value())); 184 } else if (AverageFrameLength() > 0) { 185 mFrameIndex = FrameIndexFromTime(aTime); 186 } 187 188 mOffset = OffsetFromFrameIndex(mFrameIndex); 189 190 if (mOffset > mFirstFrameOffset && StreamLength() > 0) { 191 mOffset = std::min(StreamLength() - 1, mOffset); 192 } 193 194 mParser.EndFrameSession(); 195 196 MP3LOG("FastSeek End TOC=%d avgFrameLen=%f mNumParsedFrames=%" PRIu64 197 " mFrameIndex=%" PRId64 " mFirstFrameOffset=%" PRId64 198 " mOffset=%" PRIu64 " SL=%" PRId64 " NumBytes=%u", 199 vbr.IsTOCPresent(), AverageFrameLength(), mNumParsedFrames, 200 mFrameIndex, mFirstFrameOffset, mOffset, StreamLength(), 201 vbr.NumBytes().valueOr(0)); 202 203 return Duration(mFrameIndex); 204 } 205 206 TimeUnit MP3TrackDemuxer::ScanUntil(const TimeUnit& aTime) { 207 MP3LOG("ScanUntil(%" PRId64 ") avgFrameLen=%f mNumParsedFrames=%" PRIu64 208 " mFrameIndex=%" PRId64 " mOffset=%" PRIu64, 209 aTime.ToMicroseconds(), AverageFrameLength(), mNumParsedFrames, 210 mFrameIndex, mOffset); 211 212 if (aTime.IsZero()) { 213 return FastSeek(aTime); 214 } 215 216 if (Duration(mFrameIndex) > aTime) { 217 // We've seeked past the target time, rewind back a little to correct it. 218 const int64_t rewind = aTime.ToMicroseconds() / 100; 219 FastSeek(aTime - TimeUnit::FromMicroseconds(rewind)); 220 } 221 222 if (Duration(mFrameIndex + 1) > aTime) { 223 return SeekPosition(); 224 } 225 226 MediaByteRange nextRange = FindNextFrame(); 227 while (SkipNextFrame(nextRange) && Duration(mFrameIndex + 1) < aTime) { 228 nextRange = FindNextFrame(); 229 MP3LOGV("ScanUntil* avgFrameLen=%f mNumParsedFrames=%" PRIu64 230 " mFrameIndex=%" PRId64 " mOffset=%" PRIu64 " Duration=%" PRId64, 231 AverageFrameLength(), mNumParsedFrames, mFrameIndex, mOffset, 232 Duration(mFrameIndex + 1).ToMicroseconds()); 233 } 234 235 MP3LOG("ScanUntil End avgFrameLen=%f mNumParsedFrames=%" PRIu64 236 " mFrameIndex=%" PRId64 " mOffset=%" PRIu64, 237 AverageFrameLength(), mNumParsedFrames, mFrameIndex, mOffset); 238 239 return SeekPosition(); 240 } 241 242 RefPtr<MP3TrackDemuxer::SamplesPromise> MP3TrackDemuxer::GetSamples( 243 int32_t aNumSamples) { 244 MP3LOGV("GetSamples(%d) Begin mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 245 " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 246 " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d", 247 aNumSamples, mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, 248 mSamplesPerFrame, mSamplesPerSecond, mChannels); 249 250 if (!aNumSamples) { 251 return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, 252 __func__); 253 } 254 255 RefPtr<SamplesHolder> frames = new SamplesHolder(); 256 257 while (aNumSamples--) { 258 RefPtr<MediaRawData> frame(GetNextFrame(FindNextFrame())); 259 if (!frame) { 260 break; 261 } 262 if (!frame->HasValidTime()) { 263 return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, 264 __func__); 265 } 266 frames->AppendSample(std::move(frame)); 267 } 268 269 MP3LOGV("GetSamples() End mSamples.Size()=%zu aNumSamples=%d mOffset=%" PRIu64 270 " mNumParsedFrames=%" PRIu64 " mFrameIndex=%" PRId64 271 " mTotalFrameLen=%" PRIu64 272 " mSamplesPerFrame=%d mSamplesPerSecond=%d " 273 "mChannels=%d", 274 frames->GetSamples().Length(), aNumSamples, mOffset, mNumParsedFrames, 275 mFrameIndex, mTotalFrameLen, mSamplesPerFrame, mSamplesPerSecond, 276 mChannels); 277 278 if (frames->GetSamples().IsEmpty()) { 279 return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM, 280 __func__); 281 } 282 return SamplesPromise::CreateAndResolve(frames, __func__); 283 } 284 285 void MP3TrackDemuxer::Reset() { 286 MP3LOG("Reset()"); 287 288 FastSeek(TimeUnit()); 289 mParser.Reset(); 290 } 291 292 RefPtr<MP3TrackDemuxer::SkipAccessPointPromise> 293 MP3TrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) { 294 // Will not be called for audio-only resources. 295 return SkipAccessPointPromise::CreateAndReject( 296 SkipFailureHolder(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, 0), __func__); 297 } 298 299 int64_t MP3TrackDemuxer::GetResourceOffset() const { return mOffset; } 300 301 TimeIntervals MP3TrackDemuxer::GetBuffered() { 302 AutoPinned<MediaResource> stream(mSource.GetResource()); 303 TimeIntervals buffered; 304 305 if (Duration() && stream->IsDataCachedToEndOfResource(0)) { 306 // Special case completely cached files. This also handles local files. 307 buffered += TimeInterval(TimeUnit(), *Duration()); 308 MP3LOGV("buffered = [[%" PRId64 ", %" PRId64 "]]", 309 TimeUnit().ToMicroseconds(), Duration()->ToMicroseconds()); 310 return buffered; 311 } 312 313 MediaByteRangeSet ranges; 314 nsresult rv = stream->GetCachedRanges(ranges); 315 NS_ENSURE_SUCCESS(rv, buffered); 316 317 for (const auto& range : ranges) { 318 if (range.IsEmpty()) { 319 continue; 320 } 321 TimeUnit start = Duration(FrameIndexFromOffset(range.mStart)); 322 TimeUnit end = Duration(FrameIndexFromOffset(range.mEnd)); 323 MP3LOGV("buffered += [%" PRId64 ", %" PRId64 "]", start.ToMicroseconds(), 324 end.ToMicroseconds()); 325 buffered += TimeInterval(start, end); 326 } 327 328 // If the number of frames reported by the header is valid, 329 // the duration calculated from it is the maximal duration. 330 if (ValidNumAudioFrames() && Duration()) { 331 TimeInterval duration = TimeInterval(TimeUnit(), *Duration()); 332 return buffered.Intersection(duration); 333 } 334 335 return buffered; 336 } 337 338 int64_t MP3TrackDemuxer::StreamLength() const { return mSource.GetLength(); } 339 340 media::NullableTimeUnit NothingIfNegative(TimeUnit aDuration) { 341 if (aDuration.IsNegative()) { 342 return Nothing(); 343 } 344 return Some(aDuration); 345 } 346 347 media::NullableTimeUnit MP3TrackDemuxer::Duration() const { 348 if (!mNumParsedFrames) { 349 return Nothing(); 350 } 351 352 int64_t numFrames = 0; 353 const auto numAudioFrames = ValidNumAudioFrames(); 354 if (numAudioFrames) { 355 // VBR headers don't include the VBR header frame. 356 numFrames = numAudioFrames.value() + 1; 357 return NothingIfNegative(Duration(numFrames) - 358 (EncoderDelay() + Padding())); 359 } 360 361 const int64_t streamLen = StreamLength(); 362 if (streamLen < 0) { // Live streams. 363 // Unknown length, we can't estimate duration. 364 return Nothing(); 365 } 366 // We can't early return when streamLen < 0 before checking numAudioFrames 367 // since some live radio will give an opening remark before playing music 368 // and the duration of the opening talk can be calculated by numAudioFrames. 369 370 int64_t size = streamLen - mFirstFrameOffset; 371 MOZ_ASSERT(size); 372 373 if (mParser.ID3v1MetadataFound() && size > 128) { 374 size -= 128; 375 } 376 377 // If it's CBR, calculate the duration by bitrate. 378 if (!mParser.VBRInfo().IsValid()) { 379 const uint32_t bitrate = mParser.CurrentFrame().Header().Bitrate(); 380 return NothingIfNegative( 381 media::TimeUnit::FromSeconds(static_cast<double>(size) * 8 / bitrate)); 382 } 383 384 if (AverageFrameLength() > 0) { 385 numFrames = std::lround(AssertedCast<double>(size) / AverageFrameLength()); 386 } 387 388 return NothingIfNegative(Duration(numFrames) - (EncoderDelay() + Padding())); 389 } 390 391 TimeUnit MP3TrackDemuxer::Duration(int64_t aNumFrames) const { 392 if (!mSamplesPerSecond) { 393 return TimeUnit::Invalid(); 394 } 395 396 const int64_t frameCount = aNumFrames * mSamplesPerFrame; 397 return TimeUnit(frameCount, mSamplesPerSecond); 398 } 399 400 MediaByteRange MP3TrackDemuxer::FindFirstFrame() { 401 // We attempt to find multiple successive frames to avoid locking onto a false 402 // positive if we're fed a stream that has been cut mid-frame. 403 // For compatibility reasons we have to use the same frame count as Chrome, 404 // since some web sites actually use a file that short to test our playback 405 // capabilities. 406 static const int MIN_SUCCESSIVE_FRAMES = 3; 407 mFrameLock = false; 408 409 MediaByteRange candidateFrame = FindNextFrame(); 410 int numSuccFrames = candidateFrame.Length() > 0; 411 MediaByteRange currentFrame = candidateFrame; 412 MP3LOGV("FindFirst() first candidate frame: mOffset=%" PRIu64 413 " Length()=%" PRIu64, 414 candidateFrame.mStart, candidateFrame.Length()); 415 416 while (candidateFrame.Length()) { 417 mParser.EndFrameSession(); 418 mOffset = currentFrame.mEnd; 419 const MediaByteRange prevFrame = currentFrame; 420 421 // FindNextFrame() here will only return frames consistent with our 422 // candidate frame. 423 currentFrame = FindNextFrame(); 424 numSuccFrames += currentFrame.Length() > 0; 425 // Multiple successive false positives, which wouldn't be caught by the 426 // consistency checks alone, can be detected by wrong alignment (non-zero 427 // gap between frames). 428 const int64_t frameSeparation = currentFrame.mStart - prevFrame.mEnd; 429 430 if (!currentFrame.Length() || frameSeparation != 0) { 431 MP3LOGV( 432 "FindFirst() not enough successive frames detected, " 433 "rejecting candidate frame: successiveFrames=%d, last " 434 "Length()=%" PRIu64 ", last frameSeparation=%" PRId64, 435 numSuccFrames, currentFrame.Length(), frameSeparation); 436 437 mParser.ResetFrameData(); 438 mOffset = candidateFrame.mStart + 1; 439 candidateFrame = FindNextFrame(); 440 numSuccFrames = candidateFrame.Length() > 0; 441 currentFrame = candidateFrame; 442 MP3LOGV("FindFirst() new candidate frame: mOffset=%" PRIu64 443 " Length()=%" PRIu64, 444 candidateFrame.mStart, candidateFrame.Length()); 445 } else if (numSuccFrames >= MIN_SUCCESSIVE_FRAMES) { 446 MP3LOG( 447 "FindFirst() accepting candidate frame: " 448 "successiveFrames=%d", 449 numSuccFrames); 450 mFrameLock = true; 451 return candidateFrame; 452 } else if (prevFrame.mStart == mParser.TotalID3HeaderSize() && 453 currentFrame.mEnd == StreamLength()) { 454 // We accept streams with only two frames if both frames are valid. This 455 // is to handle very short files and provide parity with Chrome. See 456 // bug 1432195 for more information. This will not handle short files 457 // with a trailing tag, but as of writing we lack infrastructure to 458 // handle such tags. 459 MP3LOG( 460 "FindFirst() accepting candidate frame for short stream: " 461 "successiveFrames=%d", 462 numSuccFrames); 463 mFrameLock = true; 464 return candidateFrame; 465 } 466 } 467 468 MP3LOG("FindFirst() no suitable first frame found"); 469 return candidateFrame; 470 } 471 472 static bool VerifyFrameConsistency(const FrameParser::Frame& aFrame1, 473 const FrameParser::Frame& aFrame2) { 474 const auto& h1 = aFrame1.Header(); 475 const auto& h2 = aFrame2.Header(); 476 477 return h1.IsValid() && h2.IsValid() && h1.Layer() == h2.Layer() && 478 h1.SlotSize() == h2.SlotSize() && 479 h1.SamplesPerFrame() == h2.SamplesPerFrame() && 480 h1.Channels() == h2.Channels() && h1.SampleRate() == h2.SampleRate() && 481 h1.RawVersion() == h2.RawVersion() && 482 h1.RawProtection() == h2.RawProtection(); 483 } 484 485 MediaByteRange MP3TrackDemuxer::FindNextFrame() { 486 static const int BUFFER_SIZE = 64; 487 static const uint32_t MAX_SKIPPABLE_BYTES = 1024 * BUFFER_SIZE; 488 489 MP3LOGV("FindNext() Begin mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 490 " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 491 " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d", 492 mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, 493 mSamplesPerFrame, mSamplesPerSecond, mChannels); 494 495 uint8_t buffer[BUFFER_SIZE]; 496 uint32_t read = 0; 497 498 bool foundFrame = false; 499 int64_t frameHeaderOffset = 0; 500 int64_t startOffset = mOffset; 501 const bool searchingForID3 = !mParser.ID3Header().HasSizeBeenSet(); 502 503 // Check whether we've found a valid MPEG frame. 504 while (!foundFrame) { 505 // How many bytes we can go without finding a valid MPEG frame 506 // (effectively rounded up to the next full buffer size multiple, as we 507 // only check this before reading the next set of data into the buffer). 508 509 // This default value of 0 will be used during testing whether we're being 510 // fed a valid stream, which shouldn't have any gaps between frames. 511 uint32_t maxSkippableBytes = 0; 512 513 if (!mParser.FirstFrame().Length()) { 514 // We're looking for the first valid frame. A well-formed file should 515 // have its first frame header right at the start (skipping an ID3 tag 516 // if necessary), but in order to support files that might have been 517 // improperly cut, we search the first few kB for a frame header. 518 maxSkippableBytes = MAX_SKIPPABLE_BYTES; 519 // Since we're counting the skipped bytes from the offset we started 520 // this parsing session with, we need to discount the ID3 tag size only 521 // if we were looking for one during the current frame parsing session. 522 if (searchingForID3) { 523 maxSkippableBytes += mParser.TotalID3HeaderSize(); 524 } 525 } else if (mFrameLock) { 526 // We've found a valid MPEG stream, so don't impose any limits 527 // to allow skipping corrupted data until we hit EOS. 528 maxSkippableBytes = std::numeric_limits<uint32_t>::max(); 529 } 530 531 if ((mOffset - startOffset > maxSkippableBytes) || 532 (read = Read(buffer, mOffset, BUFFER_SIZE)) == 0) { 533 MP3LOG( 534 "FindNext() EOS or exceeded maxSkippeableBytes without a frame " 535 "(read: %d)", 536 read); 537 // This is not a valid MPEG audio stream or we've reached EOS, give up. 538 break; 539 } 540 541 BufferReader reader(buffer, read); 542 uint32_t bytesToSkip = 0; 543 auto res = mParser.Parse(&reader, &bytesToSkip); 544 foundFrame = res.unwrapOr(false); 545 int64_t readerOffset = static_cast<int64_t>(reader.Offset()); 546 frameHeaderOffset = mOffset + readerOffset - FrameParser::FrameHeader::SIZE; 547 548 // If we've found neither an MPEG frame header nor an ID3v2 tag, 549 // the reader shouldn't have any bytes remaining. 550 MOZ_ASSERT(foundFrame || bytesToSkip || !reader.Remaining()); 551 552 if (foundFrame && mParser.FirstFrame().Length() && 553 !VerifyFrameConsistency(mParser.FirstFrame(), mParser.CurrentFrame())) { 554 MP3LOG("Skipping frame"); 555 // We've likely hit a false-positive, ignore it and proceed with the 556 // search for the next valid frame. 557 foundFrame = false; 558 mOffset = frameHeaderOffset + 1; 559 mParser.EndFrameSession(); 560 } else { 561 // Advance mOffset by the amount of bytes read and if necessary, 562 // skip an ID3v2 tag which stretches beyond the current buffer. 563 NS_ENSURE_TRUE(mOffset + read + bytesToSkip > mOffset, 564 MediaByteRange(0, 0)); 565 mOffset += static_cast<int64_t>(read + bytesToSkip); 566 } 567 } 568 569 if (StreamLength() != -1) { 570 mEOS = frameHeaderOffset + mParser.CurrentFrame().Length() + BUFFER_SIZE > 571 StreamLength(); 572 } 573 574 if (!foundFrame || !mParser.CurrentFrame().Length()) { 575 MP3LOG("FindNext() Exit foundFrame=%d mParser.CurrentFrame().Length()=%d ", 576 foundFrame, mParser.CurrentFrame().Length()); 577 return {0, 0}; 578 } 579 580 MP3LOGV("FindNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 581 " mFrameIndex=%" PRId64 " frameHeaderOffset=%" PRId64 582 " mTotalFrameLen=%" PRIu64 583 " mSamplesPerFrame=%d mSamplesPerSecond=%d" 584 " mChannels=%d, mEOS=%s", 585 mOffset, mNumParsedFrames, mFrameIndex, frameHeaderOffset, 586 mTotalFrameLen, mSamplesPerFrame, mSamplesPerSecond, mChannels, 587 mEOS ? "true" : "false"); 588 589 return {frameHeaderOffset, 590 frameHeaderOffset + mParser.CurrentFrame().Length()}; 591 } 592 593 bool MP3TrackDemuxer::SkipNextFrame(const MediaByteRange& aRange) { 594 if (!mNumParsedFrames || !aRange.Length()) { 595 // We can't skip the first frame, since it could contain VBR headers. 596 RefPtr<MediaRawData> frame(GetNextFrame(aRange)); 597 return frame; 598 } 599 600 UpdateState(aRange); 601 602 MP3LOGV("SkipNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 603 " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 604 " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d", 605 mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, 606 mSamplesPerFrame, mSamplesPerSecond, mChannels); 607 608 return true; 609 } 610 611 media::TimeUnit MP3TrackDemuxer::EncoderDelay() const { 612 return media::TimeUnit(mEncoderDelay, mSamplesPerSecond); 613 } 614 615 uint32_t MP3TrackDemuxer::EncoderDelayFrames() const { return mEncoderDelay; } 616 617 media::TimeUnit MP3TrackDemuxer::Padding() const { 618 return media::TimeUnit(mEncoderPadding, mSamplesPerSecond); 619 } 620 621 uint32_t MP3TrackDemuxer::PaddingFrames() const { return mEncoderPadding; } 622 623 already_AddRefed<MediaRawData> MP3TrackDemuxer::GetNextFrame( 624 const MediaByteRange& aRange) { 625 MP3LOG("GetNext() Begin({mStart=%" PRId64 " Length()=%" PRId64 "})", 626 aRange.mStart, aRange.Length()); 627 if (!aRange.Length()) { 628 return nullptr; 629 } 630 631 RefPtr<MediaRawData> frame = new MediaRawData(); 632 frame->mOffset = aRange.mStart; 633 634 UniquePtr<MediaRawDataWriter> frameWriter(frame->CreateWriter()); 635 if (!frameWriter->SetSize(static_cast<size_t>(aRange.Length()))) { 636 MP3LOG("GetNext() Exit failed to allocated media buffer"); 637 return nullptr; 638 } 639 640 const uint32_t read = 641 Read(frameWriter->Data(), frame->mOffset, frame->Size()); 642 643 if (read != aRange.Length()) { 644 MP3LOG("GetNext() Exit read=%u frame->Size()=%zu", read, frame->Size()); 645 return nullptr; 646 } 647 648 UpdateState(aRange); 649 650 if (mNumParsedFrames == 1) { 651 // First frame parsed, let's read VBR info if available. 652 BufferReader reader(frame->Data(), frame->Size()); 653 mFirstFrameOffset = frame->mOffset; 654 655 if (mParser.ParseVBRHeader(&reader)) { 656 // Parsing was successful 657 if (mParser.VBRInfo().Type() == FrameParser::VBRHeader::XING) { 658 MP3LOG("XING header present, skipping encoder delay (%u frames)", 659 mParser.VBRInfo().EncoderDelay()); 660 mEncoderDelay = mParser.VBRInfo().EncoderDelay(); 661 mEncoderPadding = mParser.VBRInfo().EncoderPadding(); 662 // Padding is encoded as a 12-bit unsigned number so this is fine. 663 mRemainingEncoderPadding = AssertedCast<int32_t>(mEncoderPadding); 664 if (mEncoderDelay == 0) { 665 // Skip the VBR frame + the decoder delay, that is always 529 frames 666 // in practice for the decoder we're using. 667 mEncoderDelay = mSamplesPerFrame + 529; 668 MP3LOG( 669 "No explicit delay present in vbr header, delay is assumed to be " 670 "%u frames\n", 671 mEncoderDelay); 672 } 673 } else if (mParser.VBRInfo().Type() == FrameParser::VBRHeader::VBRI) { 674 MP3LOG("VBRI header present, skipping encoder delay (%u frames)", 675 mParser.VBRInfo().EncoderDelay()); 676 mEncoderDelay = mParser.VBRInfo().EncoderDelay(); 677 } 678 } 679 } 680 681 TimeUnit rawPts = Duration(mFrameIndex - 1) - EncoderDelay(); 682 TimeUnit rawDuration = Duration(1); 683 TimeUnit rawEnd = rawPts + rawDuration; 684 685 frame->mTime = std::max(TimeUnit::Zero(mSamplesPerSecond), rawPts); 686 687 frame->mDuration = Duration(1); 688 frame->mTimecode = frame->mTime; 689 frame->mKeyframe = true; 690 frame->mEOS = mEOS; 691 692 // Handle decoder delay. A packet must be trimmed if its pts, adjusted for 693 // decoder delay, is negative. A packet can be trimmed entirely. 694 if (rawPts.IsNegative()) { 695 frame->mDuration = 696 std::max(TimeUnit::Zero(mSamplesPerSecond), rawEnd - frame->mTime); 697 } 698 699 // It's possible to create an mp3 file that has a padding value that somehow 700 // spans multiple packets. In that case the duration is probably known, 701 // because it's probably a VBR file with a XING header (that has a duration 702 // field). Use the duration to be able to set the correct duration on 703 // packets that aren't the last one. 704 // For most files, the padding is less than a packet, it's simply substracted. 705 if (mParser.VBRInfo().Type() == FrameParser::VBRHeader::XING && 706 mRemainingEncoderPadding > 0 && 707 frame->GetEndTime() > Duration().valueOr(TimeUnit::FromInfinity())) { 708 TimeUnit duration = Duration().value(); 709 TimeUnit inPaddingZone = frame->GetEndTime() - duration; 710 TimeUnit originalEnd = frame->GetEndTime(); 711 TimeUnit originalPts = frame->mTime; 712 frame->mDuration -= inPaddingZone; 713 // Packet is entirely padding and will be completely discarded by the 714 // decoder. 715 if (frame->mDuration.IsNegative()) { 716 frame->mDuration = TimeUnit::Zero(mSamplesPerSecond); 717 } 718 int32_t paddingFrames = 719 AssertedCast<int32_t>(inPaddingZone.ToTicksAtRate(mSamplesPerSecond)); 720 if (mRemainingEncoderPadding >= paddingFrames) { 721 mRemainingEncoderPadding -= paddingFrames; 722 } else { 723 mRemainingEncoderPadding = 0; 724 } 725 MP3LOG("Trimming [%s, %s] to [%s,%s] (padding) (stream duration: %s)", 726 originalPts.ToString().get(), originalEnd.ToString().get(), 727 frame->mTime.ToString().get(), frame->GetEndTime().ToString().get(), 728 duration.ToString().get()); 729 } else if (frame->mEOS && 730 mRemainingEncoderPadding <= 731 frame->mDuration.ToTicksAtRate(mSamplesPerSecond)) { 732 frame->mDuration -= TimeUnit(mRemainingEncoderPadding, mSamplesPerSecond); 733 MOZ_ASSERT(frame->mDuration.IsPositiveOrZero()); 734 MP3LOG("Trimming last packet %s to [%s,%s]", Padding().ToString().get(), 735 frame->mTime.ToString().get(), frame->GetEndTime().ToString().get()); 736 } 737 738 MP3LOGV("GetNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64 739 " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64 740 " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d, mEOS=%s", 741 mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen, 742 mSamplesPerFrame, mSamplesPerSecond, mChannels, 743 mEOS ? "true" : "false"); 744 745 // It's possible for the duration of a frame to be zero if the frame is to be 746 // trimmed entirely because it's fully comprised of decoder delay samples. 747 // This is common at the beginning of an stream. 748 MOZ_ASSERT(frame->mDuration.IsPositiveOrZero()); 749 750 MP3LOG("Packet demuxed: pts [%s, %s] (duration: %s)", 751 frame->mTime.ToString().get(), frame->GetEndTime().ToString().get(), 752 frame->mDuration.ToString().get()); 753 754 // Indicate original packet information to trim after decoding. 755 if (frame->mDuration != rawDuration) { 756 frame->mOriginalPresentationWindow = Some(TimeInterval{rawPts, rawEnd}); 757 MP3LOG("Total packet time excluding trimming: [%s, %s]", 758 rawPts.ToString().get(), rawEnd.ToString().get()); 759 } 760 761 return frame.forget(); 762 } 763 764 int64_t MP3TrackDemuxer::OffsetFromFrameIndex(int64_t aFrameIndex) const { 765 int64_t offset = 0; 766 const auto& vbr = mParser.VBRInfo(); 767 768 if (vbr.IsComplete()) { 769 offset = mFirstFrameOffset + aFrameIndex * vbr.NumBytes().value() / 770 vbr.NumAudioFrames().value(); 771 } else if (AverageFrameLength() > 0) { 772 offset = mFirstFrameOffset + 773 AssertedCast<int64_t>(static_cast<float>(aFrameIndex) * 774 AverageFrameLength()); 775 } 776 777 MP3LOGV("OffsetFromFrameIndex(%" PRId64 ") -> %" PRId64, aFrameIndex, offset); 778 return std::max<int64_t>(mFirstFrameOffset, offset); 779 } 780 781 int64_t MP3TrackDemuxer::FrameIndexFromOffset(int64_t aOffset) const { 782 int64_t frameIndex = 0; 783 const auto& vbr = mParser.VBRInfo(); 784 785 if (vbr.IsComplete()) { 786 frameIndex = 787 AssertedCast<int64_t>(static_cast<float>(aOffset - mFirstFrameOffset) / 788 static_cast<float>(vbr.NumBytes().value()) * 789 static_cast<float>(vbr.NumAudioFrames().value())); 790 frameIndex = std::min<int64_t>(vbr.NumAudioFrames().value(), frameIndex); 791 } else if (AverageFrameLength() > 0) { 792 frameIndex = AssertedCast<int64_t>( 793 static_cast<float>(aOffset - mFirstFrameOffset) / AverageFrameLength()); 794 } 795 796 MP3LOGV("FrameIndexFromOffset(%" PRId64 ") -> %" PRId64, aOffset, frameIndex); 797 return std::max<int64_t>(0, frameIndex); 798 } 799 800 int64_t MP3TrackDemuxer::FrameIndexFromTime( 801 const media::TimeUnit& aTime) const { 802 int64_t frameIndex = 0; 803 if (mSamplesPerSecond > 0 && mSamplesPerFrame > 0) { 804 frameIndex = AssertedCast<int64_t>( 805 aTime.ToSeconds() * mSamplesPerSecond / mSamplesPerFrame - 1); 806 } 807 808 MP3LOGV("FrameIndexFromOffset(%fs) -> %" PRId64, aTime.ToSeconds(), 809 frameIndex); 810 return std::max<int64_t>(0, frameIndex); 811 } 812 813 void MP3TrackDemuxer::UpdateState(const MediaByteRange& aRange) { 814 // Prevent overflow. 815 if (mTotalFrameLen + aRange.Length() < mTotalFrameLen) { 816 // These variables have a linear dependency and are only used to derive the 817 // average frame length. 818 mTotalFrameLen /= 2; 819 mNumParsedFrames /= 2; 820 } 821 822 // Full frame parsed, move offset to its end. 823 mOffset = aRange.mEnd; 824 825 mTotalFrameLen += aRange.Length(); 826 827 if (!mSamplesPerFrame) { 828 mSamplesPerFrame = mParser.CurrentFrame().Header().SamplesPerFrame(); 829 mSamplesPerSecond = mParser.CurrentFrame().Header().SampleRate(); 830 mChannels = mParser.CurrentFrame().Header().Channels(); 831 } 832 833 ++mNumParsedFrames; 834 ++mFrameIndex; 835 MOZ_ASSERT(mFrameIndex > 0); 836 837 // Prepare the parser for the next frame parsing session. 838 mParser.EndFrameSession(); 839 } 840 841 uint32_t MP3TrackDemuxer::Read(uint8_t* aBuffer, int64_t aOffset, 842 uint32_t aSize) { 843 MP3LOGV("MP3TrackDemuxer::Read(%p %" PRId64 " %d)", aBuffer, aOffset, aSize); 844 845 const int64_t streamLen = StreamLength(); 846 if (mInfo && streamLen > 0) { 847 // Prevent blocking reads after successful initialization. 848 int64_t max = streamLen > aOffset ? streamLen - aOffset : 0; 849 aSize = std::min<int64_t>(aSize, max); 850 } 851 852 uint32_t read = 0; 853 MP3LOGV("MP3TrackDemuxer::Read -> ReadAt(%u)", aSize); 854 const nsresult rv = mSource.ReadAt(aOffset, reinterpret_cast<char*>(aBuffer), 855 static_cast<uint32_t>(aSize), &read); 856 NS_ENSURE_SUCCESS(rv, 0); 857 return read; 858 } 859 860 double MP3TrackDemuxer::AverageFrameLength() const { 861 if (mNumParsedFrames) { 862 return static_cast<double>(mTotalFrameLen) / 863 static_cast<double>(mNumParsedFrames); 864 } 865 const auto& vbr = mParser.VBRInfo(); 866 if (vbr.IsComplete() && vbr.NumAudioFrames().value() + 1) { 867 return static_cast<double>(vbr.NumBytes().value()) / 868 (vbr.NumAudioFrames().value() + 1); 869 } 870 return 0.0; 871 } 872 873 Maybe<uint32_t> MP3TrackDemuxer::ValidNumAudioFrames() const { 874 return mParser.VBRInfo().IsValid() && 875 mParser.VBRInfo().NumAudioFrames().valueOr(0) + 1 > 1 876 ? mParser.VBRInfo().NumAudioFrames() 877 : Nothing(); 878 } 879 880 } // namespace mozilla 881 882 #undef MP3LOG 883 #undef MP3LOGV