MoofParser.h (12717B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #ifndef MOOF_PARSER_H_ 6 #define MOOF_PARSER_H_ 7 8 #include "Atom.h" 9 #include "AtomType.h" 10 #include "ByteStream.h" 11 #include "MP4Interval.h" 12 #include "MediaResource.h" 13 #include "SinfParser.h" 14 #include "TimeUnits.h" 15 #include "mozilla/Variant.h" 16 17 namespace mozilla { 18 19 class Box; 20 class BoxContext; 21 class BoxReader; 22 class Moof; 23 24 // Used to track the CTS end time of the last sample of a track 25 // in the preceeding Moof, so that we can smooth tracks' timestamps 26 // across Moofs. 27 struct TrackEndCts { 28 TrackEndCts(uint32_t aTrackId, const media::TimeUnit& aCtsEndTime) 29 : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {} 30 uint32_t mTrackId; 31 media::TimeUnit mCtsEndTime; 32 }; 33 34 class Mvhd : public Atom { 35 public: 36 Mvhd() 37 : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {} 38 explicit Mvhd(const Box& aBox); 39 40 Result<media::TimeUnit, nsresult> ToTimeUnit(int64_t aTimescaleUnits) const { 41 if (!mTimescale) { 42 NS_WARNING("invalid mTimescale"); 43 return Err(NS_ERROR_FAILURE); 44 } 45 return media::TimeUnit(aTimescaleUnits, mTimescale); 46 } 47 48 uint64_t mCreationTime; 49 uint64_t mModificationTime; 50 uint32_t mTimescale; 51 uint64_t mDuration; 52 53 protected: 54 Result<Ok, nsresult> Parse(const Box& aBox); 55 }; 56 57 class Tkhd : public Mvhd { 58 public: 59 Tkhd() : mTrackId(0) {} 60 explicit Tkhd(const Box& aBox); 61 62 uint32_t mTrackId; 63 64 protected: 65 Result<Ok, nsresult> Parse(const Box& aBox); 66 }; 67 68 class Mdhd : public Mvhd { 69 public: 70 Mdhd() = default; 71 explicit Mdhd(const Box& aBox); 72 }; 73 74 class Trex : public Atom { 75 public: 76 explicit Trex(uint32_t aTrackId) 77 : mFlags(0), 78 mTrackId(aTrackId), 79 mDefaultSampleDescriptionIndex(0), 80 mDefaultSampleDuration(0), 81 mDefaultSampleSize(0), 82 mDefaultSampleFlags(0) {} 83 84 explicit Trex(const Box& aBox); 85 86 uint32_t mFlags; 87 uint32_t mTrackId; 88 uint32_t mDefaultSampleDescriptionIndex; 89 uint32_t mDefaultSampleDuration; 90 uint32_t mDefaultSampleSize; 91 uint32_t mDefaultSampleFlags; 92 93 protected: 94 Result<Ok, nsresult> Parse(const Box& aBox); 95 }; 96 97 class Tfhd : public Trex { 98 public: 99 explicit Tfhd(const Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) { 100 mValid = aTrex.IsValid(); 101 } 102 Tfhd(const Box& aBox, const Trex& aTrex); 103 104 uint64_t mBaseDataOffset; 105 106 protected: 107 Result<Ok, nsresult> Parse(const Box& aBox); 108 }; 109 110 class Tfdt : public Atom { 111 public: 112 Tfdt() : mBaseMediaDecodeTime(0) {} 113 explicit Tfdt(const Box& aBox); 114 115 uint64_t mBaseMediaDecodeTime; 116 117 protected: 118 Result<Ok, nsresult> Parse(const Box& aBox); 119 }; 120 121 class Edts : public Atom { 122 public: 123 Edts() : mMediaStart(0), mEmptyOffset(0) {} 124 explicit Edts(const Box& aBox); 125 virtual bool IsValid() const override { 126 // edts is optional 127 return true; 128 } 129 130 int64_t mMediaStart; 131 int64_t mEmptyOffset; 132 133 protected: 134 Result<Ok, nsresult> Parse(const Box& aBox); 135 }; 136 137 struct Sample { 138 mozilla::MediaByteRange mByteRange; 139 // Crypto information coming from senc box: shall be used first 140 CopyableTArray<uint8_t> mIV; 141 CopyableTArray<uint32_t> mPlainSizes; 142 // The number of encrypted bytes in each subsample. The nth element in the 143 // array is the number of encrypted bytes at the start of the nth subsample. 144 CopyableTArray<uint32_t> mEncryptedSizes; 145 // Crypto information coming from saio box: shall be used if no senc 146 // information is present 147 mozilla::MediaByteRange mCencRange; 148 media::TimeUnit mDecodeTime; 149 MP4Interval<media::TimeUnit> mCompositionRange; 150 bool mSync; 151 }; 152 153 class Saiz final : public Atom { 154 public: 155 Saiz(const Box& aBox, AtomType aDefaultType); 156 157 AtomType mAuxInfoType; 158 uint32_t mAuxInfoTypeParameter; 159 FallibleTArray<uint8_t> mSampleInfoSize; 160 161 protected: 162 Result<Ok, nsresult> Parse(const Box& aBox); 163 }; 164 165 class Saio final : public Atom { 166 public: 167 Saio(const Box& aBox, AtomType aDefaultType); 168 169 AtomType mAuxInfoType; 170 uint32_t mAuxInfoTypeParameter; 171 FallibleTArray<uint64_t> mOffsets; 172 173 protected: 174 Result<Ok, nsresult> Parse(const Box& aBox); 175 }; 176 177 struct SampleToGroupEntry { 178 public: 179 static const uint32_t kTrackGroupDescriptionIndexBase = 0; 180 static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000; 181 182 SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex) 183 : mSampleCount(aSampleCount), 184 mGroupDescriptionIndex(aGroupDescriptionIndex) {} 185 186 uint32_t mSampleCount; 187 uint32_t mGroupDescriptionIndex; 188 }; 189 190 class Sbgp final : public Atom // SampleToGroup box. 191 { 192 public: 193 explicit Sbgp(const Box& aBox); 194 195 AtomType mGroupingType; 196 uint32_t mGroupingTypeParam; 197 FallibleTArray<SampleToGroupEntry> mEntries; 198 199 protected: 200 Result<Ok, nsresult> Parse(const Box& aBox); 201 }; 202 203 // Stores information form CencSampleEncryptionInformationGroupEntry (seig). 204 // Cenc here refers to the common encryption standard, rather than the specific 205 // cenc scheme from that standard. This structure is used for all encryption 206 // schemes. I.e. it is used for both cenc and cbcs, not just cenc. 207 struct CencSampleEncryptionInfoEntry final { 208 public: 209 CencSampleEncryptionInfoEntry() = default; 210 211 Result<Ok, nsresult> Init(BoxReader& aReader); 212 213 bool mIsEncrypted = false; 214 uint8_t mIVSize = 0; 215 CopyableTArray<uint8_t> mKeyId; 216 uint8_t mCryptByteBlock = 0; 217 uint8_t mSkipByteBlock = 0; 218 CopyableTArray<uint8_t> mConsantIV; 219 }; 220 221 class Sgpd final : public Atom // SampleGroupDescription box. 222 { 223 public: 224 explicit Sgpd(const Box& aBox); 225 226 AtomType mGroupingType; 227 FallibleTArray<CencSampleEncryptionInfoEntry> mEntries; 228 229 protected: 230 Result<Ok, nsresult> Parse(const Box& aBox); 231 }; 232 233 // Audio/video entries from the sample description box (stsd). We only need to 234 // store if these are encrypted, so do not need a specialized class for 235 // different audio and video data. Currently most of the parsing of these 236 // entries is by the mp4parse-rust, but moof pasrser needs to know which of 237 // these are encrypted when parsing the track fragment header (tfhd). 238 struct SampleDescriptionEntry { 239 bool mIsEncryptedEntry = false; 240 }; 241 242 // Used to indicate in variants if all tracks should be parsed. 243 struct ParseAllTracks {}; 244 245 using TrackParseMode = Variant<ParseAllTracks, uint32_t>; 246 247 class Moof final : public Atom { 248 public: 249 Moof(const Box& aBox, const TrackParseMode& aTrackParseMode, 250 const Trex& aTrex, const Mvhd& aMvhd, const Mdhd& aMdhd, 251 const Edts& aEdts, const Sinf& aSinf, const bool aIsAudio, 252 uint64_t* aDecodeTime, nsTArray<TrackEndCts>& aTracksEndCts); 253 void FixRounding(const Moof& aMoof); 254 255 // Retrieve CencSampleEncryptionInfoEntry for a given sample number. 256 // Optionally, you can provide track's group boxes (sbgp): they will be used 257 // if the moof fragment does not contain a sbgp box. 258 const CencSampleEncryptionInfoEntry* GetSampleEncryptionEntry( 259 size_t aSample, 260 const FallibleTArray<SampleToGroupEntry>* aTrackSampleToGroupEntries = 261 nullptr, 262 const FallibleTArray<CencSampleEncryptionInfoEntry>* 263 aTrackSampleEncryptionInfoEntries = nullptr) const; 264 265 // Returns true if a senc box has been found, successfully parsed, and 266 // contains crypto info 267 bool SencIsValid() const { return mSencValid; } 268 269 mozilla::MediaByteRange mRange; 270 mozilla::MediaByteRange mMdatRange; 271 MP4Interval<media::TimeUnit> mTimeRange; 272 FallibleTArray<Sample> mIndex; 273 274 FallibleTArray<CencSampleEncryptionInfoEntry> 275 mFragmentSampleEncryptionInfoEntries; 276 FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries; 277 278 Tfhd mTfhd; 279 FallibleTArray<Saiz> mSaizs; 280 FallibleTArray<Saio> mSaios; 281 nsTArray<nsTArray<uint8_t>> mPsshes; 282 283 private: 284 // aDecodeTime is updated to the end of the parsed TRAF on return. 285 void ParseTraf(const Box& aBox, const TrackParseMode& aTrackParseMode, 286 const Trex& aTrex, const Mvhd& aMvhd, const Mdhd& aMdhd, 287 const Edts& aEdts, const Sinf& aSinf, const bool aIsAudio, 288 uint64_t* aDecodeTime); 289 // aDecodeTime is updated to the end of the parsed TRUN on return. 290 Result<Ok, nsresult> ParseTrun(const Box& aBox, const Mvhd& aMvhd, 291 const Mdhd& aMdhd, const Edts& aEdts, 292 const bool aIsAudio, uint64_t* aDecodeTime); 293 Result<Ok, nsresult> ParseSenc(const Box& aBox, const Sinf& aSinf); 294 // Process the sample auxiliary information used by common encryption. 295 // aScheme is used to select the appropriate auxiliary information and should 296 // be set based on the encryption scheme used by the track being processed. 297 // Note, the term cenc here refers to the standard, not the specific scheme 298 // from that standard. I.e. this function is used to handle up auxiliary 299 // information from the cenc and cbcs schemes. 300 bool ProcessCencAuxInfo(AtomType aScheme); 301 bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges); 302 303 media::TimeUnit mMaxRoundingError; 304 bool mSencValid = false; 305 }; 306 307 DDLoggedTypeDeclName(MoofParser); 308 309 class MoofParser : public DecoderDoctorLifeLogger<MoofParser> { 310 public: 311 MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode, 312 const bool aIsAudio) 313 : mSource(aSource), 314 mOffset(0), 315 mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>() 316 : 0), 317 mIsAudio(aIsAudio), 318 mLastDecodeTime(0), 319 mTrackParseMode(aTrackParseMode) { 320 // Setting mIsMultitrackParser is a nasty work around for calculating 321 // the composition range for MSE that causes the parser to parse multiple 322 // tracks. Ideally we'd store an array of tracks with different metadata 323 // for each. 324 DDLINKCHILD("source", aSource); 325 } 326 // Advance, looking for additional moov, moof, or mdat boxes in aByteRanges. 327 // Return true or false to indicate whether a new valid moof is seen. 328 bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges); 329 // If *aCanEvict is set to true. then will remove all moofs already parsed 330 // from index then rebuild the index. *aCanEvict is set to true upon return if 331 // some moofs were removed. 332 bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges, 333 bool* aCanEvict); 334 bool RebuildFragmentedIndex(BoxContext& aContext); 335 MP4Interval<media::TimeUnit> GetCompositionRange( 336 const mozilla::MediaByteRangeSet& aByteRanges); 337 bool ReachedEnd(); 338 void ParseMoov(const Box& aBox); 339 void ParseTrak(const Box& aBox); 340 void ParseMdia(const Box& aBox); 341 void ParseMvex(const Box& aBox); 342 343 void ParseMinf(const Box& aBox); 344 void ParseStbl(const Box& aBox); 345 void ParseStsd(const Box& aBox); 346 void ParseEncrypted(const Box& aBox); 347 348 // Similar to RebuildFragmentedIndex(), but advance only as far as the next 349 // moof, only if there is a next moof, and block, waiting for the read, if 350 // the ByteStream supports blocking reads. 351 // Return NS_OK if a new valid moof is seen or 352 // NS_ERROR_DOM_MEDIA_END_OF_STREAM if no fatal error occurs before reaching 353 // end of stream. 354 nsresult BlockingReadNextMoof(); 355 356 already_AddRefed<mozilla::MediaByteBuffer> Metadata(); 357 MediaByteRange FirstCompleteMediaSegment(); 358 MediaByteRange FirstCompleteMediaHeader(); 359 360 const CencSampleEncryptionInfoEntry* GetSampleEncryptionEntry( 361 size_t moofNumber, size_t aMoof) const; 362 363 mozilla::MediaByteRange mInitRange; 364 RefPtr<ByteStream> mSource; 365 uint64_t mOffset; 366 Mvhd mMvhd; 367 Mdhd mMdhd; 368 Trex mTrex; 369 Tfdt mTfdt; 370 Edts mEdts; 371 Sinf mSinf; 372 373 FallibleTArray<CencSampleEncryptionInfoEntry> 374 mTrackSampleEncryptionInfoEntries; 375 FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries; 376 FallibleTArray<SampleDescriptionEntry> mSampleDescriptions; 377 378 nsTArray<Moof>& Moofs() { return mMoofs; } 379 380 private: 381 void ScanForMetadata(mozilla::MediaByteRange& aMoov); 382 nsTArray<Moof> mMoofs; 383 nsTArray<MediaByteRange> mMediaRanges; 384 nsTArray<TrackEndCts> mTracksEndCts; 385 const bool mIsAudio; 386 uint64_t mLastDecodeTime; 387 // Either a ParseAllTracks if in multitrack mode, or an integer representing 388 // the track_id for the track being parsed. If parsing a specific track, mTrex 389 // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0 390 // is a valid track id -- this is not allowed in the spec, but such mp4s 391 // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary 392 // id based on the tracks being parsed. 393 const TrackParseMode mTrackParseMode; 394 }; 395 } // namespace mozilla 396 397 #endif