tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MoofParser.h (12717B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #ifndef MOOF_PARSER_H_
      6 #define MOOF_PARSER_H_
      7 
      8 #include "Atom.h"
      9 #include "AtomType.h"
     10 #include "ByteStream.h"
     11 #include "MP4Interval.h"
     12 #include "MediaResource.h"
     13 #include "SinfParser.h"
     14 #include "TimeUnits.h"
     15 #include "mozilla/Variant.h"
     16 
     17 namespace mozilla {
     18 
     19 class Box;
     20 class BoxContext;
     21 class BoxReader;
     22 class Moof;
     23 
     24 // Used to track the CTS end time of the last sample of a track
     25 // in the preceeding Moof, so that we can smooth tracks' timestamps
     26 // across Moofs.
     27 struct TrackEndCts {
     28  TrackEndCts(uint32_t aTrackId, const media::TimeUnit& aCtsEndTime)
     29      : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {}
     30  uint32_t mTrackId;
     31  media::TimeUnit mCtsEndTime;
     32 };
     33 
     34 class Mvhd : public Atom {
     35 public:
     36  Mvhd()
     37      : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {}
     38  explicit Mvhd(const Box& aBox);
     39 
     40  Result<media::TimeUnit, nsresult> ToTimeUnit(int64_t aTimescaleUnits) const {
     41    if (!mTimescale) {
     42      NS_WARNING("invalid mTimescale");
     43      return Err(NS_ERROR_FAILURE);
     44    }
     45    return media::TimeUnit(aTimescaleUnits, mTimescale);
     46  }
     47 
     48  uint64_t mCreationTime;
     49  uint64_t mModificationTime;
     50  uint32_t mTimescale;
     51  uint64_t mDuration;
     52 
     53 protected:
     54  Result<Ok, nsresult> Parse(const Box& aBox);
     55 };
     56 
     57 class Tkhd : public Mvhd {
     58 public:
     59  Tkhd() : mTrackId(0) {}
     60  explicit Tkhd(const Box& aBox);
     61 
     62  uint32_t mTrackId;
     63 
     64 protected:
     65  Result<Ok, nsresult> Parse(const Box& aBox);
     66 };
     67 
     68 class Mdhd : public Mvhd {
     69 public:
     70  Mdhd() = default;
     71  explicit Mdhd(const Box& aBox);
     72 };
     73 
     74 class Trex : public Atom {
     75 public:
     76  explicit Trex(uint32_t aTrackId)
     77      : mFlags(0),
     78        mTrackId(aTrackId),
     79        mDefaultSampleDescriptionIndex(0),
     80        mDefaultSampleDuration(0),
     81        mDefaultSampleSize(0),
     82        mDefaultSampleFlags(0) {}
     83 
     84  explicit Trex(const Box& aBox);
     85 
     86  uint32_t mFlags;
     87  uint32_t mTrackId;
     88  uint32_t mDefaultSampleDescriptionIndex;
     89  uint32_t mDefaultSampleDuration;
     90  uint32_t mDefaultSampleSize;
     91  uint32_t mDefaultSampleFlags;
     92 
     93 protected:
     94  Result<Ok, nsresult> Parse(const Box& aBox);
     95 };
     96 
     97 class Tfhd : public Trex {
     98 public:
     99  explicit Tfhd(const Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
    100    mValid = aTrex.IsValid();
    101  }
    102  Tfhd(const Box& aBox, const Trex& aTrex);
    103 
    104  uint64_t mBaseDataOffset;
    105 
    106 protected:
    107  Result<Ok, nsresult> Parse(const Box& aBox);
    108 };
    109 
    110 class Tfdt : public Atom {
    111 public:
    112  Tfdt() : mBaseMediaDecodeTime(0) {}
    113  explicit Tfdt(const Box& aBox);
    114 
    115  uint64_t mBaseMediaDecodeTime;
    116 
    117 protected:
    118  Result<Ok, nsresult> Parse(const Box& aBox);
    119 };
    120 
    121 class Edts : public Atom {
    122 public:
    123  Edts() : mMediaStart(0), mEmptyOffset(0) {}
    124  explicit Edts(const Box& aBox);
    125  virtual bool IsValid() const override {
    126    // edts is optional
    127    return true;
    128  }
    129 
    130  int64_t mMediaStart;
    131  int64_t mEmptyOffset;
    132 
    133 protected:
    134  Result<Ok, nsresult> Parse(const Box& aBox);
    135 };
    136 
    137 struct Sample {
    138  mozilla::MediaByteRange mByteRange;
    139  // Crypto information coming from senc box: shall be used first
    140  CopyableTArray<uint8_t> mIV;
    141  CopyableTArray<uint32_t> mPlainSizes;
    142  // The number of encrypted bytes in each subsample. The nth element in the
    143  // array is the number of encrypted bytes at the start of the nth subsample.
    144  CopyableTArray<uint32_t> mEncryptedSizes;
    145  // Crypto information coming from saio box: shall be used if no senc
    146  // information is present
    147  mozilla::MediaByteRange mCencRange;
    148  media::TimeUnit mDecodeTime;
    149  MP4Interval<media::TimeUnit> mCompositionRange;
    150  bool mSync;
    151 };
    152 
    153 class Saiz final : public Atom {
    154 public:
    155  Saiz(const Box& aBox, AtomType aDefaultType);
    156 
    157  AtomType mAuxInfoType;
    158  uint32_t mAuxInfoTypeParameter;
    159  FallibleTArray<uint8_t> mSampleInfoSize;
    160 
    161 protected:
    162  Result<Ok, nsresult> Parse(const Box& aBox);
    163 };
    164 
    165 class Saio final : public Atom {
    166 public:
    167  Saio(const Box& aBox, AtomType aDefaultType);
    168 
    169  AtomType mAuxInfoType;
    170  uint32_t mAuxInfoTypeParameter;
    171  FallibleTArray<uint64_t> mOffsets;
    172 
    173 protected:
    174  Result<Ok, nsresult> Parse(const Box& aBox);
    175 };
    176 
    177 struct SampleToGroupEntry {
    178 public:
    179  static const uint32_t kTrackGroupDescriptionIndexBase = 0;
    180  static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000;
    181 
    182  SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex)
    183      : mSampleCount(aSampleCount),
    184        mGroupDescriptionIndex(aGroupDescriptionIndex) {}
    185 
    186  uint32_t mSampleCount;
    187  uint32_t mGroupDescriptionIndex;
    188 };
    189 
    190 class Sbgp final : public Atom  // SampleToGroup box.
    191 {
    192 public:
    193  explicit Sbgp(const Box& aBox);
    194 
    195  AtomType mGroupingType;
    196  uint32_t mGroupingTypeParam;
    197  FallibleTArray<SampleToGroupEntry> mEntries;
    198 
    199 protected:
    200  Result<Ok, nsresult> Parse(const Box& aBox);
    201 };
    202 
    203 // Stores information form CencSampleEncryptionInformationGroupEntry (seig).
    204 // Cenc here refers to the common encryption standard, rather than the specific
    205 // cenc scheme from that standard. This structure is used for all encryption
    206 // schemes. I.e. it is used for both cenc and cbcs, not just cenc.
    207 struct CencSampleEncryptionInfoEntry final {
    208 public:
    209  CencSampleEncryptionInfoEntry() = default;
    210 
    211  Result<Ok, nsresult> Init(BoxReader& aReader);
    212 
    213  bool mIsEncrypted = false;
    214  uint8_t mIVSize = 0;
    215  CopyableTArray<uint8_t> mKeyId;
    216  uint8_t mCryptByteBlock = 0;
    217  uint8_t mSkipByteBlock = 0;
    218  CopyableTArray<uint8_t> mConsantIV;
    219 };
    220 
    221 class Sgpd final : public Atom  // SampleGroupDescription box.
    222 {
    223 public:
    224  explicit Sgpd(const Box& aBox);
    225 
    226  AtomType mGroupingType;
    227  FallibleTArray<CencSampleEncryptionInfoEntry> mEntries;
    228 
    229 protected:
    230  Result<Ok, nsresult> Parse(const Box& aBox);
    231 };
    232 
    233 // Audio/video entries from the sample description box (stsd). We only need to
    234 // store if these are encrypted, so do not need a specialized class for
    235 // different audio and video data. Currently most of the parsing of these
    236 // entries is by the mp4parse-rust, but moof pasrser needs to know which of
    237 // these are encrypted when parsing the track fragment header (tfhd).
    238 struct SampleDescriptionEntry {
    239  bool mIsEncryptedEntry = false;
    240 };
    241 
    242 // Used to indicate in variants if all tracks should be parsed.
    243 struct ParseAllTracks {};
    244 
    245 using TrackParseMode = Variant<ParseAllTracks, uint32_t>;
    246 
    247 class Moof final : public Atom {
    248 public:
    249  Moof(const Box& aBox, const TrackParseMode& aTrackParseMode,
    250       const Trex& aTrex, const Mvhd& aMvhd, const Mdhd& aMdhd,
    251       const Edts& aEdts, const Sinf& aSinf, const bool aIsAudio,
    252       uint64_t* aDecodeTime, nsTArray<TrackEndCts>& aTracksEndCts);
    253  void FixRounding(const Moof& aMoof);
    254 
    255  // Retrieve CencSampleEncryptionInfoEntry for a given sample number.
    256  // Optionally, you can provide track's group boxes (sbgp): they will be used
    257  // if the moof fragment does not contain a sbgp box.
    258  const CencSampleEncryptionInfoEntry* GetSampleEncryptionEntry(
    259      size_t aSample,
    260      const FallibleTArray<SampleToGroupEntry>* aTrackSampleToGroupEntries =
    261          nullptr,
    262      const FallibleTArray<CencSampleEncryptionInfoEntry>*
    263          aTrackSampleEncryptionInfoEntries = nullptr) const;
    264 
    265  // Returns true if a senc box has been found, successfully parsed, and
    266  // contains crypto info
    267  bool SencIsValid() const { return mSencValid; }
    268 
    269  mozilla::MediaByteRange mRange;
    270  mozilla::MediaByteRange mMdatRange;
    271  MP4Interval<media::TimeUnit> mTimeRange;
    272  FallibleTArray<Sample> mIndex;
    273 
    274  FallibleTArray<CencSampleEncryptionInfoEntry>
    275      mFragmentSampleEncryptionInfoEntries;
    276  FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
    277 
    278  Tfhd mTfhd;
    279  FallibleTArray<Saiz> mSaizs;
    280  FallibleTArray<Saio> mSaios;
    281  nsTArray<nsTArray<uint8_t>> mPsshes;
    282 
    283 private:
    284  // aDecodeTime is updated to the end of the parsed TRAF on return.
    285  void ParseTraf(const Box& aBox, const TrackParseMode& aTrackParseMode,
    286                 const Trex& aTrex, const Mvhd& aMvhd, const Mdhd& aMdhd,
    287                 const Edts& aEdts, const Sinf& aSinf, const bool aIsAudio,
    288                 uint64_t* aDecodeTime);
    289  // aDecodeTime is updated to the end of the parsed TRUN on return.
    290  Result<Ok, nsresult> ParseTrun(const Box& aBox, const Mvhd& aMvhd,
    291                                 const Mdhd& aMdhd, const Edts& aEdts,
    292                                 const bool aIsAudio, uint64_t* aDecodeTime);
    293  Result<Ok, nsresult> ParseSenc(const Box& aBox, const Sinf& aSinf);
    294  // Process the sample auxiliary information used by common encryption.
    295  // aScheme is used to select the appropriate auxiliary information and should
    296  // be set based on the encryption scheme used by the track being processed.
    297  // Note, the term cenc here refers to the standard, not the specific scheme
    298  // from that standard. I.e. this function is used to handle up auxiliary
    299  // information from the cenc and cbcs schemes.
    300  bool ProcessCencAuxInfo(AtomType aScheme);
    301  bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
    302 
    303  media::TimeUnit mMaxRoundingError;
    304  bool mSencValid = false;
    305 };
    306 
    307 DDLoggedTypeDeclName(MoofParser);
    308 
    309 class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
    310 public:
    311  MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode,
    312             const bool aIsAudio)
    313      : mSource(aSource),
    314        mOffset(0),
    315        mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>()
    316                                             : 0),
    317        mIsAudio(aIsAudio),
    318        mLastDecodeTime(0),
    319        mTrackParseMode(aTrackParseMode) {
    320    // Setting mIsMultitrackParser is a nasty work around for calculating
    321    // the composition range for MSE that causes the parser to parse multiple
    322    // tracks. Ideally we'd store an array of tracks with different metadata
    323    // for each.
    324    DDLINKCHILD("source", aSource);
    325  }
    326  // Advance, looking for additional moov, moof, or mdat boxes in aByteRanges.
    327  // Return true or false to indicate whether a new valid moof is seen.
    328  bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);
    329  // If *aCanEvict is set to true. then will remove all moofs already parsed
    330  // from index then rebuild the index. *aCanEvict is set to true upon return if
    331  // some moofs were removed.
    332  bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
    333                              bool* aCanEvict);
    334  bool RebuildFragmentedIndex(BoxContext& aContext);
    335  MP4Interval<media::TimeUnit> GetCompositionRange(
    336      const mozilla::MediaByteRangeSet& aByteRanges);
    337  bool ReachedEnd();
    338  void ParseMoov(const Box& aBox);
    339  void ParseTrak(const Box& aBox);
    340  void ParseMdia(const Box& aBox);
    341  void ParseMvex(const Box& aBox);
    342 
    343  void ParseMinf(const Box& aBox);
    344  void ParseStbl(const Box& aBox);
    345  void ParseStsd(const Box& aBox);
    346  void ParseEncrypted(const Box& aBox);
    347 
    348  // Similar to RebuildFragmentedIndex(), but advance only as far as the next
    349  // moof, only if there is a next moof, and block, waiting for the read, if
    350  // the ByteStream supports blocking reads.
    351  // Return NS_OK if a new valid moof is seen or
    352  // NS_ERROR_DOM_MEDIA_END_OF_STREAM if no fatal error occurs before reaching
    353  // end of stream.
    354  nsresult BlockingReadNextMoof();
    355 
    356  already_AddRefed<mozilla::MediaByteBuffer> Metadata();
    357  MediaByteRange FirstCompleteMediaSegment();
    358  MediaByteRange FirstCompleteMediaHeader();
    359 
    360  const CencSampleEncryptionInfoEntry* GetSampleEncryptionEntry(
    361      size_t moofNumber, size_t aMoof) const;
    362 
    363  mozilla::MediaByteRange mInitRange;
    364  RefPtr<ByteStream> mSource;
    365  uint64_t mOffset;
    366  Mvhd mMvhd;
    367  Mdhd mMdhd;
    368  Trex mTrex;
    369  Tfdt mTfdt;
    370  Edts mEdts;
    371  Sinf mSinf;
    372 
    373  FallibleTArray<CencSampleEncryptionInfoEntry>
    374      mTrackSampleEncryptionInfoEntries;
    375  FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries;
    376  FallibleTArray<SampleDescriptionEntry> mSampleDescriptions;
    377 
    378  nsTArray<Moof>& Moofs() { return mMoofs; }
    379 
    380 private:
    381  void ScanForMetadata(mozilla::MediaByteRange& aMoov);
    382  nsTArray<Moof> mMoofs;
    383  nsTArray<MediaByteRange> mMediaRanges;
    384  nsTArray<TrackEndCts> mTracksEndCts;
    385  const bool mIsAudio;
    386  uint64_t mLastDecodeTime;
    387  // Either a ParseAllTracks if in multitrack mode, or an integer representing
    388  // the track_id for the track being parsed. If parsing a specific track, mTrex
    389  // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0
    390  // is a valid track id -- this is not allowed in the spec, but such mp4s
    391  // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary
    392  // id based on the tracks being parsed.
    393  const TrackParseMode mTrackParseMode;
    394 };
    395 }  // namespace mozilla
    396 
    397 #endif