OggCodecState.h (21671B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 #include "Intervals.h" 7 #if !defined(OggCodecState_h_) 8 # define OggCodecState_h_ 9 10 # include <ogg/ogg.h> 11 // For MOZ_SAMPLE_TYPE_* 12 # include <nsClassHashtable.h> 13 # include <nsDeque.h> 14 # include <nsTArray.h> 15 # include <vorbis/codec.h> 16 17 # include "FlacFrameParser.h" 18 # include "OggRLBoxTypes.h" 19 # include "VideoUtils.h" 20 21 // Uncomment the following to validate that we're predicting the number 22 // of Vorbis samples in each packet correctly. 23 # define VALIDATE_VORBIS_SAMPLE_CALCULATION 24 # ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 25 # include <map> 26 # endif 27 28 struct OpusMSDecoder; 29 30 namespace mozilla { 31 32 inline constexpr char RLBOX_SAFE_DEBUG_ASSERTION[] = 33 "Tainted data is being inspected only for debugging purposes. This is not " 34 "a condition that is critical for safety of the renderer."; 35 36 inline constexpr char RLBOX_OGG_STATE_ASSERT_REASON[] = 37 "Tainted data is being inspected only to check the internal state of " 38 "libogg structures. This is not a condition that is critical for safety of " 39 "the renderer."; 40 41 inline constexpr char RLBOX_OGG_PAGE_SERIAL_REASON[] = 42 "We are checking the serial of the page. If libogg is operating correctly, " 43 "we check serial numbers to make sure the Firefox renderer is correctly " 44 "passing streams to the correct source. If libogg has been corrupted, it " 45 "could return an incorrect serial, however this would mean that an OGG " 46 "file has intentionally corrupted data across multiple logical streams. " 47 "This however cannot compromise memory safety of the renderer."; 48 49 class OpusParser; 50 51 struct OggPacketDeletePolicy { 52 void operator()(ogg_packet* aPacket) const { 53 delete[] aPacket->packet; 54 delete aPacket; 55 } 56 }; 57 58 using OggPacketPtr = UniquePtr<ogg_packet, OggPacketDeletePolicy>; 59 60 // Deallocates a packet, used in OggPacketQueue below. 61 class OggPacketDeallocator : public nsDequeFunctor<ogg_packet> { 62 virtual void operator()(ogg_packet* aPacket) override { 63 OggPacketDeletePolicy()(aPacket); 64 } 65 }; 66 67 // A queue of ogg_packets. When we read a page, we extract the page's packets 68 // and buffer them in the owning stream's OggCodecState. This is because 69 // if we're skipping up to the next keyframe in very large frame sized videos, 70 // there may be several megabytes of data between keyframes, and the 71 // ogg_stream_state would end up resizing its buffer every time we added a 72 // new 4KB page to the bitstream, which kills performance on Windows. This 73 // also gives us the option to timestamp packets rather than decoded 74 // frames/samples, reducing the amount of frames/samples we must decode to 75 // determine start-time at a particular offset, and gives us finer control 76 // over memory usage. 77 class OggPacketQueue : private nsDeque<ogg_packet> { 78 public: 79 OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {} 80 ~OggPacketQueue() { Erase(); } 81 bool IsEmpty() { return nsDeque<ogg_packet>::GetSize() == 0; } 82 void Append(OggPacketPtr aPacket); 83 OggPacketPtr PopFront() { 84 return OggPacketPtr(nsDeque<ogg_packet>::PopFront()); 85 } 86 ogg_packet* PeekFront() { return nsDeque<ogg_packet>::PeekFront(); } 87 OggPacketPtr Pop() { return OggPacketPtr(nsDeque<ogg_packet>::Pop()); } 88 ogg_packet* operator[](size_t aIndex) const { 89 return nsDeque<ogg_packet>::ObjectAt(aIndex); 90 } 91 size_t Length() const { return nsDeque<ogg_packet>::GetSize(); } 92 void PushFront(OggPacketPtr aPacket) { 93 nsDeque<ogg_packet>::PushFront(aPacket.release()); 94 } 95 void Erase() { nsDeque<ogg_packet>::Erase(); } 96 }; 97 98 // Encapsulates the data required for decoding an ogg bitstream and for 99 // converting granulepos to timestamps. 100 class OggCodecState { 101 public: 102 using MetadataTags = mozilla::MetadataTags; 103 // Ogg types we know about 104 enum CodecType { 105 TYPE_VORBIS = 0, 106 TYPE_OPUS, 107 TYPE_SKELETON, 108 TYPE_FLAC, 109 TYPE_UNKNOWN 110 }; 111 112 virtual ~OggCodecState(); 113 114 // Factory for creating nsCodecStates. Use instead of constructor. 115 // aPage should be a beginning-of-stream page. 116 static UniquePtr<OggCodecState> Create(rlbox_sandbox_ogg* aSandbox, 117 tainted_opaque_ogg<ogg_page*> aPage, 118 uint32_t aSerial); 119 120 virtual CodecType GetType() { return TYPE_UNKNOWN; } 121 122 // Reads a header packet. Returns false if an error was encountered 123 // while reading header packets. Callers should check DoneReadingHeaders() 124 // to determine if the last header has been read. 125 // This function takes ownership of the packet and is responsible for 126 // releasing it or queuing it for later processing. 127 virtual bool DecodeHeader(OggPacketPtr aPacket) { 128 return (mDoneReadingHeaders = true); 129 } 130 131 // Build a hash table with tag metadata parsed from the stream. 132 virtual UniquePtr<MetadataTags> GetTags() { return nullptr; } 133 134 using TimeUnit = media::TimeUnit; 135 136 // Returns the end time that a granulepos represents. 137 virtual TimeUnit Time(int64_t aGranulepos) { return TimeUnit::Invalid(); } 138 139 // Returns the start time that a granulepos represents. 140 virtual TimeUnit StartTime(int64_t aGranulepos) { 141 return TimeUnit::Invalid(); 142 } 143 144 // Returns the duration of the given packet, if it can be determined. 145 virtual TimeUnit PacketDuration(ogg_packet* aPacket) { 146 return TimeUnit::Invalid(); 147 } 148 149 // Returns the start time of the given packet, if it can be determined. 150 virtual TimeUnit PacketStartTime(ogg_packet* aPacket) { 151 if (aPacket->granulepos < 0) { 152 return TimeUnit::Invalid(); 153 } 154 TimeUnit endTime = Time(aPacket->granulepos); 155 TimeUnit duration = PacketDuration(aPacket); 156 // When looping, it's possible to find header packets there because the 157 // demuxing restarts from the beginning of the stream. Just skip and retry 158 // with the next packet. 159 if (!duration.IsValid()) { 160 return TimeUnit::Invalid(); 161 } 162 if (duration > endTime) { 163 // Audio preskip may eat a whole packet or more. 164 return TimeUnit::Zero(); 165 } 166 return endTime - duration; 167 } 168 169 // Initializes the codec state. 170 virtual bool Init() { return true; } 171 172 // Returns true when this bitstream has finished reading all its 173 // header packets. 174 bool DoneReadingHeaders() { return mDoneReadingHeaders; } 175 176 // Deactivates the bitstream. Only the primary video and audio bitstreams 177 // should be active. 178 void Deactivate() { 179 mActive = false; 180 mDoneReadingHeaders = true; 181 Reset(); 182 } 183 184 // Resets decoding state. 185 virtual nsresult Reset(); 186 187 // Returns true if the OggCodecState thinks this packet is a header 188 // packet. Note this does not verify the validity of the header packet, 189 // it just guarantees that the packet is marked as a header packet (i.e. 190 // it is definintely not a data packet). Do not use this to identify 191 // streams, use it to filter header packets from data packets while 192 // decoding. 193 virtual bool IsHeader(ogg_packet* aPacket) { return false; } 194 195 // Returns true if the OggCodecState thinks this packet represents a 196 // keyframe, from which decoding can restart safely. 197 virtual bool IsKeyframe(ogg_packet* aPacket) { return true; } 198 199 // Returns true if there is a packet available for dequeueing in the stream. 200 bool IsPacketReady(); 201 202 // Returns the next raw packet in the stream, or nullptr if there are no more 203 // packets buffered in the packet queue. More packets can be buffered by 204 // inserting one or more pages into the stream by calling PageIn(). 205 // The packet will have a valid granulepos. 206 OggPacketPtr PacketOut(); 207 208 // Returns the next raw packet in the stream, or nullptr if there are no more 209 // packets buffered in the packet queue, without consuming it. 210 // The packet will have a valid granulepos. 211 ogg_packet* PacketPeek(); 212 213 // Moves all raw packets from aOther to the front of the current packet queue. 214 void PushFront(OggPacketQueue&& aOther); 215 216 // Returns the next packet in the stream as a MediaRawData, or nullptr 217 // if there are no more packets buffered in the packet queue. More packets 218 // can be buffered by inserting one or more pages into the stream by calling 219 // PageIn(). The packet will have a valid granulepos. 220 virtual already_AddRefed<MediaRawData> PacketOutAsMediaRawData(); 221 222 // Extracts all packets from the page, and inserts them into the packet 223 // queue. They can be extracted by calling PacketOut(). Packets from an 224 // inactive stream are not buffered, i.e. this call has no effect for 225 // inactive streams. Multiple pages may need to be inserted before 226 // PacketOut() starts to return packets, as granulepos may need to be 227 // captured. 228 virtual nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage); 229 230 // Returns the maximum number of microseconds which a keyframe can be offset 231 // from any given interframe.b 232 virtual TimeUnit MaxKeyframeOffset() { return TimeUnit::Zero(); } 233 234 // Number of packets read. 235 uint64_t mPacketCount; 236 237 // Serial number of the bitstream. 238 uint32_t mSerial; 239 240 // Ogg specific state. 241 tainted_opaque_ogg<ogg_stream_state*> mState; 242 243 // Queue of as yet undecoded packets. Packets are guaranteed to have 244 // a valid granulepos. 245 OggPacketQueue mPackets; 246 247 // Is the bitstream active; whether we're decoding and playing this bitstream. 248 bool mActive; 249 250 // True when all headers packets have been read. 251 bool mDoneReadingHeaders; 252 253 // All invocations of libogg functionality from the demuxer is sandboxed using 254 // wasm library sandboxes on supported platforms. This is the sandbox 255 // instance. 256 rlbox_sandbox_ogg* mSandbox; 257 258 virtual const TrackInfo* GetInfo() const { 259 MOZ_RELEASE_ASSERT(false, "Can't be called directly"); 260 return nullptr; 261 } 262 263 // Validation utility for vorbis-style tag names. 264 static bool IsValidVorbisTagName(nsCString& aName); 265 266 // Utility method to parse and add a vorbis-style comment 267 // to a metadata hash table. Most Ogg-encapsulated codecs 268 // use the vorbis comment format for metadata. 269 static bool AddVorbisComment(UniquePtr<MetadataTags>& aTags, 270 const char* aComment, uint32_t aLength); 271 272 protected: 273 // Constructs a new OggCodecState. aActive denotes whether the stream is 274 // active. For streams of unsupported or unknown types, aActive should be 275 // false. 276 OggCodecState(rlbox_sandbox_ogg* aSandbox, 277 tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial, 278 bool aActive); 279 280 // Deallocates all packets stored in mUnstamped, and clears the array. 281 void ClearUnstamped(); 282 283 // Extracts packets out of mState until a data packet with a non -1 284 // granulepos is encountered, or no more packets are readable. Header 285 // packets are pushed into the packet queue immediately, and data packets 286 // are buffered in mUnstamped. Once a non -1 granulepos packet is read 287 // the granulepos of the packets in mUnstamped can be inferred, and they 288 // can be pushed over to mPackets. Used by PageIn() implementations in 289 // subclasses. 290 nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos); 291 292 // Temporary buffer in which to store packets while we're reading packets 293 // in order to capture granulepos. 294 nsTArray<OggPacketPtr> mUnstamped; 295 296 bool SetCodecSpecificConfig(MediaByteBuffer* aBuffer, 297 OggPacketQueue& aHeaders); 298 299 private: 300 bool InternalInit(); 301 }; 302 303 class VorbisState : public OggCodecState { 304 public: 305 explicit VorbisState(rlbox_sandbox_ogg* aSandbox, 306 tainted_opaque_ogg<ogg_page*> aBosPage, 307 uint32_t aSerial); 308 virtual ~VorbisState(); 309 310 CodecType GetType() override { return TYPE_VORBIS; } 311 bool DecodeHeader(OggPacketPtr aPacket) override; 312 TimeUnit Time(int64_t aGranulepos) override; 313 TimeUnit PacketDuration(ogg_packet* aPacket) override; 314 bool Init() override; 315 nsresult Reset() override; 316 bool IsHeader(ogg_packet* aPacket) override; 317 nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage) override; 318 const TrackInfo* GetInfo() const override { return &mInfo; } 319 320 // Return a hash table with tag metadata. 321 UniquePtr<MetadataTags> GetTags() override; 322 323 private: 324 AudioInfo mInfo; 325 vorbis_info mVorbisInfo = {}; 326 vorbis_comment mComment = {}; 327 vorbis_dsp_state mDsp = {}; 328 vorbis_block mBlock = {}; 329 OggPacketQueue mHeaders; 330 331 // Returns the end time that a granulepos represents. 332 static TimeUnit Time(vorbis_info* aInfo, int64_t aGranulePos); 333 334 // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped 335 // array. 336 void ReconstructVorbisGranulepos(); 337 338 // The "block size" of the previously decoded Vorbis packet, or 0 if we've 339 // not yet decoded anything. This is used to calculate the number of samples 340 // in a Vorbis packet, since each Vorbis packet depends on the previous 341 // packet while being decoded. 342 long mPrevVorbisBlockSize; 343 344 // Granulepos (end sample) of the last decoded Vorbis packet. This is used 345 // to calculate the Vorbis granulepos when we don't find a granulepos to 346 // back-propagate from. 347 int64_t mGranulepos; 348 349 # ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 350 // When validating that we've correctly predicted Vorbis packets' number 351 // of samples, we store each packet's predicted number of samples in this 352 // map, and verify we decode the predicted number of samples. 353 std::map<ogg_packet*, long> mVorbisPacketSamples; 354 # endif 355 356 // Records that aPacket is predicted to have aSamples samples. 357 // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION 358 // is not defined. 359 void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples); 360 361 // Verifies that aPacket has had its number of samples predicted. 362 // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION 363 // is not defined. 364 void AssertHasRecordedPacketSamples(ogg_packet* aPacket); 365 366 public: 367 // Asserts that the number of samples predicted for aPacket is aSamples. 368 // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION 369 // is not defined. 370 void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples); 371 }; 372 373 class OpusState : public OggCodecState { 374 public: 375 explicit OpusState(rlbox_sandbox_ogg* aSandbox, 376 tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial); 377 virtual ~OpusState(); 378 379 CodecType GetType() override { return TYPE_OPUS; } 380 bool DecodeHeader(OggPacketPtr aPacket) override; 381 TimeUnit Time(int64_t aGranulepos) override; 382 TimeUnit PacketDuration(ogg_packet* aPacket) override; 383 bool Init() override; 384 nsresult Reset() override; 385 nsresult Reset(bool aStart); 386 bool IsHeader(ogg_packet* aPacket) override; 387 nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage) override; 388 already_AddRefed<MediaRawData> PacketOutAsMediaRawData() override; 389 const TrackInfo* GetInfo() const override { return &mInfo; } 390 391 // Returns the end time that a granulepos represents. 392 static TimeUnit Time(int aPreSkip, int64_t aGranulepos); 393 394 // Construct and return a table of tags from the metadata header. 395 UniquePtr<MetadataTags> GetTags() override; 396 397 private: 398 UniquePtr<OpusParser> mParser; 399 OpusMSDecoder* mDecoder; 400 401 // Granule position (end sample) of the last decoded Opus packet. This is 402 // used to calculate the amount we should trim from the last packet. 403 int64_t mPrevPacketGranulepos; 404 405 // Reconstructs the granulepos of Opus packets stored in the 406 // mUnstamped array. mUnstamped must be filled with consecutive packets from 407 // the stream, with the last packet having a known granulepos. Using this 408 // known granulepos, and the known frame numbers, we recover the granulepos 409 // of all frames in the array. This enables us to determine their timestamps. 410 bool ReconstructOpusGranulepos(); 411 412 // Granule position (end sample) of the last decoded Opus page. This is 413 // used to calculate the Opus per-packet granule positions on the last page, 414 // where we may need to trim some samples from the end. 415 int64_t mPrevPageGranulepos; 416 AudioInfo mInfo; 417 OggPacketQueue mHeaders; 418 }; 419 420 // Constructs a 32bit version number out of two 16 bit major,minor 421 // version numbers. 422 # define SKELETON_VERSION(major, minor) (((major) << 16) | (minor)) 423 424 enum EMsgHeaderType { 425 eContentType, 426 eRole, 427 eName, 428 eLanguage, 429 eTitle, 430 eDisplayHint, 431 eAltitude, 432 eTrackOrder, 433 eTrackDependencies 434 }; 435 436 struct FieldPatternType { 437 const char* mPatternToRecognize; 438 EMsgHeaderType mMsgHeaderType; 439 }; 440 441 // Stores the message information for different logical bitstream. 442 struct MessageField { 443 nsClassHashtable<nsUint32HashKey, nsCString> mValuesStore; 444 }; 445 446 class SkeletonState : public OggCodecState { 447 public: 448 explicit SkeletonState(rlbox_sandbox_ogg* aSandbox, 449 tainted_opaque_ogg<ogg_page*> aBosPage, 450 uint32_t aSerial); 451 ~SkeletonState(); 452 453 nsClassHashtable<nsUint32HashKey, MessageField> mMsgFieldStore; 454 455 CodecType GetType() override { return TYPE_SKELETON; } 456 bool DecodeHeader(OggPacketPtr aPacket) override; 457 TimeUnit Time(int64_t aGranulepos) override { return TimeUnit::Invalid(); } 458 bool IsHeader(ogg_packet* aPacket) override { return true; } 459 460 // Return true if the given time (in milliseconds) is within 461 // the presentation time defined in the skeleton track. 462 bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; } 463 464 // Stores the offset of the page on which a keyframe starts, 465 // and its presentation time. 466 class nsKeyPoint { 467 public: 468 nsKeyPoint() : mOffset(INT64_MAX), mTime(TimeUnit::Invalid()) {} 469 470 nsKeyPoint(int64_t aOffset, TimeUnit aTime) 471 : mOffset(aOffset), mTime(aTime) {} 472 473 // Offset from start of segment/link-in-the-chain in bytes. 474 int64_t mOffset; 475 476 // Presentation time 477 TimeUnit mTime; 478 479 bool IsNull() { return mOffset == INT64_MAX && !mTime.IsValid(); } 480 }; 481 482 // Stores a keyframe's byte-offset, presentation time and the serialno 483 // of the stream it belongs to. 484 class nsSeekTarget { 485 public: 486 nsSeekTarget() : mSerial(0) {} 487 nsKeyPoint mKeyPoint; 488 uint32_t mSerial; 489 bool IsNull() { return mKeyPoint.IsNull() && mSerial == 0; } 490 }; 491 492 // Determines from the seek index the keyframe which you must seek back to 493 // in order to get all keyframes required to render all streams with 494 // serialnos in aTracks, at time aTarget. 495 nsresult IndexedSeekTarget(const TimeUnit& aTarget, 496 nsTArray<uint32_t>& aTracks, 497 nsSeekTarget& aResult); 498 499 bool HasIndex() const { return mIndex.Count() > 0; } 500 501 // Returns the duration of the active tracks in the media, if we have 502 // an index. aTracks must be filled with the serialnos of the active tracks. 503 // The duration is calculated as the greatest end time of all active tracks, 504 // minus the smalled start time of all the active tracks. 505 nsresult GetDuration(const nsTArray<uint32_t>& aTracks, TimeUnit& aDuration); 506 507 private: 508 // Decodes an index packet. Returns false on failure. 509 bool DecodeIndex(ogg_packet* aPacket); 510 // Decodes an fisbone packet. Returns false on failure. 511 bool DecodeFisbone(ogg_packet* aPacket); 512 513 // Gets the keypoint you must seek to in order to get the keyframe required 514 // to render the stream at time aTarget on stream with serial aSerialno. 515 nsresult IndexedSeekTargetForTrack(uint32_t aSerialno, 516 const TimeUnit& aTarget, 517 nsKeyPoint& aResult); 518 519 // Version of the decoded skeleton track, as per the SKELETON_VERSION macro. 520 uint32_t mVersion; 521 522 // Presentation time of the resource in milliseconds 523 int64_t mPresentationTime; 524 525 // Length of the resource in bytes. 526 int64_t mLength; 527 528 // Stores the keyframe index and duration information for a particular 529 // stream. 530 class nsKeyFrameIndex { 531 public: 532 nsKeyFrameIndex(const TimeUnit& aStartTime, const TimeUnit& aEndTime) 533 : mStartTime(aStartTime), mEndTime(aEndTime) { 534 MOZ_COUNT_CTOR(nsKeyFrameIndex); 535 } 536 537 MOZ_COUNTED_DTOR(nsKeyFrameIndex) 538 539 void Add(int64_t aOffset, const TimeUnit& aTime) { 540 mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTime)); 541 } 542 543 const nsKeyPoint& Get(uint32_t aIndex) const { return mKeyPoints[aIndex]; } 544 545 uint32_t Length() const { return mKeyPoints.Length(); } 546 547 // Presentation time of the first sample in this stream in usecs. 548 const TimeUnit mStartTime; 549 550 // End time of the last sample in this stream in usecs. 551 const TimeUnit mEndTime; 552 553 private: 554 nsTArray<nsKeyPoint> mKeyPoints; 555 }; 556 557 // Maps Ogg serialnos to the index-keypoint list. 558 nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex; 559 }; 560 561 class FlacState : public OggCodecState { 562 public: 563 explicit FlacState(rlbox_sandbox_ogg* aSandbox, 564 tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial); 565 566 CodecType GetType() override { return TYPE_FLAC; } 567 bool DecodeHeader(OggPacketPtr aPacket) override; 568 TimeUnit Time(int64_t aGranulepos) override; 569 TimeUnit PacketDuration(ogg_packet* aPacket) override; 570 bool IsHeader(ogg_packet* aPacket) override; 571 nsresult PageIn(tainted_opaque_ogg<ogg_page*> aPage) override; 572 573 // Return a hash table with tag metadata. 574 UniquePtr<MetadataTags> GetTags() override; 575 576 const TrackInfo* GetInfo() const override; 577 578 private: 579 bool ReconstructFlacGranulepos(void); 580 581 FlacFrameParser mParser; 582 }; 583 584 } // namespace mozilla 585 586 #endif