OggCodecState.cpp (53327B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "OggCodecState.h" 8 9 #include <opus/opus.h> 10 #include <opus/opus_multistream.h> 11 #include <stdint.h> 12 #include <string.h> 13 14 #include <algorithm> 15 16 #include "OggRLBox.h" 17 #include "OpusParser.h" 18 #include "VideoUtils.h" 19 #include "XiphExtradata.h" 20 #include "mozilla/EndianUtils.h" 21 #include "mozilla/ScopeExit.h" 22 #include "mozilla/TextUtils.h" 23 #include "mozilla/Utf8.h" 24 #include "nsDebug.h" 25 26 namespace mozilla { 27 28 extern LazyLogModule gMediaDecoderLog; 29 #define LOG(type, msg) MOZ_LOG(gMediaDecoderLog, type, msg) 30 31 using media::TimeUnit; 32 33 /** Decoder base class for Ogg-encapsulated streams. */ 34 UniquePtr<OggCodecState> OggCodecState::Create( 35 rlbox_sandbox_ogg* aSandbox, tainted_opaque_ogg<ogg_page*> aPage, 36 uint32_t aSerial) { 37 NS_ASSERTION(sandbox_invoke(*aSandbox, ogg_page_bos, aPage) 38 .unverified_safe_because(RLBOX_SAFE_DEBUG_ASSERTION), 39 "Only call on BOS page!"); 40 UniquePtr<OggCodecState> codecState; 41 tainted_ogg<ogg_page*> aPage_t = rlbox::from_opaque(aPage); 42 const char codec_reason[] = 43 "These conditions set the type of codec. Since we are relying on " 44 "ogg_page to determine the codec type, the library could lie about " 45 "this. 
We allow this as it does not directly allow renderer " 46 "vulnerabilities if this is incorrect."; 47 long body_len = aPage_t->body_len.unverified_safe_because(codec_reason); 48 49 if (body_len > 6 && rlbox::memcmp(*aSandbox, aPage_t->body + 1, "vorbis", 6u) 50 .unverified_safe_because(codec_reason) == 0) { 51 codecState = MakeUnique<VorbisState>(aSandbox, aPage, aSerial); 52 } else if (body_len > 8 && 53 rlbox::memcmp(*aSandbox, aPage_t->body, "OpusHead", 8u) 54 .unverified_safe_because(codec_reason) == 0) { 55 codecState = MakeUnique<OpusState>(aSandbox, aPage, aSerial); 56 } else if (body_len > 8 && 57 rlbox::memcmp(*aSandbox, aPage_t->body, "fishead\0", 8u) 58 .unverified_safe_because(codec_reason) == 0) { 59 codecState = MakeUnique<SkeletonState>(aSandbox, aPage, aSerial); 60 } else if (body_len > 5 && 61 rlbox::memcmp(*aSandbox, aPage_t->body, "\177FLAC", 5u) 62 .unverified_safe_because(codec_reason) == 0) { 63 codecState = MakeUnique<FlacState>(aSandbox, aPage, aSerial); 64 } else { 65 // Can't use MakeUnique here, OggCodecState is protected. 
66 codecState.reset(new OggCodecState(aSandbox, aPage, aSerial, false)); 67 } 68 69 if (!codecState->OggCodecState::InternalInit()) { 70 codecState.reset(); 71 } 72 73 return codecState; 74 } 75 76 OggCodecState::OggCodecState(rlbox_sandbox_ogg* aSandbox, 77 tainted_opaque_ogg<ogg_page*> aBosPage, 78 uint32_t aSerial, bool aActive) 79 : mPacketCount(0), 80 mSerial(aSerial), 81 mActive(aActive), 82 mDoneReadingHeaders(!aActive), 83 mSandbox(aSandbox) { 84 MOZ_COUNT_CTOR(OggCodecState); 85 tainted_ogg<ogg_stream_state*> state = 86 mSandbox->malloc_in_sandbox<ogg_stream_state>(); 87 MOZ_RELEASE_ASSERT(state != nullptr); 88 rlbox::memset(*mSandbox, state, 0, sizeof(ogg_stream_state)); 89 mState = state.to_opaque(); 90 } 91 92 OggCodecState::~OggCodecState() { 93 MOZ_COUNT_DTOR(OggCodecState); 94 Reset(); 95 #ifdef DEBUG 96 int ret = 97 #endif 98 sandbox_invoke(*mSandbox, ogg_stream_clear, mState) 99 .unverified_safe_because(RLBOX_SAFE_DEBUG_ASSERTION); 100 NS_ASSERTION(ret == 0, "ogg_stream_clear failed"); 101 mSandbox->free_in_sandbox(rlbox::from_opaque(mState)); 102 tainted_ogg<ogg_stream_state*> nullval = nullptr; 103 mState = nullval.to_opaque(); 104 } 105 106 nsresult OggCodecState::Reset() { 107 if (sandbox_invoke(*mSandbox, ogg_stream_reset, mState) 108 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) != 0) { 109 return NS_ERROR_FAILURE; 110 } 111 mPackets.Erase(); 112 ClearUnstamped(); 113 return NS_OK; 114 } 115 116 void OggCodecState::ClearUnstamped() { mUnstamped.Clear(); } 117 118 bool OggCodecState::InternalInit() { 119 int ret = sandbox_invoke(*mSandbox, ogg_stream_init, mState, mSerial) 120 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON); 121 return ret == 0; 122 } 123 124 bool OggCodecState::IsValidVorbisTagName(nsCString& aName) { 125 // Tag names must consist of ASCII 0x20 through 0x7D, 126 // excluding 0x3D '=' which is the separator. 
127 uint32_t length = aName.Length(); 128 const char* data = aName.Data(); 129 for (uint32_t i = 0; i < length; i++) { 130 if (data[i] < 0x20 || data[i] > 0x7D || data[i] == '=') { 131 return false; 132 } 133 } 134 return true; 135 } 136 137 bool OggCodecState::AddVorbisComment(UniquePtr<MetadataTags>& aTags, 138 const char* aComment, uint32_t aLength) { 139 const char* div = (const char*)memchr(aComment, '=', aLength); 140 if (!div) { 141 LOG(LogLevel::Debug, ("Skipping comment: no separator")); 142 return false; 143 } 144 nsCString key = nsCString(aComment, div - aComment); 145 if (!IsValidVorbisTagName(key)) { 146 LOG(LogLevel::Debug, ("Skipping comment: invalid tag name")); 147 return false; 148 } 149 uint32_t valueLength = aLength - (div - aComment); 150 nsCString value = nsCString(div + 1, valueLength); 151 if (!IsUtf8(value)) { 152 LOG(LogLevel::Debug, ("Skipping comment: invalid UTF-8 in value")); 153 return false; 154 } 155 aTags->InsertOrUpdate(key, value); 156 return true; 157 } 158 159 bool OggCodecState::SetCodecSpecificConfig(MediaByteBuffer* aBuffer, 160 OggPacketQueue& aHeaders) { 161 nsTArray<const unsigned char*> headers; 162 nsTArray<size_t> headerLens; 163 for (size_t i = 0; i < aHeaders.Length(); i++) { 164 headers.AppendElement(aHeaders[i]->packet); 165 headerLens.AppendElement(aHeaders[i]->bytes); 166 } 167 // Save header packets for the decoder 168 if (!XiphHeadersToExtradata(aBuffer, headers, headerLens)) { 169 return false; 170 } 171 aHeaders.Erase(); 172 return true; 173 } 174 175 void VorbisState::RecordVorbisPacketSamples(ogg_packet* aPacket, 176 long aSamples) { 177 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 178 mVorbisPacketSamples[aPacket] = aSamples; 179 #endif 180 } 181 182 void VorbisState::ValidateVorbisPacketSamples(ogg_packet* aPacket, 183 long aSamples) { 184 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 185 NS_ASSERTION(mVorbisPacketSamples[aPacket] == aSamples, 186 "Decoded samples for Vorbis packet don't match expected!"); 187 
mVorbisPacketSamples.erase(aPacket); 188 #endif 189 } 190 191 void VorbisState::AssertHasRecordedPacketSamples(ogg_packet* aPacket) { 192 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 193 NS_ASSERTION(mVorbisPacketSamples.count(aPacket) == 1, 194 "Must have recorded packet samples"); 195 #endif 196 } 197 198 // Clone the given packet from memory accessible to the sandboxed libOgg to 199 // memory accessible only to the Firefox renderer 200 static OggPacketPtr CloneOutOfSandbox(tainted_ogg<ogg_packet*> aPacket) { 201 ogg_packet* clone = 202 aPacket.copy_and_verify([](std::unique_ptr<tainted_ogg<ogg_packet>> val) { 203 const char packet_reason[] = 204 "Packets have no guarantees on what data they hold. The renderer's " 205 "safety is not compromised even if packets return garbage data."; 206 207 ogg_packet* p = new ogg_packet(); 208 p->bytes = val->bytes.unverified_safe_because(packet_reason); 209 p->b_o_s = val->b_o_s.unverified_safe_because(packet_reason); 210 p->e_o_s = val->e_o_s.unverified_safe_because(packet_reason); 211 p->granulepos = val->granulepos.unverified_safe_because(packet_reason); 212 p->packetno = val->packetno.unverified_safe_because(packet_reason); 213 if (p->bytes == 0) { 214 p->packet = nullptr; 215 } else { 216 p->packet = val->packet.copy_and_verify_range( 217 [](std::unique_ptr<unsigned char[]> packet) { 218 return packet.release(); 219 }, 220 p->bytes); 221 } 222 return p; 223 }); 224 return OggPacketPtr(clone); 225 } 226 227 void OggPacketQueue::Append(OggPacketPtr aPacket) { 228 nsDeque::Push(aPacket.release()); 229 } 230 231 bool OggCodecState::IsPacketReady() { return !mPackets.IsEmpty(); } 232 233 OggPacketPtr OggCodecState::PacketOut() { 234 if (mPackets.IsEmpty()) { 235 return nullptr; 236 } 237 return mPackets.PopFront(); 238 } 239 240 ogg_packet* OggCodecState::PacketPeek() { 241 if (mPackets.IsEmpty()) { 242 return nullptr; 243 } 244 return mPackets.PeekFront(); 245 } 246 247 void OggCodecState::PushFront(OggPacketQueue&& aOther) { 248 
while (!aOther.IsEmpty()) { 249 mPackets.PushFront(aOther.Pop()); 250 } 251 } 252 253 already_AddRefed<MediaRawData> OggCodecState::PacketOutAsMediaRawData() { 254 OggPacketPtr packet = PacketOut(); 255 if (!packet) { 256 return nullptr; 257 } 258 259 NS_ASSERTION( 260 !IsHeader(packet.get()), 261 "PacketOutAsMediaRawData can only be called on non-header packets"); 262 RefPtr<MediaRawData> sample = new MediaRawData(packet->packet, packet->bytes); 263 if (packet->bytes && !sample->Data()) { 264 // OOM. 265 return nullptr; 266 } 267 268 TimeUnit endTimestamp = Time(packet->granulepos); 269 NS_ASSERTION(endTimestamp.IsPositiveOrZero(), "timestamp invalid"); 270 271 TimeUnit duration = PacketDuration(packet.get()); 272 if (!duration.IsValid() || !duration.IsPositiveOrZero()) { 273 NS_WARNING( 274 nsPrintfCString("duration invalid! (%s)", duration.ToString().get()) 275 .get()); 276 duration = TimeUnit::Zero(endTimestamp); 277 } 278 279 sample->mTimecode = Time(packet->granulepos); 280 sample->mTime = endTimestamp - duration; 281 sample->mDuration = duration; 282 sample->mKeyframe = IsKeyframe(packet.get()); 283 sample->mEOS = packet->e_o_s; 284 285 return sample.forget(); 286 } 287 288 nsresult OggCodecState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) { 289 if (!mActive) { 290 return NS_OK; 291 } 292 NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke( 293 *mSandbox, ogg_page_serialno, aPage)) == mSerial) 294 .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), 295 "Page must be for this stream!"); 296 if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) 297 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { 298 return NS_ERROR_FAILURE; 299 } 300 int r; 301 tainted_ogg<ogg_packet*> packet = mSandbox->malloc_in_sandbox<ogg_packet>(); 302 if (!packet) { 303 return NS_ERROR_OUT_OF_MEMORY; 304 } 305 auto clean_packet = MakeScopeExit([&] { mSandbox->free_in_sandbox(packet); }); 306 307 do { 308 r = sandbox_invoke(*mSandbox, 
ogg_stream_packetout, mState, packet) 309 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON); 310 if (r == 1) { 311 mPackets.Append(CloneOutOfSandbox(packet)); 312 } 313 } while (r != 0); 314 if (sandbox_invoke(*mSandbox, ogg_stream_check, mState) 315 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) { 316 NS_WARNING("Unrecoverable error in ogg_stream_packetout"); 317 return NS_ERROR_FAILURE; 318 } 319 return NS_OK; 320 } 321 322 nsresult OggCodecState::PacketOutUntilGranulepos(bool& aFoundGranulepos) { 323 tainted_ogg<int> r; 324 aFoundGranulepos = false; 325 // Extract packets from the sync state until either no more packets 326 // come out, or we get a data packet with non -1 granulepos. 327 tainted_ogg<ogg_packet*> packet = mSandbox->malloc_in_sandbox<ogg_packet>(); 328 if (!packet) { 329 return NS_ERROR_OUT_OF_MEMORY; 330 } 331 auto clean_packet = MakeScopeExit([&] { mSandbox->free_in_sandbox(packet); }); 332 333 do { 334 r = sandbox_invoke(*mSandbox, ogg_stream_packetout, mState, packet); 335 if (r.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == 1) { 336 OggPacketPtr clone = CloneOutOfSandbox(packet); 337 if (IsHeader(clone.get())) { 338 // Header packets go straight into the packet queue. 339 mPackets.Append(std::move(clone)); 340 } else { 341 // We buffer data packets until we encounter a granulepos. We'll 342 // then use the granulepos to figure out the granulepos of the 343 // preceeding packets. 
344 aFoundGranulepos = clone.get()->granulepos > 0; 345 mUnstamped.AppendElement(std::move(clone)); 346 } 347 } 348 } while (r.unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) != 0 && 349 !aFoundGranulepos); 350 if (sandbox_invoke(*mSandbox, ogg_stream_check, mState) 351 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON)) { 352 NS_WARNING("Unrecoverable error in ogg_stream_packetout"); 353 return NS_ERROR_FAILURE; 354 } 355 return NS_OK; 356 } 357 358 nsresult VorbisState::Reset() { 359 nsresult res = NS_OK; 360 if (mActive && vorbis_synthesis_restart(&mDsp) != 0) { 361 res = NS_ERROR_FAILURE; 362 } 363 mHeaders.Erase(); 364 if (NS_FAILED(OggCodecState::Reset())) { 365 return NS_ERROR_FAILURE; 366 } 367 368 mGranulepos = 0; 369 mPrevVorbisBlockSize = 0; 370 371 return res; 372 } 373 374 VorbisState::VorbisState(rlbox_sandbox_ogg* aSandbox, 375 tainted_opaque_ogg<ogg_page*> aBosPage, 376 uint32_t aSerial) 377 : OggCodecState(aSandbox, aBosPage, aSerial, true), 378 mPrevVorbisBlockSize(0), 379 mGranulepos(0) { 380 MOZ_COUNT_CTOR(VorbisState); 381 vorbis_info_init(&mVorbisInfo); 382 vorbis_comment_init(&mComment); 383 memset(&mDsp, 0, sizeof(vorbis_dsp_state)); 384 memset(&mBlock, 0, sizeof(vorbis_block)); 385 } 386 387 VorbisState::~VorbisState() { 388 MOZ_COUNT_DTOR(VorbisState); 389 Reset(); 390 vorbis_block_clear(&mBlock); 391 vorbis_dsp_clear(&mDsp); 392 vorbis_info_clear(&mVorbisInfo); 393 vorbis_comment_clear(&mComment); 394 } 395 396 bool VorbisState::DecodeHeader(OggPacketPtr aPacket) { 397 ogg_packet* packet = aPacket.get(); // Will be owned by mHeaders. 398 mHeaders.Append(std::move(aPacket)); 399 mPacketCount++; 400 int ret = vorbis_synthesis_headerin(&mVorbisInfo, &mComment, packet); 401 // We must determine when we've read the last header packet. 402 // vorbis_synthesis_headerin() does not tell us when it's read the last 403 // header, so we must keep track of the headers externally. 
404 // 405 // There are 3 header packets, the Identification, Comment, and Setup 406 // headers, which must be in that order. If they're out of order, the file 407 // is invalid. If we've successfully read a header, and it's the setup 408 // header, then we're done reading headers. The first byte of each packet 409 // determines it's type as follows: 410 // 0x1 -> Identification header 411 // 0x3 -> Comment header 412 // 0x5 -> Setup header 413 // For more details of the Vorbis/Ogg containment scheme, see the Vorbis I 414 // Specification, Chapter 4, Codec Setup and Packet Decode: 415 // http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-580004 416 417 bool isSetupHeader = packet->bytes > 0 && packet->packet[0] == 0x5; 418 419 if (ret < 0 || mPacketCount > 3) { 420 // We've received an error, or the first three packets weren't valid 421 // header packets. Assume bad input. Our caller will deactivate the 422 // bitstream. 423 return false; 424 } 425 if (!ret && isSetupHeader && mPacketCount == 3) { 426 // Successfully read the three header packets. 427 // The bitstream remains active. 
428 mDoneReadingHeaders = true; 429 } 430 431 return true; 432 } 433 434 bool VorbisState::Init() { 435 if (!mActive) { 436 return false; 437 } 438 439 int ret = vorbis_synthesis_init(&mDsp, &mVorbisInfo); 440 if (ret != 0) { 441 NS_WARNING("vorbis_synthesis_init() failed initializing vorbis bitstream"); 442 return mActive = false; 443 } 444 ret = vorbis_block_init(&mDsp, &mBlock); 445 if (ret != 0) { 446 NS_WARNING("vorbis_block_init() failed initializing vorbis bitstream"); 447 if (mActive) { 448 vorbis_dsp_clear(&mDsp); 449 } 450 return mActive = false; 451 } 452 453 nsTArray<const unsigned char*> headers; 454 nsTArray<size_t> headerLens; 455 for (size_t i = 0; i < mHeaders.Length(); i++) { 456 headers.AppendElement(mHeaders[i]->packet); 457 headerLens.AppendElement(mHeaders[i]->bytes); 458 } 459 // Save header packets for the decoder 460 VorbisCodecSpecificData vorbisCodecSpecificData{}; 461 if (!XiphHeadersToExtradata(vorbisCodecSpecificData.mHeadersBinaryBlob, 462 headers, headerLens)) { 463 return mActive = false; 464 } 465 mHeaders.Erase(); 466 mInfo.mMimeType = "audio/vorbis"_ns; 467 mInfo.mRate = mVorbisInfo.rate; 468 mInfo.mChannels = mVorbisInfo.channels; 469 mInfo.mBitDepth = 16; 470 mInfo.mCodecSpecificConfig = 471 AudioCodecSpecificVariant{std::move(vorbisCodecSpecificData)}; 472 473 return true; 474 } 475 476 TimeUnit VorbisState::Time(int64_t aGranulepos) { 477 if (!mActive) { 478 return TimeUnit::Invalid(); 479 } 480 481 return VorbisState::Time(&mVorbisInfo, aGranulepos); 482 } 483 484 TimeUnit VorbisState::Time(vorbis_info* aInfo, int64_t aGranulepos) { 485 if (aGranulepos == -1 || aInfo->rate == 0) { 486 return TimeUnit::Invalid(); 487 } 488 return TimeUnit(aGranulepos, aInfo->rate); 489 } 490 491 TimeUnit VorbisState::PacketDuration(ogg_packet* aPacket) { 492 if (!mActive) { 493 return TimeUnit::Invalid(); 494 } 495 if (aPacket->granulepos == -1) { 496 return TimeUnit::Invalid(); 497 } 498 // @FIXME store these in a more stable place 499 if 
(mVorbisPacketSamples.count(aPacket) == 0) { 500 // We haven't seen this packet, don't know its size? 501 return TimeUnit::Invalid(); 502 } 503 504 long samples = mVorbisPacketSamples[aPacket]; 505 return Time(samples); 506 } 507 508 bool VorbisState::IsHeader(ogg_packet* aPacket) { 509 // The first byte in each Vorbis header packet is either 0x01, 0x03, or 0x05, 510 // i.e. the first bit is odd. Audio data packets have their first bit as 0x0. 511 // Any packet with its first bit set cannot be a data packet, it's a 512 // (possibly invalid) header packet. 513 // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-610004.2.1 514 return aPacket->bytes > 0 ? (aPacket->packet[0] & 0x1) : false; 515 } 516 517 UniquePtr<MetadataTags> VorbisState::GetTags() { 518 NS_ASSERTION(mComment.user_comments, "no vorbis comment strings!"); 519 NS_ASSERTION(mComment.comment_lengths, "no vorbis comment lengths!"); 520 auto tags = MakeUnique<MetadataTags>(); 521 for (int i = 0; i < mComment.comments; i++) { 522 AddVorbisComment(tags, mComment.user_comments[i], 523 mComment.comment_lengths[i]); 524 } 525 return tags; 526 } 527 528 nsresult VorbisState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) { 529 if (!mActive) { 530 return NS_OK; 531 } 532 NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke( 533 *mSandbox, ogg_page_serialno, aPage)) == mSerial) 534 .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), 535 "Page must be for this stream!"); 536 if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) 537 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { 538 return NS_ERROR_FAILURE; 539 } 540 bool foundGp; 541 nsresult res = PacketOutUntilGranulepos(foundGp); 542 if (NS_FAILED(res)) { 543 return res; 544 } 545 if (foundGp && mDoneReadingHeaders) { 546 // We've found a packet with a granulepos, and we've loaded our metadata 547 // and initialized our decoder. Determine granulepos of buffered packets. 
548 ReconstructVorbisGranulepos(); 549 for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { 550 OggPacketPtr packet = std::move(mUnstamped[i]); 551 AssertHasRecordedPacketSamples(packet.get()); 552 NS_ASSERTION(!IsHeader(packet.get()), 553 "Don't try to recover header packet gp"); 554 NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); 555 mPackets.Append(std::move(packet)); 556 } 557 mUnstamped.Clear(); 558 } 559 return NS_OK; 560 } 561 562 void VorbisState::ReconstructVorbisGranulepos() { 563 // The number of samples in a Vorbis packet is: 564 // window_blocksize(previous_packet)/4+window_blocksize(current_packet)/4 565 // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-230001.3.2 566 // So we maintain mPrevVorbisBlockSize, the block size of the last packet 567 // encountered. We also maintain mGranulepos, which is the granulepos of 568 // the last encountered packet. This enables us to give granulepos to 569 // packets when the last packet in mUnstamped doesn't have a granulepos 570 // (for example if the stream was truncated). 571 // 572 // We validate our prediction of the number of samples decoded when 573 // VALIDATE_VORBIS_SAMPLE_CALCULATION is defined by recording the predicted 574 // number of samples, and verifing we extract that many when decoding 575 // each packet. 576 577 NS_ASSERTION(mUnstamped.Length() > 0, "Length must be > 0"); 578 auto& last = mUnstamped.LastElement(); 579 NS_ASSERTION(last->e_o_s || last->granulepos >= 0, 580 "Must know last granulepos!"); 581 if (mUnstamped.Length() == 1) { 582 auto& packet = mUnstamped[0]; 583 long blockSize = vorbis_packet_blocksize(&mVorbisInfo, packet.get()); 584 if (blockSize < 0) { 585 // On failure vorbis_packet_blocksize returns < 0. If we've got 586 // a bad packet, we just assume that decode will have to skip this 587 // packet, i.e. assume 0 samples are decodable from this packet. 
588 blockSize = 0; 589 mPrevVorbisBlockSize = 0; 590 } 591 long samples = mPrevVorbisBlockSize / 4 + blockSize / 4; 592 mPrevVorbisBlockSize = blockSize; 593 if (packet->granulepos == -1) { 594 packet->granulepos = mGranulepos + samples; 595 } 596 597 // Account for a partial last frame 598 if (packet->e_o_s && packet->granulepos >= mGranulepos) { 599 samples = packet->granulepos - mGranulepos; 600 } 601 602 mGranulepos = packet->granulepos; 603 RecordVorbisPacketSamples(packet.get(), samples); 604 return; 605 } 606 607 bool unknownGranulepos = last->granulepos == -1; 608 int64_t totalSamples = 0; 609 for (int32_t i = AssertedCast<int32_t>(mUnstamped.Length() - 1); i > 0; i--) { 610 auto& packet = mUnstamped[i]; 611 auto& prev = mUnstamped[i - 1]; 612 ogg_int64_t granulepos = packet->granulepos; 613 NS_ASSERTION(granulepos != -1, "Must know granulepos!"); 614 long prevBlockSize = vorbis_packet_blocksize(&mVorbisInfo, prev.get()); 615 long blockSize = vorbis_packet_blocksize(&mVorbisInfo, packet.get()); 616 617 if (blockSize < 0 || prevBlockSize < 0) { 618 // On failure vorbis_packet_blocksize returns < 0. If we've got 619 // a bad packet, we just assume that decode will have to skip this 620 // packet, i.e. assume 0 samples are decodable from this packet. 621 blockSize = 0; 622 prevBlockSize = 0; 623 } 624 625 long samples = prevBlockSize / 4 + blockSize / 4; 626 totalSamples += samples; 627 prev->granulepos = granulepos - samples; 628 RecordVorbisPacketSamples(packet.get(), samples); 629 } 630 631 if (unknownGranulepos) { 632 for (uint32_t i = 0; i < mUnstamped.Length(); i++) { 633 mUnstamped[i]->granulepos += mGranulepos + totalSamples + 1; 634 } 635 } 636 637 auto& first = mUnstamped[0]; 638 long blockSize = vorbis_packet_blocksize(&mVorbisInfo, first.get()); 639 if (blockSize < 0) { 640 mPrevVorbisBlockSize = 0; 641 blockSize = 0; 642 } 643 644 long samples = (mPrevVorbisBlockSize == 0) 645 ? 
0 646 : mPrevVorbisBlockSize / 4 + blockSize / 4; 647 int64_t start = first->granulepos - samples; 648 RecordVorbisPacketSamples(first.get(), samples); 649 650 if (last->e_o_s && start < mGranulepos) { 651 // We've calculated that there are more samples in this page than its 652 // granulepos claims, and it's the last page in the stream. This is legal, 653 // and we will need to prune the trailing samples when we come to decode it. 654 // We must correct the timestamps so that they follow the last Vorbis page's 655 // samples. 656 int64_t pruned = mGranulepos - start; 657 for (uint32_t i = 0; i < mUnstamped.Length() - 1; i++) { 658 mUnstamped[i]->granulepos += pruned; 659 } 660 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 661 mVorbisPacketSamples[last.get()] -= pruned; 662 #endif 663 } 664 665 mPrevVorbisBlockSize = vorbis_packet_blocksize(&mVorbisInfo, last.get()); 666 mPrevVorbisBlockSize = std::max(static_cast<long>(0), mPrevVorbisBlockSize); 667 mGranulepos = last->granulepos; 668 } 669 670 OpusState::OpusState(rlbox_sandbox_ogg* aSandbox, 671 tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial) 672 : OggCodecState(aSandbox, aBosPage, aSerial, true), 673 mParser(nullptr), 674 mDecoder(nullptr), 675 mPrevPacketGranulepos(0), 676 mPrevPageGranulepos(0) { 677 MOZ_COUNT_CTOR(OpusState); 678 } 679 680 OpusState::~OpusState() { 681 MOZ_COUNT_DTOR(OpusState); 682 Reset(); 683 684 if (mDecoder) { 685 opus_multistream_decoder_destroy(mDecoder); 686 mDecoder = nullptr; 687 } 688 } 689 690 nsresult OpusState::Reset() { return Reset(false); } 691 692 nsresult OpusState::Reset(bool aStart) { 693 nsresult res = NS_OK; 694 695 if (mActive && mDecoder) { 696 // Reset the decoder. 697 opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE); 698 // This lets us distinguish the first page being the last page vs. just 699 // not having processed the previous page when we encounter the last page. 700 mPrevPageGranulepos = aStart ? 0 : -1; 701 mPrevPacketGranulepos = aStart ? 
0 : -1; 702 } 703 704 // Clear queued data. 705 if (NS_FAILED(OggCodecState::Reset())) { 706 return NS_ERROR_FAILURE; 707 } 708 709 LOG(LogLevel::Debug, ("Opus decoder reset")); 710 711 return res; 712 } 713 714 bool OpusState::Init(void) { 715 if (!mActive) { 716 return false; 717 } 718 719 int error; 720 721 NS_ASSERTION(mDecoder == nullptr, "leaking OpusDecoder"); 722 723 mDecoder = opus_multistream_decoder_create( 724 mParser->mRate, mParser->mChannels, mParser->mStreams, 725 mParser->mCoupledStreams, mParser->mMappingTable, &error); 726 727 mInfo.mMimeType = "audio/opus"_ns; 728 mInfo.mRate = mParser->mRate; 729 mInfo.mChannels = mParser->mChannels; 730 mInfo.mBitDepth = 16; 731 // Save preskip & the first header packet for the Opus decoder 732 OpusCodecSpecificData opusData; 733 opusData.mContainerCodecDelayFrames = mParser->mPreSkip; 734 735 if (!mHeaders.PeekFront()) { 736 return false; 737 } 738 opusData.mHeadersBinaryBlob->AppendElements(mHeaders.PeekFront()->packet, 739 mHeaders.PeekFront()->bytes); 740 mInfo.mCodecSpecificConfig = AudioCodecSpecificVariant{std::move(opusData)}; 741 742 mHeaders.Erase(); 743 LOG(LogLevel::Debug, ("Opus decoder init")); 744 745 return error == OPUS_OK; 746 } 747 748 bool OpusState::DecodeHeader(OggPacketPtr aPacket) { 749 switch (mPacketCount++) { 750 // Parse the id header. 751 case 0: 752 mParser = MakeUnique<OpusParser>(); 753 if (!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) { 754 return false; 755 } 756 mHeaders.Append(std::move(aPacket)); 757 break; 758 759 // Parse the metadata header. 760 case 1: 761 if (!mParser->DecodeTags(aPacket->packet, aPacket->bytes)) { 762 return false; 763 } 764 break; 765 766 // We made it to the first data packet (which includes reconstructing 767 // timestamps for it in PageIn). Success! 768 default: 769 mDoneReadingHeaders = true; 770 // Put it back on the queue so we can decode it. 
771 mPackets.PushFront(std::move(aPacket)); 772 break; 773 } 774 return true; 775 } 776 777 /* Construct and return a tags hashmap from our internal array */ 778 UniquePtr<MetadataTags> OpusState::GetTags() { 779 auto tags = MakeUnique<MetadataTags>(); 780 for (uint32_t i = 0; i < mParser->mTags.Length(); i++) { 781 AddVorbisComment(tags, mParser->mTags[i].Data(), 782 mParser->mTags[i].Length()); 783 } 784 785 return tags; 786 } 787 788 /* Return the timestamp (in microseconds) equivalent to a granulepos. */ 789 TimeUnit OpusState::Time(int64_t aGranulepos) { 790 if (!mActive) { 791 return TimeUnit::Invalid(); 792 } 793 794 return Time(mParser->mPreSkip, aGranulepos); 795 } 796 797 TimeUnit OpusState::Time(int aPreSkip, int64_t aGranulepos) { 798 if (aGranulepos < 0) { 799 return TimeUnit::Invalid(); 800 } 801 802 int64_t offsetGranulePos = aGranulepos - aPreSkip; 803 // Ogg Opus always runs at a granule rate of 48 kHz. 804 return TimeUnit(offsetGranulePos, 48000); 805 } 806 807 bool OpusState::IsHeader(ogg_packet* aPacket) { 808 return aPacket->bytes >= 16 && (!memcmp(aPacket->packet, "OpusHead", 8) || 809 !memcmp(aPacket->packet, "OpusTags", 8)); 810 } 811 812 nsresult OpusState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) { 813 if (!mActive) { 814 return NS_OK; 815 } 816 NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke( 817 *mSandbox, ogg_page_serialno, aPage)) == mSerial) 818 .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), 819 "Page must be for this stream!"); 820 if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) 821 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { 822 return NS_ERROR_FAILURE; 823 } 824 825 bool haveGranulepos; 826 nsresult rv = PacketOutUntilGranulepos(haveGranulepos); 827 if (NS_FAILED(rv) || !haveGranulepos || mPacketCount < 2) { 828 return rv; 829 } 830 if (!ReconstructOpusGranulepos()) { 831 return NS_ERROR_FAILURE; 832 } 833 for (uint32_t i = 0; i < mUnstamped.Length(); i++) { 834 
OggPacketPtr packet = std::move(mUnstamped[i]); 835 NS_ASSERTION(!IsHeader(packet.get()), "Don't try to play a header packet"); 836 NS_ASSERTION(packet->granulepos != -1, "Packet should have a granulepos"); 837 mPackets.Append(std::move(packet)); 838 } 839 mUnstamped.Clear(); 840 return NS_OK; 841 } 842 843 // Helper method to return the change in granule position due to an Opus packet 844 // (as distinct from the number of samples in the packet, which depends on the 845 // decoder rate). It should work with a multistream Opus file, and continue to 846 // work should we ever allow the decoder to decode at a rate other than 48 kHz. 847 // It even works before we've created the actual Opus decoder. 848 static int GetOpusDeltaGP(ogg_packet* packet) { 849 int nframes; 850 nframes = opus_packet_get_nb_frames(packet->packet, 851 AssertedCast<int32_t>(packet->bytes)); 852 if (nframes > 0) { 853 return nframes * opus_packet_get_samples_per_frame(packet->packet, 48000); 854 } 855 NS_WARNING("Invalid Opus packet."); 856 return 0; 857 } 858 859 TimeUnit OpusState::PacketDuration(ogg_packet* aPacket) { 860 return TimeUnit(GetOpusDeltaGP(aPacket), 48000); 861 } 862 863 bool OpusState::ReconstructOpusGranulepos(void) { 864 NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); 865 NS_ASSERTION(mUnstamped.LastElement()->e_o_s || 866 mUnstamped.LastElement()->granulepos > 0, 867 "Must know last granulepos!"); 868 int64_t gp; 869 // If this is the last page, and we've seen at least one previous page (or 870 // this is the first page)... 871 if (mUnstamped.LastElement()->e_o_s) { 872 auto& last = mUnstamped.LastElement(); 873 if (mPrevPageGranulepos != -1) { 874 // If this file only has one page and the final granule position is 875 // smaller than the pre-skip amount, we MUST reject the stream. 
876 if (!mDoneReadingHeaders && last->granulepos < mParser->mPreSkip) { 877 return false; 878 } 879 int64_t last_gp = last->granulepos; 880 gp = mPrevPageGranulepos; 881 // Loop through the packets forwards, adding the current packet's 882 // duration to the previous granulepos to get the value for the 883 // current packet. 884 for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { 885 auto& packet = mUnstamped[i]; 886 int offset = GetOpusDeltaGP(packet.get()); 887 // Check for error (negative offset) and overflow. 888 if (offset >= 0 && gp <= INT64_MAX - offset) { 889 gp += offset; 890 if (gp >= last_gp) { 891 NS_WARNING("Opus end trimming removed more than a full packet."); 892 // We were asked to remove a full packet's worth of data or more. 893 // Encoders SHOULD NOT produce streams like this, but we'll handle 894 // it for them anyway. 895 gp = last_gp; 896 mUnstamped.RemoveLastElements(mUnstamped.Length() - (i + 1)); 897 packet->e_o_s = 1; 898 } 899 } 900 packet->granulepos = gp; 901 } 902 mPrevPageGranulepos = last_gp; 903 return true; 904 } 905 NS_WARNING("No previous granule position to use for Opus end trimming."); 906 // If we don't have a previous granule position, fall through. 907 // We simply won't trim any samples from the end. 908 // TODO: Are we guaranteed to have seen a previous page if there is one? 909 } 910 911 auto& last = mUnstamped.LastElement(); 912 gp = last->granulepos; 913 // Loop through the packets backwards, subtracting the next 914 // packet's duration from its granulepos to get the value 915 // for the current packet. 916 for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { 917 int offset = GetOpusDeltaGP(mUnstamped[i].get()); 918 // Check for error (negative offset) and overflow. 919 if (offset >= 0) { 920 if (offset <= gp) { 921 gp -= offset; 922 } else { 923 // If the granule position of the first data page is smaller than the 924 // number of decodable audio samples on that page, then we MUST reject 925 // the stream. 
926 if (!mDoneReadingHeaders) return false; 927 // It's too late to reject the stream. 928 // If we get here, this almost certainly means the file has screwed-up 929 // timestamps somewhere after the first page. 930 NS_WARNING("Clamping negative Opus granulepos to zero."); 931 gp = 0; 932 } 933 } 934 mUnstamped[i - 1]->granulepos = gp; 935 } 936 937 // Check to make sure the first granule position is at least as large as the 938 // total number of samples decodable from the first page with completed 939 // packets. This requires looking at the duration of the first packet, too. 940 // We MUST reject such streams. 941 if (!mDoneReadingHeaders && GetOpusDeltaGP(mUnstamped[0].get()) > gp) { 942 return false; 943 } 944 mPrevPageGranulepos = last->granulepos; 945 return true; 946 } 947 948 already_AddRefed<MediaRawData> OpusState::PacketOutAsMediaRawData() { 949 ogg_packet* packet = PacketPeek(); 950 if (!packet) { 951 return nullptr; 952 } 953 954 uint32_t frames = 0; 955 const int64_t endFrame = packet->granulepos; 956 957 if (packet->e_o_s) { 958 frames = GetOpusDeltaGP(packet); 959 } 960 961 RefPtr<MediaRawData> data = OggCodecState::PacketOutAsMediaRawData(); 962 if (!data) { 963 return nullptr; 964 } 965 966 if (data->mEOS && mPrevPacketGranulepos != -1) { 967 // If this is the last packet, perform end trimming. 
968 int64_t startFrame = mPrevPacketGranulepos; 969 frames -= std::max<int64_t>( 970 0, std::min(endFrame - startFrame, static_cast<int64_t>(frames))); 971 TimeUnit toTrim = TimeUnit(frames, 48000); 972 LOG(LogLevel::Debug, 973 ("Trimming last opus packet: [%s, %s] to [%s, %s]", 974 data->mTime.ToString().get(), data->GetEndTime().ToString().get(), 975 data->mTime.ToString().get(), 976 (data->mTime + data->mDuration - toTrim).ToString().get())); 977 978 data->mOriginalPresentationWindow = 979 Some(media::TimeInterval{data->mTime, data->mTime + data->mDuration}); 980 data->mDuration -= toTrim; 981 if (data->mDuration.IsNegative()) { 982 data->mDuration = TimeUnit::Zero(data->mTime); 983 } 984 } 985 986 // Save this packet's granule position in case we need to perform end 987 // trimming on the next packet. 988 mPrevPacketGranulepos = endFrame; 989 990 return data.forget(); 991 } 992 993 FlacState::FlacState(rlbox_sandbox_ogg* aSandbox, 994 tainted_opaque_ogg<ogg_page*> aBosPage, uint32_t aSerial) 995 : OggCodecState(aSandbox, aBosPage, aSerial, true) {} 996 997 bool FlacState::DecodeHeader(OggPacketPtr aPacket) { 998 if (mParser.DecodeHeaderBlock(aPacket->packet, aPacket->bytes).isErr()) { 999 return false; 1000 } 1001 if (mParser.HasFullMetadata()) { 1002 mDoneReadingHeaders = true; 1003 } 1004 return true; 1005 } 1006 1007 TimeUnit FlacState::Time(int64_t aGranulepos) { 1008 if (!mParser.mInfo.IsValid()) { 1009 return TimeUnit::Invalid(); 1010 } 1011 return TimeUnit(aGranulepos, mParser.mInfo.mRate); 1012 } 1013 1014 TimeUnit FlacState::PacketDuration(ogg_packet* aPacket) { 1015 return TimeUnit(mParser.BlockDuration(aPacket->packet, aPacket->bytes), 1016 mParser.mInfo.mRate); 1017 } 1018 1019 bool FlacState::IsHeader(ogg_packet* aPacket) { 1020 auto res = mParser.IsHeaderBlock(aPacket->packet, aPacket->bytes); 1021 return res.isOk() ? 
res.unwrap() : false; 1022 } 1023 1024 nsresult FlacState::PageIn(tainted_opaque_ogg<ogg_page*> aPage) { 1025 if (!mActive) { 1026 return NS_OK; 1027 } 1028 NS_ASSERTION((rlbox::sandbox_static_cast<uint32_t>(sandbox_invoke( 1029 *mSandbox, ogg_page_serialno, aPage)) == mSerial) 1030 .unverified_safe_because(RLBOX_OGG_PAGE_SERIAL_REASON), 1031 "Page must be for this stream!"); 1032 if (sandbox_invoke(*mSandbox, ogg_stream_pagein, mState, aPage) 1033 .unverified_safe_because(RLBOX_OGG_STATE_ASSERT_REASON) == -1) { 1034 return NS_ERROR_FAILURE; 1035 } 1036 bool foundGp; 1037 nsresult res = PacketOutUntilGranulepos(foundGp); 1038 if (NS_FAILED(res)) { 1039 return res; 1040 } 1041 if (foundGp && mDoneReadingHeaders) { 1042 // We've found a packet with a granulepos, and we've loaded our metadata 1043 // and initialized our decoder. Determine granulepos of buffered packets. 1044 ReconstructFlacGranulepos(); 1045 for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { 1046 OggPacketPtr packet = std::move(mUnstamped[i]); 1047 NS_ASSERTION(!IsHeader(packet.get()), 1048 "Don't try to recover header packet gp"); 1049 NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); 1050 mPackets.Append(std::move(packet)); 1051 } 1052 mUnstamped.Clear(); 1053 } 1054 return NS_OK; 1055 } 1056 1057 // Return a hash table with tag metadata. 1058 UniquePtr<MetadataTags> FlacState::GetTags() { return mParser.GetTags(); } 1059 1060 const TrackInfo* FlacState::GetInfo() const { return &mParser.mInfo; } 1061 1062 bool FlacState::ReconstructFlacGranulepos(void) { 1063 NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); 1064 auto& last = mUnstamped.LastElement(); 1065 NS_ASSERTION(last->e_o_s || last->granulepos > 0, 1066 "Must know last granulepos!"); 1067 int64_t gp; 1068 1069 gp = last->granulepos; 1070 // Loop through the packets backwards, subtracting the next 1071 // packet's duration from its granulepos to get the value 1072 // for the current packet. 
1073 for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { 1074 int64_t offset = 1075 mParser.BlockDuration(mUnstamped[i]->packet, mUnstamped[i]->bytes); 1076 // Check for error (negative offset) and overflow. 1077 if (offset >= 0) { 1078 if (offset <= gp) { 1079 gp -= offset; 1080 } else { 1081 // If the granule position of the first data page is smaller than the 1082 // number of decodable audio samples on that page, then we MUST reject 1083 // the stream. 1084 if (!mDoneReadingHeaders) { 1085 return false; 1086 } 1087 // It's too late to reject the stream. 1088 // If we get here, this almost certainly means the file has screwed-up 1089 // timestamps somewhere after the first page. 1090 NS_WARNING("Clamping negative granulepos to zero."); 1091 gp = 0; 1092 } 1093 } 1094 mUnstamped[i - 1]->granulepos = gp; 1095 } 1096 1097 return true; 1098 } 1099 1100 SkeletonState::SkeletonState(rlbox_sandbox_ogg* aSandbox, 1101 tainted_opaque_ogg<ogg_page*> aBosPage, 1102 uint32_t aSerial) 1103 : OggCodecState(aSandbox, aBosPage, aSerial, true), 1104 mVersion(0), 1105 mPresentationTime(0), 1106 mLength(0) { 1107 MOZ_COUNT_CTOR(SkeletonState); 1108 } 1109 1110 SkeletonState::~SkeletonState() { MOZ_COUNT_DTOR(SkeletonState); } 1111 1112 // Support for Ogg Skeleton 4.0, as per specification at: 1113 // http://wiki.xiph.org/Ogg_Skeleton_4 1114 1115 // Minimum length in bytes of a Skeleton header packet. 1116 static const long SKELETON_MIN_HEADER_LEN = 28; 1117 static const long SKELETON_4_0_MIN_HEADER_LEN = 80; 1118 1119 // Minimum length in bytes of a Skeleton 4.0 index packet. 1120 static const long SKELETON_4_0_MIN_INDEX_LEN = 42; 1121 1122 // Minimum length in bytes of a Skeleton 3.0/4.0 Fisbone packet. 1123 static const long SKELETON_MIN_FISBONE_LEN = 52; 1124 1125 // Minimum possible size of a compressed index keypoint. 
static constexpr size_t MIN_KEY_POINT_SIZE = 2;

// Where the major and minor version numbers live inside the Ogg Skeleton 4.0
// header packet (byte offsets from the start of the packet).
static constexpr size_t SKELETON_VERSION_MAJOR_OFFSET = 8;
static constexpr size_t SKELETON_VERSION_MINOR_OFFSET = 10;

// Where the presentation time numerator and denominator live in the header
// packet.
static constexpr size_t SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET = 12;
static constexpr size_t SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET = 20;

// Where the "length of file" field lives in the Skeleton 4.0 header packet.
static constexpr size_t SKELETON_FILE_LENGTH_OFFSET = 64;

// Field positions inside a Skeleton index packet. Keypoint data begins at
// INDEX_KEYPOINT_OFFSET.
static constexpr size_t INDEX_SERIALNO_OFFSET = 6;
static constexpr size_t INDEX_NUM_KEYPOINTS_OFFSET = 10;
static constexpr size_t INDEX_TIME_DENOM_OFFSET = 18;
static constexpr size_t INDEX_FIRST_NUMER_OFFSET = 26;
static constexpr size_t INDEX_LAST_NUMER_OFFSET = 34;
static constexpr size_t INDEX_KEYPOINT_OFFSET = 42;

// Byte-offsets of the fields in the Skeleton Fisbone packet.
1149 static const size_t FISBONE_MSG_FIELDS_OFFSET = 8; 1150 static const size_t FISBONE_SERIALNO_OFFSET = 12; 1151 1152 static bool IsSkeletonBOS(ogg_packet* aPacket) { 1153 static_assert(SKELETON_MIN_HEADER_LEN >= 8, 1154 "Minimum length of skeleton BOS header incorrect"); 1155 return aPacket->bytes >= SKELETON_MIN_HEADER_LEN && 1156 memcmp(reinterpret_cast<char*>(aPacket->packet), "fishead", 8) == 0; 1157 } 1158 1159 static bool IsSkeletonIndex(ogg_packet* aPacket) { 1160 static_assert(SKELETON_4_0_MIN_INDEX_LEN >= 5, 1161 "Minimum length of skeleton index header incorrect"); 1162 return aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN && 1163 memcmp(reinterpret_cast<char*>(aPacket->packet), "index", 5) == 0; 1164 } 1165 1166 static bool IsSkeletonFisbone(ogg_packet* aPacket) { 1167 static_assert(SKELETON_MIN_FISBONE_LEN >= 8, 1168 "Minimum length of skeleton fisbone header incorrect"); 1169 return aPacket->bytes >= SKELETON_MIN_FISBONE_LEN && 1170 memcmp(reinterpret_cast<char*>(aPacket->packet), "fisbone", 8) == 0; 1171 } 1172 1173 // Reads a variable length encoded integer at p. Will not read 1174 // past aLimit. Returns pointer to character after end of integer. 
1175 static const unsigned char* ReadVariableLengthInt(const unsigned char* p, 1176 const unsigned char* aLimit, 1177 int64_t& n) { 1178 int shift = 0; 1179 int64_t byte = 0; 1180 n = 0; 1181 while (p < aLimit && (byte & 0x80) != 0x80 && shift < 57) { 1182 byte = static_cast<int64_t>(*p); 1183 n |= ((byte & 0x7f) << shift); 1184 shift += 7; 1185 p++; 1186 } 1187 return p; 1188 } 1189 1190 bool SkeletonState::DecodeIndex(ogg_packet* aPacket) { 1191 NS_ASSERTION(aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN, 1192 "Index must be at least minimum size"); 1193 if (!mActive) { 1194 return false; 1195 } 1196 1197 uint32_t serialno = 1198 LittleEndian::readUint32(aPacket->packet + INDEX_SERIALNO_OFFSET); 1199 int64_t numKeyPoints = 1200 LittleEndian::readInt64(aPacket->packet + INDEX_NUM_KEYPOINTS_OFFSET); 1201 1202 TimeUnit endTime = TimeUnit::Zero(); 1203 TimeUnit startTime = TimeUnit::Zero(); 1204 const unsigned char* p = aPacket->packet; 1205 1206 int64_t timeDenom = 1207 LittleEndian::readInt64(aPacket->packet + INDEX_TIME_DENOM_OFFSET); 1208 if (timeDenom == 0) { 1209 LOG(LogLevel::Debug, ("Ogg Skeleton Index packet for stream %u has 0 " 1210 "timestamp denominator.", 1211 serialno)); 1212 return (mActive = false); 1213 } 1214 1215 // Extract the start time. 1216 int64_t timeRawInt = LittleEndian::readInt64(p + INDEX_FIRST_NUMER_OFFSET); 1217 startTime = TimeUnit(timeRawInt, timeDenom); 1218 // Extract the end time. 1219 timeRawInt = LittleEndian::readInt64(p + INDEX_LAST_NUMER_OFFSET); 1220 endTime = TimeUnit(timeRawInt, timeDenom); 1221 1222 // Check the numKeyPoints value read, ensure we're not going to run out of 1223 // memory while trying to decode the index packet. 
1224 CheckedInt64 minPacketSize = 1225 (CheckedInt64(numKeyPoints) * MIN_KEY_POINT_SIZE) + INDEX_KEYPOINT_OFFSET; 1226 if (!minPacketSize.isValid()) { 1227 return (mActive = false); 1228 } 1229 1230 int64_t sizeofIndex = 1231 AssertedCast<int64_t>(aPacket->bytes - INDEX_KEYPOINT_OFFSET); 1232 int64_t maxNumKeyPoints = 1233 AssertedCast<int64_t>(sizeofIndex / MIN_KEY_POINT_SIZE); 1234 if (aPacket->bytes < minPacketSize.value() || 1235 numKeyPoints > maxNumKeyPoints || numKeyPoints < 0) { 1236 // Packet size is less than the theoretical minimum size, or the packet is 1237 // claiming to store more keypoints than it's capable of storing. This means 1238 // that the numKeyPoints field is too large or small for the packet to 1239 // possibly contain as many packets as it claims to, so the numKeyPoints 1240 // field is possibly malicious. Don't try decoding this index, we may run 1241 // out of memory. 1242 LOG(LogLevel::Debug, ("Possibly malicious number of key points reported " 1243 "(%" PRId64 ") in index packet for stream %u.", 1244 numKeyPoints, serialno)); 1245 return (mActive = false); 1246 } 1247 1248 UniquePtr<nsKeyFrameIndex> keyPoints(new nsKeyFrameIndex(startTime, endTime)); 1249 1250 p = aPacket->packet + INDEX_KEYPOINT_OFFSET; 1251 const unsigned char* limit = aPacket->packet + aPacket->bytes; 1252 int64_t numKeyPointsRead = 0; 1253 CheckedInt64 offset = 0; 1254 TimeUnit time = TimeUnit::Zero(); 1255 while (p < limit && numKeyPointsRead < numKeyPoints) { 1256 int64_t delta = 0; 1257 p = ReadVariableLengthInt(p, limit, delta); 1258 offset += delta; 1259 if (p == limit || !offset.isValid() || offset.value() > mLength || 1260 offset.value() < 0) { 1261 return (mActive = false); 1262 } 1263 p = ReadVariableLengthInt(p, limit, delta); 1264 time += TimeUnit(delta, timeDenom); 1265 if (!time.IsValid() || time > endTime || time < startTime) { 1266 return (mActive = false); 1267 } 1268 keyPoints->Add(offset.value(), time); 1269 numKeyPointsRead++; 1270 } 1271 1272 
uint32_t keyPointsRead = keyPoints->Length(); 1273 if (keyPointsRead > 0) { 1274 mIndex.InsertOrUpdate(serialno, std::move(keyPoints)); 1275 } 1276 1277 LOG(LogLevel::Debug, ("Loaded %d keypoints for Skeleton on stream %u", 1278 keyPointsRead, serialno)); 1279 return true; 1280 } 1281 1282 nsresult SkeletonState::IndexedSeekTargetForTrack(uint32_t aSerialno, 1283 const TimeUnit& aTarget, 1284 nsKeyPoint& aResult) { 1285 nsKeyFrameIndex* index = nullptr; 1286 mIndex.Get(aSerialno, &index); 1287 1288 if (!index || index->Length() == 0 || aTarget < index->mStartTime || 1289 aTarget > index->mEndTime) { 1290 return NS_ERROR_FAILURE; 1291 } 1292 1293 // Binary search to find the last key point with time less than target. 1294 uint32_t start = 0; 1295 uint32_t end = index->Length() - 1; 1296 while (end > start) { 1297 uint32_t mid = start + ((end - start + 1) >> 1); 1298 if (index->Get(mid).mTime == aTarget) { 1299 start = mid; 1300 break; 1301 } 1302 if (index->Get(mid).mTime < aTarget) { 1303 start = mid; 1304 } else { 1305 end = mid - 1; 1306 } 1307 } 1308 1309 aResult = index->Get(start); 1310 NS_ASSERTION(aResult.mTime <= aTarget, "Result should have time <= target"); 1311 return NS_OK; 1312 } 1313 1314 nsresult SkeletonState::IndexedSeekTarget(const TimeUnit& aTarget, 1315 nsTArray<uint32_t>& aTracks, 1316 nsSeekTarget& aResult) { 1317 if (!mActive || mVersion < SKELETON_VERSION(4, 0)) { 1318 return NS_ERROR_FAILURE; 1319 } 1320 // Loop over all requested tracks' indexes, and get the keypoint for that 1321 // seek target. Record the keypoint with the lowest offset, this will be 1322 // our seek result. User must seek to the one with lowest offset to ensure we 1323 // pass "keyframes" on all tracks when we decode forwards to the seek target. 
1324 nsSeekTarget r; 1325 for (uint32_t i = 0; i < aTracks.Length(); i++) { 1326 nsKeyPoint k; 1327 if (NS_SUCCEEDED(IndexedSeekTargetForTrack(aTracks[i], aTarget, k)) && 1328 k.mOffset < r.mKeyPoint.mOffset) { 1329 r.mKeyPoint = k; 1330 r.mSerial = aTracks[i]; 1331 } 1332 } 1333 if (r.IsNull()) { 1334 return NS_ERROR_FAILURE; 1335 } 1336 LOG(LogLevel::Debug, ("Indexed seek target for time %s is offset %" PRId64, 1337 aTarget.ToString().get(), r.mKeyPoint.mOffset)); 1338 aResult = r; 1339 return NS_OK; 1340 } 1341 1342 nsresult SkeletonState::GetDuration(const nsTArray<uint32_t>& aTracks, 1343 TimeUnit& aDuration) { 1344 if (!mActive || mVersion < SKELETON_VERSION(4, 0) || !HasIndex() || 1345 aTracks.Length() == 0) { 1346 return NS_ERROR_FAILURE; 1347 } 1348 TimeUnit endTime = TimeUnit::FromNegativeInfinity(); 1349 TimeUnit startTime = TimeUnit::FromInfinity(); 1350 for (uint32_t i = 0; i < aTracks.Length(); i++) { 1351 nsKeyFrameIndex* index = nullptr; 1352 mIndex.Get(aTracks[i], &index); 1353 if (!index) { 1354 // Can't get the timestamps for one of the required tracks, fail. 1355 return NS_ERROR_FAILURE; 1356 } 1357 if (index->mEndTime > endTime) { 1358 endTime = index->mEndTime; 1359 } 1360 if (index->mStartTime < startTime) { 1361 startTime = index->mStartTime; 1362 } 1363 } 1364 NS_ASSERTION(endTime > startTime, "Duration must be positive"); 1365 aDuration = endTime - startTime; 1366 return aDuration.IsValid() ? 
NS_OK : NS_ERROR_FAILURE; 1367 } 1368 1369 bool SkeletonState::DecodeFisbone(ogg_packet* aPacket) { 1370 if (aPacket->bytes < static_cast<long>(FISBONE_MSG_FIELDS_OFFSET + 4)) { 1371 return false; 1372 } 1373 uint32_t offsetMsgField = 1374 LittleEndian::readUint32(aPacket->packet + FISBONE_MSG_FIELDS_OFFSET); 1375 1376 if (aPacket->bytes < static_cast<long>(FISBONE_SERIALNO_OFFSET + 4)) { 1377 return false; 1378 } 1379 uint32_t serialno = 1380 LittleEndian::readUint32(aPacket->packet + FISBONE_SERIALNO_OFFSET); 1381 1382 CheckedUint32 checked_fields_pos = 1383 CheckedUint32(FISBONE_MSG_FIELDS_OFFSET) + offsetMsgField; 1384 if (!checked_fields_pos.isValid() || 1385 aPacket->bytes < static_cast<int64_t>(checked_fields_pos.value())) { 1386 return false; 1387 } 1388 int64_t msgLength = aPacket->bytes - checked_fields_pos.value(); 1389 char* msgProbe = (char*)aPacket->packet + checked_fields_pos.value(); 1390 char* msgHead = msgProbe; 1391 UniquePtr<MessageField> field(new MessageField()); 1392 1393 const static FieldPatternType kFieldTypeMaps[] = { 1394 {"Content-Type:", eContentType}, 1395 {"Role:", eRole}, 1396 {"Name:", eName}, 1397 {"Language:", eLanguage}, 1398 {"Title:", eTitle}, 1399 {"Display-hint:", eDisplayHint}, 1400 {"Altitude:", eAltitude}, 1401 {"TrackOrder:", eTrackOrder}, 1402 {"Track dependencies:", eTrackDependencies}}; 1403 1404 bool isContentTypeParsed = false; 1405 while (msgLength > 1) { 1406 if (*msgProbe == '\r' && *(msgProbe + 1) == '\n') { 1407 nsAutoCString strMsg(msgHead, msgProbe - msgHead); 1408 for (size_t i = 0; i < std::size(kFieldTypeMaps); i++) { 1409 if (strMsg.Find(kFieldTypeMaps[i].mPatternToRecognize) != -1) { 1410 // The content of message header fields follows [RFC2822], and the 1411 // mandatory message field must be encoded in US-ASCII, others 1412 // must be be encoded in UTF-8. "Content-Type" must come first 1413 // for all of message header fields. 
1414 // See 1415 // http://svn.annodex.net/standards/draft-pfeiffer-oggskeleton-current.txt. 1416 if (i != 0 && !isContentTypeParsed) { 1417 return false; 1418 } 1419 1420 if ((i == 0 && IsAscii(strMsg)) || (i != 0 && IsUtf8(strMsg))) { 1421 EMsgHeaderType eHeaderType = kFieldTypeMaps[i].mMsgHeaderType; 1422 (void)field->mValuesStore.LookupOrInsertWith( 1423 eHeaderType, [i, msgHead, msgProbe]() { 1424 uint32_t nameLen = 1425 strlen(kFieldTypeMaps[i].mPatternToRecognize); 1426 return MakeUnique<nsCString>(msgHead + nameLen, 1427 msgProbe - msgHead - nameLen); 1428 }); 1429 isContentTypeParsed = i == 0 ? true : isContentTypeParsed; 1430 } 1431 break; 1432 } 1433 } 1434 msgProbe += 2; 1435 msgLength -= 2; 1436 msgHead = msgProbe; 1437 continue; 1438 } 1439 msgLength--; 1440 msgProbe++; 1441 } 1442 1443 return mMsgFieldStore.WithEntryHandle(serialno, [&](auto&& entry) { 1444 if (entry) { 1445 // mMsgFieldStore has an entry for serialno already. 1446 return false; 1447 } 1448 entry.Insert(std::move(field)); 1449 return true; 1450 }); 1451 } 1452 1453 bool SkeletonState::DecodeHeader(OggPacketPtr aPacket) { 1454 if (IsSkeletonBOS(aPacket.get())) { 1455 uint16_t verMajor = LittleEndian::readUint16(aPacket->packet + 1456 SKELETON_VERSION_MAJOR_OFFSET); 1457 uint16_t verMinor = LittleEndian::readUint16(aPacket->packet + 1458 SKELETON_VERSION_MINOR_OFFSET); 1459 1460 // Read the presentation time. We read this before the version check as the 1461 // presentation time exists in all versions. 1462 int64_t n = LittleEndian::readInt64( 1463 aPacket->packet + SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET); 1464 int64_t d = LittleEndian::readInt64( 1465 aPacket->packet + SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET); 1466 mPresentationTime = d == 0 ? 0 1467 : AssertedCast<int64_t>(static_cast<float>(n) / 1468 static_cast<float>(d)) * 1469 USECS_PER_S; 1470 1471 mVersion = SKELETON_VERSION(verMajor, verMinor); 1472 // We can only care to parse Skeleton version 4.0+. 
1473 if (mVersion < SKELETON_VERSION(4, 0) || 1474 mVersion >= SKELETON_VERSION(5, 0) || 1475 aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN) { 1476 return false; 1477 } 1478 1479 // Extract the segment length. 1480 mLength = 1481 LittleEndian::readInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET); 1482 1483 LOG(LogLevel::Debug, ("Skeleton segment length: %" PRId64, mLength)); 1484 1485 // Initialize the serialno-to-index map. 1486 return true; 1487 } 1488 if (IsSkeletonIndex(aPacket.get()) && mVersion >= SKELETON_VERSION(4, 0)) { 1489 return DecodeIndex(aPacket.get()); 1490 } 1491 if (IsSkeletonFisbone(aPacket.get())) { 1492 return DecodeFisbone(aPacket.get()); 1493 } 1494 if (aPacket->e_o_s) { 1495 mDoneReadingHeaders = true; 1496 } 1497 return true; 1498 } 1499 1500 #undef LOG 1501 1502 } // namespace mozilla