nsParser.cpp (36209B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set sw=2 ts=2 et tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "nsAtom.h" 8 #include "nsParser.h" 9 #include "nsString.h" 10 #include "nsCRT.h" 11 #include "nsScanner.h" 12 #include "plstr.h" 13 #include "nsIChannel.h" 14 #include "nsIInputStream.h" 15 #include "prenv.h" 16 #include "prlock.h" 17 #include "prcvar.h" 18 #include "nsReadableUtils.h" 19 #include "nsCOMPtr.h" 20 #include "nsExpatDriver.h" 21 #include "nsIFragmentContentSink.h" 22 #include "nsStreamUtils.h" 23 #include "nsXPCOMCIDInternal.h" 24 #include "nsMimeTypes.h" 25 #include "nsCharsetSource.h" 26 #include "nsThreadUtils.h" 27 28 #include "mozilla/CondVar.h" 29 #include "mozilla/dom/ScriptLoader.h" 30 #include "mozilla/Encoding.h" 31 #include "mozilla/Mutex.h" 32 33 using namespace mozilla; 34 35 #define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000001 36 #define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000002 37 38 //-------------- Begin ParseContinue Event Definition ------------------------ 39 /* 40 The parser can be explicitly interrupted by passing a return value of 41 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause 42 the parser to stop processing and allow the application to return to the event 43 loop. The data which was left at the time of interruption will be processed 44 the next time OnDataAvailable is called. If the parser has received its final 45 chunk of data then OnDataAvailable will no longer be called by the networking 46 module, so the parser will schedule a nsParserContinueEvent which will call 47 the parser to process the remaining data after returning to the event loop. 48 If the parser is interrupted while processing the remaining data it will 49 schedule another ParseContinueEvent. 
The processing of data followed by
scheduling of the continue events will proceed until either:

  1) All of the remaining data can be processed without interrupting
  2) The parser has been cancelled.

The nsContentSink records the time when the chunk has started processing and
will return NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has
exceeded a threshold called max tokenizing processing time. This allows the
content sink to limit how much data is processed in a single chunk which in
turn gates how much time is spent away from the event loop. Processing smaller
chunks of data also reduces the time spent in subsequent reflows.

This capability is most apparent when loading large documents. If the maximum
token processing time is set small enough the application will remain
responsive during document load.

A side-effect of this capability is that document load is not complete when
the last chunk of data is passed to OnDataAvailable since the parser may have
been interrupted when the last chunk of data arrived. The document is complete
when all of the document has been tokenized and there aren't any pending
nsParserContinueEvents. This can cause problems if the application assumes
that it can monitor the load requests to determine when the document load has
been completed. This is what happens in Mozilla. The document is considered
completely loaded when all of the load requests have been satisfied.

Currently the parser ignores requests to be interrupted during the
processing of script. This is because a document.write followed by JavaScript
calls to manipulate the DOM may fail if the parser was interrupted during the
document.write.
79 80 For more details @see bugzilla bug 76722 81 */ 82 83 class nsParserContinueEvent : public Runnable { 84 public: 85 RefPtr<nsParser> mParser; 86 87 explicit nsParserContinueEvent(nsParser* aParser) 88 : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {} 89 90 NS_IMETHOD Run() override { 91 mParser->HandleParserContinueEvent(this); 92 return NS_OK; 93 } 94 }; 95 96 //-------------- End ParseContinue Event Definition ------------------------ 97 98 /** 99 * default constructor 100 */ 101 nsParser::nsParser() : mCharset(WINDOWS_1252_ENCODING) { Initialize(); } 102 103 nsParser::~nsParser() { Cleanup(); } 104 105 void nsParser::Initialize() { 106 mContinueEvent = nullptr; 107 mCharsetSource = kCharsetUninitialized; 108 mCharset = WINDOWS_1252_ENCODING; 109 mInternalState = NS_OK; 110 mStreamStatus = NS_OK; 111 mCommand = eViewNormal; 112 mBlocked = 0; 113 mFlags = NS_PARSER_FLAG_CAN_TOKENIZE; 114 115 mProcessingNetworkData = false; 116 mOnStopPending = false; 117 } 118 119 void nsParser::Cleanup() { 120 // It should not be possible for this flag to be set when we are getting 121 // destroyed since this flag implies a pending nsParserContinueEvent, which 122 // has an owning reference to |this|. 
123 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad"); 124 } 125 126 NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser) 127 128 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser) 129 NS_IMPL_CYCLE_COLLECTION_UNLINK(mExpatDriver) 130 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink) 131 NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE 132 NS_IMPL_CYCLE_COLLECTION_UNLINK_END 133 134 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser) 135 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mExpatDriver) 136 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink) 137 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END 138 139 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser) 140 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser) 141 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser) 142 NS_INTERFACE_MAP_ENTRY(nsIStreamListener) 143 NS_INTERFACE_MAP_ENTRY(nsIParser) 144 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) 145 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) 146 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser) 147 NS_INTERFACE_MAP_END 148 149 // The parser continue event is posted only if 150 // all of the data to parse has been passed to ::OnDataAvailable 151 // and the parser has been interrupted by the content sink 152 // because the processing of tokens took too long. 153 154 nsresult nsParser::PostContinueEvent() { 155 if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) { 156 // If this flag isn't set, then there shouldn't be a live continue event! 157 NS_ASSERTION(!mContinueEvent, "bad"); 158 159 // This creates a reference cycle between this and the event that is 160 // broken when the event fires. 
161 nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this); 162 if (NS_FAILED(NS_DispatchToCurrentThread(event))) { 163 NS_WARNING("failed to dispatch parser continuation event"); 164 } else { 165 mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; 166 mContinueEvent = event; 167 } 168 } 169 return NS_OK; 170 } 171 172 NS_IMETHODIMP_(void) 173 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; } 174 175 /** 176 * Call this method once you've created a parser, and want to instruct it 177 * about the command which caused the parser to be constructed. For example, 178 * this allows us to select a DTD which can do, say, view-source. 179 * 180 * @param aCommand the command string to set 181 */ 182 NS_IMETHODIMP_(void) 183 nsParser::SetCommand(const char* aCommand) { 184 mCommandStr.Assign(aCommand); 185 if (mCommandStr.EqualsLiteral("view-source")) { 186 mCommand = eViewSource; 187 } else if (mCommandStr.EqualsLiteral("view-fragment")) { 188 mCommand = eViewFragment; 189 } else { 190 mCommand = eViewNormal; 191 } 192 } 193 194 /** 195 * Call this method once you've created a parser, and want to instruct it 196 * about the command which caused the parser to be constructed. For example, 197 * this allows us to select a DTD which can do, say, view-source. 
198 * 199 * @param aParserCommand the command to set 200 */ 201 NS_IMETHODIMP_(void) 202 nsParser::SetCommand(eParserCommands aParserCommand) { 203 mCommand = aParserCommand; 204 } 205 206 /** 207 * Call this method once you've created a parser, and want to instruct it 208 * about what charset to load 209 * 210 * @param aCharset- the charset of a document 211 * @param aCharsetSource- the source of the charset 212 */ 213 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset, 214 int32_t aCharsetSource, 215 bool aForceAutoDetection) { 216 mCharset = aCharset; 217 mCharsetSource = aCharsetSource; 218 if (mParserContext) { 219 mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource); 220 } 221 } 222 223 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) { 224 if (mSink) { 225 mSink->SetDocumentCharset(aCharset); 226 } 227 } 228 229 /** 230 * This method gets called in order to set the content 231 * sink for this parser to dump nodes to. 232 * 233 * @param nsIContentSink interface for node receiver 234 */ 235 NS_IMETHODIMP_(void) 236 nsParser::SetContentSink(nsIContentSink* aSink) { 237 MOZ_ASSERT(aSink, "sink cannot be null!"); 238 mSink = aSink; 239 240 if (mSink) { 241 mSink->SetParser(this); 242 } 243 } 244 245 /** 246 * retrieve the sink set into the parser 247 * @return current sink 248 */ 249 NS_IMETHODIMP_(nsIContentSink*) 250 nsParser::GetContentSink() { return mSink; } 251 252 //////////////////////////////////////////////////////////////////////// 253 254 /** 255 * This gets called just prior to the model actually 256 * being constructed. It's important to make this the 257 * last thing that happens right before parsing, so we 258 * can delay until the last moment the resolution of 259 * which DTD to use (unless of course we're assigned one). 
260 */ 261 nsresult nsParser::WillBuildModel() { 262 if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT; 263 264 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { 265 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED 266 // to avoid introducing unintentional changes to behavior. 267 return mInternalState; 268 } 269 270 if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK; 271 272 if (eDTDMode_autodetect == mParserContext->mDTDMode) { 273 mParserContext->mDTDMode = eDTDMode_full_standards; 274 mParserContext->mDocType = eXML; 275 } // else XML fragment with nested parser context 276 277 // We always find a DTD. 278 mParserContext->mAutoDetectStatus = ePrimaryDetect; 279 280 // Quick check for view source. 281 MOZ_ASSERT(mParserContext->mParserCommand != eViewSource, 282 "The old parser is not supposed to be used for View Source " 283 "anymore."); 284 285 RefPtr<nsExpatDriver> expat = new nsExpatDriver(); 286 nsresult rv = expat->Initialize(mParserContext->mScanner.GetURI(), mSink); 287 NS_ENSURE_SUCCESS(rv, rv); 288 289 mExpatDriver = expat.forget(); 290 291 return mSink->WillBuildModel(); 292 } 293 294 /** 295 * This gets called when the parser is done with its input. 296 */ 297 void nsParser::DidBuildModel() { 298 if (IsComplete() && mParserContext) { 299 // Let sink know if we're about to end load because we've been terminated. 300 // In that case we don't want it to run deferred scripts. 301 bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING; 302 if (mExpatDriver && mSink) { 303 mExpatDriver->DidBuildModel(); 304 mSink->DidBuildModel(terminated); 305 } 306 307 // Ref. to bug 61462. 308 mParserContext->mRequest = nullptr; 309 } 310 } 311 312 /** 313 * Call this when you want to *force* the parser to terminate the 314 * parsing process altogether. This is binary -- so once you terminate 315 * you can't resume without restarting altogether. 
316 */ 317 NS_IMETHODIMP 318 nsParser::Terminate(void) { 319 // We should only call DidBuildModel once, so don't do anything if this is 320 // the second time that Terminate has been called. 321 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { 322 return NS_OK; 323 } 324 325 nsresult result = NS_OK; 326 // XXX - [ until we figure out a way to break parser-sink circularity ] 327 // Hack - Hold a reference until we are completely done... 328 nsCOMPtr<nsIParser> kungFuDeathGrip(this); 329 mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING; 330 331 // @see bug 108049 332 // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then reset it so 333 // DidBuildModel will call DidBuildModel on the DTD. Note: The IsComplete() 334 // call inside of DidBuildModel looks at the pendingContinueEvents flag. 335 if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { 336 NS_ASSERTION(mContinueEvent, "mContinueEvent is null"); 337 // Revoke the pending continue parsing event 338 mContinueEvent = nullptr; 339 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; 340 } 341 342 if (mExpatDriver) { 343 mExpatDriver->Terminate(); 344 DidBuildModel(); 345 } else if (mSink) { 346 // We have no parser context or no DTD yet (so we got terminated before we 347 // got any data). Manually break the reference cycle with the sink. 348 result = mSink->DidBuildModel(true); 349 NS_ENSURE_SUCCESS(result, result); 350 } 351 352 return NS_OK; 353 } 354 355 NS_IMETHODIMP 356 nsParser::ContinueInterruptedParsing() { 357 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { 358 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED 359 // to avoid introducing unintentional changes to behavior. 360 return mInternalState; 361 } 362 363 if (mBlocked) { 364 // Whatever blocked the parser is responsible for ensuring 365 // that we don't stall. 366 return NS_OK; 367 } 368 369 // If there are scripts executing, this is probably due to a synchronous 370 // XMLHttpRequest, see bug 460706 and 1938290. 
371 if (IsScriptExecuting()) { 372 ContinueParsingDocumentAfterCurrentScript(); 373 return NS_OK; 374 } 375 376 if (mProcessingNetworkData) { 377 // The call already on stack is responsible for ensuring that we 378 // don't stall. 379 return NS_OK; 380 } 381 382 // If the stream has already finished, there's a good chance 383 // that we might start closing things down when the parser 384 // is reenabled. To make sure that we're not deleted across 385 // the reenabling process, hold a reference to ourselves. 386 nsresult result = NS_OK; 387 nsCOMPtr<nsIParser> kungFuDeathGrip(this); 388 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); 389 390 bool isFinalChunk = 391 mParserContext && mParserContext->mStreamListenerState == eOnStop; 392 393 mProcessingNetworkData = true; 394 if (sinkDeathGrip) { 395 sinkDeathGrip->WillParse(); 396 } 397 result = ResumeParse(true, isFinalChunk); // Ref. bug 57999 398 399 // Bug 1899786 added a flag for deferring `eOnStop`, so `isFinalChunk` 400 // above may be false. Let's run the logic from bug 1899786: 401 // Check if someone spun the event loop while we were parsing (XML 402 // script...) If so, and OnStop was called during the spin, process it 403 // now. 404 if ((result == NS_OK) && mOnStopPending) { 405 mOnStopPending = false; 406 mParserContext->mStreamListenerState = eOnStop; 407 mParserContext->mScanner.SetIncremental(false); 408 409 if (sinkDeathGrip) { 410 sinkDeathGrip->WillParse(); 411 } 412 result = ResumeParse(true, true); 413 } 414 mProcessingNetworkData = false; 415 416 if (result != NS_OK) { 417 result = mInternalState; 418 } 419 420 return result; 421 } 422 423 /** 424 * Stops parsing temporarily. That is, it will prevent the 425 * parser from building up content model while scripts 426 * are being loaded (either an external script from a web 427 * page, or any number of extension content scripts). 
428 */ 429 NS_IMETHODIMP_(void) 430 nsParser::BlockParser() { mBlocked++; } 431 432 /** 433 * Open up the parser for tokenization, building up content 434 * model..etc. However, this method does not resume parsing 435 * automatically. It's the callers' responsibility to restart 436 * the parsing engine. 437 */ 438 NS_IMETHODIMP_(void) 439 nsParser::UnblockParser() { 440 MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0); 441 if (MOZ_LIKELY(mBlocked > 0)) { 442 mBlocked--; 443 } 444 } 445 446 NS_IMETHODIMP_(void) 447 nsParser::ContinueInterruptedParsingAsync() { 448 MOZ_ASSERT(mSink); 449 if (MOZ_LIKELY(mSink)) { 450 mSink->ContinueInterruptedParsingAsync(); 451 } 452 } 453 454 /** 455 * Call this to query whether the parser is enabled or not. 456 */ 457 NS_IMETHODIMP_(bool) 458 nsParser::IsParserEnabled() { return !mBlocked; } 459 460 /** 461 * Call this to query whether the parser thinks it's done with parsing. 462 */ 463 NS_IMETHODIMP_(bool) 464 nsParser::IsComplete() { 465 return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT); 466 } 467 468 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) { 469 // Ignore any revoked continue events... 470 if (mContinueEvent != ev) return; 471 472 mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; 473 mContinueEvent = nullptr; 474 475 ContinueInterruptedParsing(); 476 } 477 478 bool nsParser::IsInsertionPointDefined() { return false; } 479 480 void nsParser::IncrementScriptNestingLevel() {} 481 482 void nsParser::DecrementScriptNestingLevel() {} 483 484 bool nsParser::HasNonzeroScriptNestingLevel() const { return false; } 485 486 bool nsParser::IsScriptCreated() { return false; } 487 488 bool nsParser::IsAboutBlankMode() { return false; } 489 490 /** 491 * This is the main controlling routine in the parsing process. 
492 * Note that it may get called multiple times for the same scanner, 493 * since this is a pushed based system, and all the tokens may 494 * not have been consumed by the scanner during a given invocation 495 * of this method. 496 */ 497 NS_IMETHODIMP 498 nsParser::Parse(nsIURI* aURL) { 499 MOZ_ASSERT(aURL, "Error: Null URL given"); 500 501 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { 502 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED 503 // to avoid introducing unintentional changes to behavior. 504 return mInternalState; 505 } 506 507 if (!aURL) { 508 return NS_ERROR_HTMLPARSER_BADURL; 509 } 510 511 MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null."); 512 513 mParserContext = MakeUnique<CParserContext>(aURL, mCommand); 514 515 return NS_OK; 516 } 517 518 /** 519 * Used by XML fragment parsing below. 520 * 521 * @param aSourceBuffer contains a string-full of real content 522 */ 523 nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) { 524 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { 525 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED 526 // to avoid introducing unintentional changes to behavior. 527 return mInternalState; 528 } 529 530 // Don't bother if we're never going to parse this. 531 if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { 532 return NS_OK; 533 } 534 535 if (!aLastCall && aSourceBuffer.IsEmpty()) { 536 // Nothing is being passed to the parser so return 537 // immediately. mUnusedInput will get processed when 538 // some data is actually passed in. 539 // But if this is the last call, make sure to finish up 540 // stuff correctly. 541 return NS_OK; 542 } 543 544 // Maintain a reference to ourselves so we don't go away 545 // till we're completely done. 546 nsCOMPtr<nsIParser> kungFuDeathGrip(this); 547 548 if (!mParserContext) { 549 // Only make a new context if we don't have one. 
550 mParserContext = 551 MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall); 552 553 mUnusedInput.Truncate(); 554 } else if (aLastCall) { 555 // Set stream listener state to eOnStop, on the final context - Fix 556 // 68160, to guarantee DidBuildModel() call - Fix 36148 557 mParserContext->mStreamListenerState = eOnStop; 558 mParserContext->mScanner.SetIncremental(false); 559 } 560 561 mParserContext->mScanner.Append(aSourceBuffer); 562 return ResumeParse(false, false, false); 563 } 564 565 nsresult nsParser::ParseFragment(const nsAString& aSourceBuffer, 566 nsTArray<nsString>& aTagStack) { 567 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { 568 // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED 569 // to avoid introducing unintentional changes to behavior. 570 return mInternalState; 571 } 572 573 nsresult result = NS_OK; 574 nsAutoString theContext; 575 uint32_t theCount = aTagStack.Length(); 576 uint32_t theIndex = 0; 577 578 for (theIndex = 0; theIndex < theCount; theIndex++) { 579 theContext.Append('<'); 580 theContext.Append(aTagStack[theCount - theIndex - 1]); 581 theContext.Append('>'); 582 } 583 584 if (theCount == 0) { 585 // Ensure that the buffer is not empty. Because none of the DTDs care 586 // about leading whitespace, this doesn't change the result. 587 theContext.Assign(' '); 588 } 589 590 // First, parse the context to build up the DTD's tag stack. Note that we 591 // pass false for the aLastCall parameter. 592 result = Parse(theContext, false); 593 if (NS_FAILED(result)) { 594 return result; 595 } 596 597 if (!mSink) { 598 // Parse must have failed in the XML case and so the sink was killed. 599 return NS_ERROR_HTMLPARSER_STOPPARSING; 600 } 601 602 nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink); 603 NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink"); 604 605 fragSink->WillBuildContent(); 606 // Now, parse the actual content. 
Note that this is the last call 607 // for HTML content, but for XML, we will want to build and parse 608 // the end tags. However, if tagStack is empty, it's the last call 609 // for XML as well. 610 if (theCount == 0) { 611 result = Parse(aSourceBuffer, true); 612 fragSink->DidBuildContent(); 613 } else { 614 // Add an end tag chunk, so expat will read the whole source buffer, 615 // and not worry about ']]' etc. 616 result = Parse(aSourceBuffer + u"</"_ns, false); 617 fragSink->DidBuildContent(); 618 619 if (NS_SUCCEEDED(result)) { 620 nsAutoString endContext; 621 for (theIndex = 0; theIndex < theCount; theIndex++) { 622 // we already added an end tag chunk above 623 if (theIndex > 0) { 624 endContext.AppendLiteral("</"); 625 } 626 627 nsString& thisTag = aTagStack[theIndex]; 628 // was there an xmlns=? 629 int32_t endOfTag = thisTag.FindChar(char16_t(' ')); 630 if (endOfTag == -1) { 631 endContext.Append(thisTag); 632 } else { 633 endContext.Append(Substring(thisTag, 0, endOfTag)); 634 } 635 636 endContext.Append('>'); 637 } 638 639 result = Parse(endContext, true); 640 } 641 } 642 643 mParserContext.reset(); 644 645 return result; 646 } 647 648 /** 649 * This routine is called to cause the parser to continue parsing its 650 * underlying stream. This call allows the parse process to happen in 651 * chunks, such as when the content is push based, and we need to parse in 652 * pieces. 653 * 654 * An interesting change in how the parser gets used has led us to add extra 655 * processing to this method. The case occurs when the parser is blocked in 656 * one context, and gets a parse(string) call in another context. In this 657 * case, the parserContexts are linked. No problem. 658 * 659 * The problem is that Parse(string) assumes that it can proceed unabated, 660 * but if the parser is already blocked that assumption is false. 
 * So we
 * needed to add a mechanism here to allow the parser to continue to process
 * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
 * out of contexts.
 *
 *
 * @param allowIteration : set to true if non-script resumption is requested
 *                         (not read by the body below)
 * @param aIsFinalChunk : tells us when the last chunk of data is provided.
 * @param aCanInterrupt : not read by the body below
 * @return error code -- 0 if ok, non-zero if error.
 */
nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
                               bool aCanInterrupt) {
  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    // to avoid introducing unintentional changes to behavior.
    return mInternalState;
  }

  nsresult result = NS_OK;

  // Nothing is parsed while the parser is blocked or has been terminated;
  // in that case we fall through and return NS_OK.
  if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
    result = WillBuildModel();
    if (NS_FAILED(result)) {
      // Setup failed: stop tokenizing from now on.
      mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
      return result;
    }

    if (mExpatDriver) {
      mSink->WillResume();
      bool theIterationIsOk = true;

      // Each pass tokenizes what the scanner holds and then builds the
      // model; the loop ends on end-of-buffer, interruption, or any result
      // other than NS_OK.
      while (result == NS_OK && theIterationIsOk) {
        if (!mUnusedInput.IsEmpty()) {
          // -- Ref: Bug# 22485 --
          // Insert the unused input into the source buffer
          // as if it was read from the input stream.
          // Adding UngetReadable() per vidur!!
          mParserContext->mScanner.UngetReadable(mUnusedInput);
          mUnusedInput.Truncate(0);
        }

        // Only allow parsing to be interrupted in the subsequent call to
        // build model.
        nsresult theTokenizerResult;
        if (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) {
          // Remember the scanner position so a failed tokenizer run can be
          // rewound.
          mParserContext->mScanner.Mark();
          if (mParserContext->mDocType == eXML &&
              mParserContext->mParserCommand != eViewSource) {
            theTokenizerResult = mExpatDriver->ResumeParse(
                mParserContext->mScanner, aIsFinalChunk);
            if (NS_FAILED(theTokenizerResult)) {
              mParserContext->mScanner.RewindToMark();
              if (NS_ERROR_HTMLPARSER_STOPPARSING == theTokenizerResult) {
                // The driver asked us to stop: terminate and drop the sink.
                theTokenizerResult = Terminate();
                mSink = nullptr;
              }
            }
          } else {
            // Nothing to do for non-XML. Note that this should only be
            // about:blank at this point, we're also checking for view-source
            // above, but that shouldn't end up here anymore.
            theTokenizerResult = NS_ERROR_HTMLPARSER_EOF;
          }
        } else {
          theTokenizerResult = NS_OK;
        }

        result = mExpatDriver->BuildModel();
        if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
          // No more OnDataAvailable calls will come to resume us, so
          // schedule the continuation ourselves.
          PostContinueEvent();
        }

        theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
                           result != NS_ERROR_HTMLPARSER_INTERRUPTED;

        // Make sure not to stop parsing too early. Therefore, before shutting
        // down the parser, it's important to check whether the input buffer
        // has been scanned to completion (theTokenizerResult should be kEOF).
        // kEOF -> End of buffer.

        // If we're told the parser has been blocked, we disable all further
        // parsing (and cache any data coming in) until the parser is
        // re-enabled.
        if (NS_ERROR_HTMLPARSER_BLOCK == result) {
          mSink->WillInterrupt();
          return NS_OK;
        }
        if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
          // Note: Parser Terminate() calls DidBuildModel.
          if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
            DidBuildModel();
            mInternalState = result;
          }

          return NS_OK;
        }
        // The buffer was consumed (or the build was interrupted) and the
        // stream has already stopped: finish up.
        if (((NS_OK == result &&
              theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
             result == NS_ERROR_HTMLPARSER_INTERRUPTED) &&
            mParserContext->mStreamListenerState == eOnStop) {
          DidBuildModel();
          return NS_OK;
        }

        if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
          // An interruption is not an error for our caller.
          result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
          mSink->WillInterrupt();
        }
      }
    } else {
      // WillBuildModel() succeeded but left us without a driver.
      mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
    }
  }

  return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
}

/*******************************************************************
  These methods are used to talk to the netlib system...
 *******************************************************************/

/**
 * Marks the current parser context as started for |request|, drops any
 * existing expat driver, and records the channel's content type on the
 * context when the request is a channel that reports one.
 *
 * @param request the request that is starting
 * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY if the parser is already in that
 *         state.
 */
nsresult nsParser::OnStartRequest(nsIRequest* request) {
  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    // to avoid introducing unintentional changes to behavior.
    return mInternalState;
  }

  MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
             "Parser's nsIStreamListener API was not setup "
             "correctly in constructor.");

  mParserContext->mStreamListenerState = eOnStart;
  mParserContext->mAutoDetectStatus = eUnknownDetect;
  mParserContext->mRequest = request;

  mExpatDriver = nullptr;

  nsresult rv;
  nsAutoCString contentType;
  nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
  if (channel) {
    rv = channel->GetContentType(contentType);
    if (NS_SUCCEEDED(rv)) {
      mParserContext->SetMimeType(contentType);
    }
  }

  // A failure to obtain the content type is not reported to the caller.
  rv = NS_OK;

  return rv;
}

/**
 * Scans the start of a byte buffer for an XML declaration and extracts the
 * value of its encoding pseudo-attribute.
 *
 * @param aBytes the bytes to inspect
 * @param aLen the number of bytes available in aBytes
 * @param oCharset receives the encoding label; truncated first, and left
 *        empty when no acceptable label is found
 * @return true if oCharset was set to a non-empty label
 */
static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
                                             int32_t aLen,
                                             nsCString& oCharset) {
  // This code is rather pointless to have. Might as well reuse expat as
  // seen in nsHtml5StreamParser. -- hsivonen
  oCharset.Truncate();
  // The buffer must begin with "<?xml".
  if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
      ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
    int32_t i;
    bool versionFound = false, encodingFound = false;
    for (i = 6; i < aLen && !encodingFound; ++i) {
      // end of XML declaration?
      if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
          (((char*)aBytes)[i + 1] == '>')) {
        break;
      }
      // Version is required.
      if (!versionFound) {
        // Want to avoid string comparisons, hence looking for 'n'
        // and only if found check the string leading to it. Not
        // foolproof, but fast.
        // The shortest string allowed before this is (strlen==13):
        // <?xml version
        if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
            (0 == strncmp("versio", (char*)(aBytes + i - 6), 6))) {
          // Fast forward through version
          // q holds the opening quote character once one has been seen.
          char q = 0;
          for (++i; i < aLen; ++i) {
            char qi = ((char*)aBytes)[i];
            if (qi == '\'' || qi == '"') {
              if (q && q == qi) {
                //  ending quote
                versionFound = true;
                break;
              } else {
                // Starting quote
                q = qi;
              }
            }
          }
        }
      } else {
        // encoding must follow version
        // Want to avoid string comparisons, hence looking for 'g'
        // and only if found check the string leading to it. Not
        // foolproof, but fast.
        // The shortest allowed string before this (strlen==26):
        // <?xml version="1" encoding
        if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
            (0 == strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
          // encStart is the index of the first byte after the opening quote.
          int32_t encStart = 0;
          char q = 0;
          for (++i; i < aLen; ++i) {
            char qi = ((char*)aBytes)[i];
            if (qi == '\'' || qi == '"') {
              if (q && q == qi) {
                int32_t count = i - encStart;
                // encoding value is invalid if it is UTF-16
                if (count > 0 &&
                    PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
                                   count)) {
                  oCharset.Assign((char*)(aBytes + encStart), count);
                }
                // Stop scanning even if the label was rejected.
                encodingFound = true;
                break;
              } else {
                encStart = i + 1;
                q = qi;
              }
            }
          }
        }
      }  // if (!versionFound)
    }  // for
  }
  return !oCharset.IsEmpty();
}

/**
 * Advances aStart by one position and returns the character found there,
 * or '\0' if the advance reached aEnd. aStart must not already equal aEnd.
 */
inline char GetNextChar(nsACString::const_iterator& aStart,
                        nsACString::const_iterator& aEnd) {
  NS_ASSERTION(aStart != aEnd, "end of buffer");
  return (++aStart != aEnd) ? *aStart : '\0';
}

// Closure data handed to ParserWriteFunc through ReadSegments().
typedef struct {
  bool mNeedCharsetCheck;  // true until the first segment has been sniffed
  nsParser* mParser;       // parser whose charset gets updated
  nsScanner* mScanner;     // scanner that receives the raw bytes
  nsIRequest* mRequest;    // request the data belongs to
} ParserWriteStruct;

/*
 * This function is invoked as a result of a call to a stream's
 * ReadSegments() method.
 * It is called for each contiguous buffer
 * of data in the underlying stream or pipe. Using ReadSegments
 * allows us to avoid copying data to read out of the stream.
 *
 * On the first segment of a read it also sniffs the charset (BOM first,
 * then the XML declaration) and updates the parser and the sink.
 *
 * closure points at a ParserWriteStruct; fromRawSegment/count describe the
 * segment; *writeCount is set to count when the scanner accepted the data.
 * The |in| and |toOffset| arguments are not used.
 */
static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
                                const char* fromRawSegment, uint32_t toOffset,
                                uint32_t count, uint32_t* writeCount) {
  nsresult result;
  ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
  const unsigned char* buf =
      reinterpret_cast<const unsigned char*>(fromRawSegment);
  uint32_t theNumRead = count;

  if (!pws) {
    return NS_ERROR_FAILURE;
  }

  if (pws->mNeedCharsetCheck) {
    // Sniff only once per ReadSegments() call.
    pws->mNeedCharsetCheck = false;
    int32_t source;
    auto preferred = pws->mParser->GetDocumentCharset(source);

    // This code was bogus when I found it. It expects the BOM or the XML
    // declaration to be entirely in the first network buffer. -- hsivonen
    const Encoding* encoding;
    std::tie(encoding, std::ignore) = Encoding::ForBOM(Span(buf, count));
    if (encoding) {
      // The decoder will swallow the BOM. The UTF-16 will re-sniff for
      // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
      // or "UTF-16BE".
      preferred = WrapNotNull(encoding);
      source = kCharsetFromByteOrderMark;
    } else if (source < kCharsetFromChannel) {
      // No BOM; consult the XML declaration only when the current charset
      // source ranks below kCharsetFromChannel.
      nsAutoCString declCharset;

      if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
        encoding = Encoding::ForLabel(declCharset);
        if (encoding) {
          preferred = WrapNotNull(encoding);
          source = kCharsetFromMetaTag;
        }
      }
    }

    pws->mParser->SetDocumentCharset(preferred, source, false);
    pws->mParser->SetSinkCharset(preferred);
  }

  result = pws->mScanner->Append(fromRawSegment, theNumRead);
  if (NS_SUCCEEDED(result)) {
    *writeCount = count;
  }

  return result;
}

/**
 * Feeds newly arrived stream data into the scanner and, unless a parse is
 * already running or a script is executing, resumes parsing.
 *
 * @param request the request the data belongs to; must be the current
 *        context's request, otherwise NS_ERROR_UNEXPECTED is returned
 * @param pIStream the (buffered) stream to read from
 * @param sourceOffset not used by this implementation
 * @param aLength number of bytes to read from pIStream
 */
nsresult nsParser::OnDataAvailable(nsIRequest* request,
                                   nsIInputStream* pIStream,
                                   uint64_t sourceOffset, uint32_t aLength) {
  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    // to avoid introducing unintentional changes to behavior.
    return mInternalState;
  }

  MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
              eOnDataAvail == mParserContext->mStreamListenerState),
             "Error: OnStartRequest() must be called before OnDataAvailable()");
  MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
             "Must have a buffered input stream");

  nsresult rv = NS_OK;

  if (mParserContext->mRequest == request) {
    mParserContext->mStreamListenerState = eOnDataAvail;

    uint32_t totalRead;
    ParserWriteStruct pws;
    pws.mNeedCharsetCheck = true;
    pws.mParser = this;
    pws.mScanner = &mParserContext->mScanner;
    pws.mRequest = request;

    // ParserWriteFunc appends each segment to the scanner.
    rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
    if (NS_FAILED(rv)) {
      return rv;
    }

    // If there are scripts executing, this is probably due to a synchronous
    // XMLHttpRequest, see bug 460706 and 1938290.
    if (IsScriptExecuting()) {
      ContinueParsingDocumentAfterCurrentScript();
      return rv;
    }

    // If a parse is already running further up the stack, the data has been
    // appended to the scanner above and nothing more is done here.
    if (!mProcessingNetworkData) {
      // Keep the parser and the sink alive across the parse.
      nsCOMPtr<nsIParser> kungFuDeathGrip(this);
      nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
      mProcessingNetworkData = true;
      if (sinkDeathGrip) {
        sinkDeathGrip->WillParse();
      }
      rv = ResumeParse();
      // Check if someone spun the event loop while we were parsing (XML
      // script...) If so, and OnStop was called during the spin, process it
      // now.
      if ((mParserContext->mRequest == request) && mOnStopPending) {
        mOnStopPending = false;
        mParserContext->mStreamListenerState = eOnStop;
        mParserContext->mScanner.SetIncremental(false);

        if (sinkDeathGrip) {
          sinkDeathGrip->WillParse();
        }
        rv = ResumeParse(true, true);
      }
      mProcessingNetworkData = false;
    }
  } else {
    rv = NS_ERROR_UNEXPECTED;
  }

  return rv;
}

/**
 * This is called by the networking library once the last block of data
 * has been collected from the net.
 */
nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    // to avoid introducing unintentional changes to behavior.
    return mInternalState;
  }

  nsresult rv = NS_OK;

  mStreamStatus = status;

  // If there are scripts executing, this is probably due to a synchronous
  // XMLHttpRequest, see bug 460706 and 1938290.
1049 if (IsScriptExecuting()) { 1050 // We'll have to handle this later 1051 mOnStopPending = true; 1052 ContinueParsingDocumentAfterCurrentScript(); 1053 return rv; 1054 } 1055 1056 if (!mProcessingNetworkData && NS_SUCCEEDED(rv)) { 1057 if (mParserContext->mRequest == request) { 1058 mParserContext->mStreamListenerState = eOnStop; 1059 mParserContext->mScanner.SetIncremental(false); 1060 } 1061 mProcessingNetworkData = true; 1062 if (mSink) { 1063 mSink->WillParse(); 1064 } 1065 rv = ResumeParse(true, true); 1066 mProcessingNetworkData = false; 1067 } else { 1068 // We'll have to handle this later 1069 mOnStopPending = true; 1070 } 1071 1072 // If the parser isn't enabled, we don't finish parsing till 1073 // it is reenabled. 1074 1075 return rv; 1076 } 1077 1078 /** 1079 * Get this as nsIStreamListener 1080 */ 1081 nsIStreamListener* nsParser::GetStreamListener() { return this; }