tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsParser.cpp (36209B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set sw=2 ts=2 et tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "nsAtom.h"
      8 #include "nsParser.h"
      9 #include "nsString.h"
     10 #include "nsCRT.h"
     11 #include "nsScanner.h"
     12 #include "plstr.h"
     13 #include "nsIChannel.h"
     14 #include "nsIInputStream.h"
     15 #include "prenv.h"
     16 #include "prlock.h"
     17 #include "prcvar.h"
     18 #include "nsReadableUtils.h"
     19 #include "nsCOMPtr.h"
     20 #include "nsExpatDriver.h"
     21 #include "nsIFragmentContentSink.h"
     22 #include "nsStreamUtils.h"
     23 #include "nsXPCOMCIDInternal.h"
     24 #include "nsMimeTypes.h"
     25 #include "nsCharsetSource.h"
     26 #include "nsThreadUtils.h"
     27 
     28 #include "mozilla/CondVar.h"
     29 #include "mozilla/dom/ScriptLoader.h"
     30 #include "mozilla/Encoding.h"
     31 #include "mozilla/Mutex.h"
     32 
     33 using namespace mozilla;
     34 
// Bit flags stored in nsParser::mFlags.
//
// Set while a dispatched nsParserContinueEvent has not yet run or been
// revoked; IsComplete() reports false for as long as this bit is set.
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000001
// Set by Initialize(); cleared by ResumeParse() when WillBuildModel() fails.
// ResumeParse() only feeds the scanner to the expat driver while it is set.
#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000002
     37 
     38 //-------------- Begin ParseContinue Event Definition ------------------------
     39 /*
     40 The parser can be explicitly interrupted by passing a return value of
     41 NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
     42 the parser to stop processing and allow the application to return to the event
     43 loop. The data which was left at the time of interruption will be processed
     44 the next time OnDataAvailable is called. If the parser has received its final
     45 chunk of data then OnDataAvailable will no longer be called by the networking
     46 module, so the parser will schedule a nsParserContinueEvent which will call
     47 the parser to process the remaining data after returning to the event loop.
     48 If the parser is interrupted while processing the remaining data it will
     49 schedule another ParseContinueEvent. The processing of data followed by
     50 scheduling of the continue events will proceed until either:
     51 
     52  1) All of the remaining data can be processed without interrupting
     53  2) The parser has been cancelled.
     54 
     55 The nsContentSink records the time when the chunk has started processing and
     56 will return NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has
     57 exceeded a threshold called max tokenizing processing time. This allows the
     58 content sink to limit how much data is processed in a single chunk which in
     59 turn gates how much time is spent away from the event loop. Processing smaller
     60 chunks of data also reduces the time spent in subsequent reflows.
     61 
     62 This capability is most apparent when loading large documents. If the maximum
     63 token processing time is set small enough the application will remain
     64 responsive during document load.
     65 
     66 A side-effect of this capability is that document load is not complete when
     67 the last chunk of data is passed to OnDataAvailable since  the parser may have
     68 been interrupted when the last chunk of data arrived. The document is complete
     69 when all of the document has been tokenized and there aren't any pending
     70 nsParserContinueEvents. This can cause problems if the application assumes
     71 that it can monitor the load requests to determine when the document load has
     72 been completed. This is what happens in Mozilla. The document is considered
     73 completely loaded when all of the load requests have been satisfied.
     74 
Currently the parser ignores requests to be interrupted during the
     76 processing of script.  This is because a document.write followed by JavaScript
     77 calls to manipulate the DOM may fail if the parser was interrupted during the
     78 document.write.
     79 
     80 For more details @see bugzilla bug 76722
     81 */
     82 
     83 class nsParserContinueEvent : public Runnable {
     84 public:
     85  RefPtr<nsParser> mParser;
     86 
     87  explicit nsParserContinueEvent(nsParser* aParser)
     88      : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {}
     89 
     90  NS_IMETHOD Run() override {
     91    mParser->HandleParserContinueEvent(this);
     92    return NS_OK;
     93  }
     94 };
     95 
     96 //-------------- End ParseContinue Event Definition ------------------------
     97 
     98 /**
     99 *  default constructor
    100 */
    101 nsParser::nsParser() : mCharset(WINDOWS_1252_ENCODING) { Initialize(); }
    102 
    103 nsParser::~nsParser() { Cleanup(); }
    104 
    105 void nsParser::Initialize() {
    106  mContinueEvent = nullptr;
    107  mCharsetSource = kCharsetUninitialized;
    108  mCharset = WINDOWS_1252_ENCODING;
    109  mInternalState = NS_OK;
    110  mStreamStatus = NS_OK;
    111  mCommand = eViewNormal;
    112  mBlocked = 0;
    113  mFlags = NS_PARSER_FLAG_CAN_TOKENIZE;
    114 
    115  mProcessingNetworkData = false;
    116  mOnStopPending = false;
    117 }
    118 
// Called from the destructor. The only work done here is a debug-time sanity
// check; nothing is released explicitly.
void nsParser::Cleanup() {
 // The pending-continue-event flag must be clear by the time we are
 // destroyed: a pending nsParserContinueEvent holds an owning reference to
 // |this|, so destruction with the flag still set would mean the bookkeeping
 // is out of sync.
 NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
}
    125 
// Cycle-collection and QueryInterface boilerplate for nsParser.
NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)

// Unlink: the members the cycle collector may clear to break a cycle, plus
// the weak-reference support.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
 NS_IMPL_CYCLE_COLLECTION_UNLINK(mExpatDriver)
 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
 NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
NS_IMPL_CYCLE_COLLECTION_UNLINK_END

// Traverse: the same two members are reported to the cycle collector.
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mExpatDriver)
 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END

// Reference counting and the list of interfaces nsParser answers to.
// nsISupports is reached through nsIParser to resolve the ambiguity between
// the several interface bases.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
 NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
 NS_INTERFACE_MAP_ENTRY(nsIParser)
 NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
NS_INTERFACE_MAP_END
    148 
    149 // The parser continue event is posted only if
    150 // all of the data to parse has been passed to ::OnDataAvailable
    151 // and the parser has been interrupted by the content sink
    152 // because the processing of tokens took too long.
    153 
    154 nsresult nsParser::PostContinueEvent() {
    155  if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
    156    // If this flag isn't set, then there shouldn't be a live continue event!
    157    NS_ASSERTION(!mContinueEvent, "bad");
    158 
    159    // This creates a reference cycle between this and the event that is
    160    // broken when the event fires.
    161    nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
    162    if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
    163      NS_WARNING("failed to dispatch parser continuation event");
    164    } else {
    165      mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
    166      mContinueEvent = event;
    167    }
    168  }
    169  return NS_OK;
    170 }
    171 
    172 NS_IMETHODIMP_(void)
    173 nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; }
    174 
    175 /**
    176 *  Call this method once you've created a parser, and want to instruct it
    177 *  about the command which caused the parser to be constructed. For example,
    178 *  this allows us to select a DTD which can do, say, view-source.
    179 *
    180 *  @param   aCommand the command string to set
    181 */
    182 NS_IMETHODIMP_(void)
    183 nsParser::SetCommand(const char* aCommand) {
    184  mCommandStr.Assign(aCommand);
    185  if (mCommandStr.EqualsLiteral("view-source")) {
    186    mCommand = eViewSource;
    187  } else if (mCommandStr.EqualsLiteral("view-fragment")) {
    188    mCommand = eViewFragment;
    189  } else {
    190    mCommand = eViewNormal;
    191  }
    192 }
    193 
    194 /**
    195 *  Call this method once you've created a parser, and want to instruct it
    196 *  about the command which caused the parser to be constructed. For example,
    197 *  this allows us to select a DTD which can do, say, view-source.
    198 *
    199 *  @param   aParserCommand the command to set
    200 */
// Enum overload: records the command directly. Unlike the string overload,
// this does not touch mCommandStr.
NS_IMETHODIMP_(void)
nsParser::SetCommand(eParserCommands aParserCommand) {
 mCommand = aParserCommand;
}
    205 
    206 /**
    207 *  Call this method once you've created a parser, and want to instruct it
    208 *  about what charset to load
    209 *
    210 *  @param   aCharset- the charset of a document
    211 *  @param   aCharsetSource- the source of the charset
    212 */
    213 void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
    214                                  int32_t aCharsetSource,
    215                                  bool aForceAutoDetection) {
    216  mCharset = aCharset;
    217  mCharsetSource = aCharsetSource;
    218  if (mParserContext) {
    219    mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource);
    220  }
    221 }
    222 
    223 void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) {
    224  if (mSink) {
    225    mSink->SetDocumentCharset(aCharset);
    226  }
    227 }
    228 
    229 /**
    230 *  This method gets called in order to set the content
    231 *  sink for this parser to dump nodes to.
    232 *
    233 *  @param   nsIContentSink interface for node receiver
    234 */
    235 NS_IMETHODIMP_(void)
    236 nsParser::SetContentSink(nsIContentSink* aSink) {
    237  MOZ_ASSERT(aSink, "sink cannot be null!");
    238  mSink = aSink;
    239 
    240  if (mSink) {
    241    mSink->SetParser(this);
    242  }
    243 }
    244 
    245 /**
    246 * retrieve the sink set into the parser
    247 * @return  current sink
    248 */
    249 NS_IMETHODIMP_(nsIContentSink*)
    250 nsParser::GetContentSink() { return mSink; }
    251 
    252 ////////////////////////////////////////////////////////////////////////
    253 
    254 /**
    255 * This gets called just prior to the model actually
    256 * being constructed. It's important to make this the
    257 * last thing that happens right before parsing, so we
    258 * can delay until the last moment the resolution of
    259 * which DTD to use (unless of course we're assigned one).
    260 */
    261 nsresult nsParser::WillBuildModel() {
    262  if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
    263 
    264  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    265    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    266    // to avoid introducing unintentional changes to behavior.
    267    return mInternalState;
    268  }
    269 
    270  if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK;
    271 
    272  if (eDTDMode_autodetect == mParserContext->mDTDMode) {
    273    mParserContext->mDTDMode = eDTDMode_full_standards;
    274    mParserContext->mDocType = eXML;
    275  }  // else XML fragment with nested parser context
    276 
    277  // We always find a DTD.
    278  mParserContext->mAutoDetectStatus = ePrimaryDetect;
    279 
    280  // Quick check for view source.
    281  MOZ_ASSERT(mParserContext->mParserCommand != eViewSource,
    282             "The old parser is not supposed to be used for View Source "
    283             "anymore.");
    284 
    285  RefPtr<nsExpatDriver> expat = new nsExpatDriver();
    286  nsresult rv = expat->Initialize(mParserContext->mScanner.GetURI(), mSink);
    287  NS_ENSURE_SUCCESS(rv, rv);
    288 
    289  mExpatDriver = expat.forget();
    290 
    291  return mSink->WillBuildModel();
    292 }
    293 
    294 /**
    295 * This gets called when the parser is done with its input.
    296 */
    297 void nsParser::DidBuildModel() {
    298  if (IsComplete() && mParserContext) {
    299    // Let sink know if we're about to end load because we've been terminated.
    300    // In that case we don't want it to run deferred scripts.
    301    bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
    302    if (mExpatDriver && mSink) {
    303      mExpatDriver->DidBuildModel();
    304      mSink->DidBuildModel(terminated);
    305    }
    306 
    307    // Ref. to bug 61462.
    308    mParserContext->mRequest = nullptr;
    309  }
    310 }
    311 
    312 /**
    313 *  Call this when you want to *force* the parser to terminate the
    314 *  parsing process altogether. This is binary -- so once you terminate
    315 *  you can't resume without restarting altogether.
    316 */
    317 NS_IMETHODIMP
    318 nsParser::Terminate(void) {
    319  // We should only call DidBuildModel once, so don't do anything if this is
    320  // the second time that Terminate has been called.
    321  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
    322    return NS_OK;
    323  }
    324 
    325  nsresult result = NS_OK;
    326  // XXX - [ until we figure out a way to break parser-sink circularity ]
    327  // Hack - Hold a reference until we are completely done...
    328  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
    329  mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
    330 
    331  // @see bug 108049
    332  // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then reset it so
    333  // DidBuildModel will call DidBuildModel on the DTD. Note: The IsComplete()
    334  // call inside of DidBuildModel looks at the pendingContinueEvents flag.
    335  if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
    336    NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
    337    // Revoke the pending continue parsing event
    338    mContinueEvent = nullptr;
    339    mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
    340  }
    341 
    342  if (mExpatDriver) {
    343    mExpatDriver->Terminate();
    344    DidBuildModel();
    345  } else if (mSink) {
    346    // We have no parser context or no DTD yet (so we got terminated before we
    347    // got any data).  Manually break the reference cycle with the sink.
    348    result = mSink->DidBuildModel(true);
    349    NS_ENSURE_SUCCESS(result, result);
    350  }
    351 
    352  return NS_OK;
    353 }
    354 
// Resumes a parse that was previously interrupted.
//
// Returns early (without parsing) when the parser is out of memory, blocked,
// a script is executing, or a network-driven parse is already on the stack.
// Otherwise it runs ResumeParse(), and if an OnStop notification was deferred
// while that was running, replays it with a second, final ResumeParse().
//
// @return NS_OK on success or when there was nothing to do; mInternalState
//         when the (last) ResumeParse() call did not return NS_OK.
NS_IMETHODIMP
nsParser::ContinueInterruptedParsing() {
 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
   // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
   // to avoid introducing unintentional changes to behavior.
   return mInternalState;
 }

 if (mBlocked) {
   // Whatever blocked the parser is responsible for ensuring
   // that we don't stall.
   return NS_OK;
 }

 // If there are scripts executing, this is probably due to a synchronous
 // XMLHttpRequest, see bug 460706 and 1938290.
 if (IsScriptExecuting()) {
   ContinueParsingDocumentAfterCurrentScript();
   return NS_OK;
 }

 if (mProcessingNetworkData) {
   // The call already on stack is responsible for ensuring that we
   // don't stall.
   return NS_OK;
 }

 // If the stream has already finished, there's a good chance
 // that we might start closing things down when the parser
 // is reenabled. To make sure that we're not deleted across
 // the reenabling process, hold a reference to ourselves.
 nsresult result = NS_OK;
 nsCOMPtr<nsIParser> kungFuDeathGrip(this);
 // The sink is kept alive separately so it can still be called below even if
 // mSink changes during ResumeParse().
 nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);

 // Final chunk only if the stream listener already saw OnStop.
 bool isFinalChunk =
     mParserContext && mParserContext->mStreamListenerState == eOnStop;

 // Guards against re-entrant calls for the duration of the parse (see the
 // mProcessingNetworkData early return above).
 mProcessingNetworkData = true;
 if (sinkDeathGrip) {
   sinkDeathGrip->WillParse();
 }
 result = ResumeParse(true, isFinalChunk);  // Ref. bug 57999

 // Bug 1899786 added a flag for deferring `eOnStop`, so `isFinalChunk`
 // above may be false. Let's run the logic from bug 1899786:
 // Check if someone spun the event loop while we were parsing (XML
 // script...) If so, and OnStop was called during the spin, process it
 // now.
 if ((result == NS_OK) && mOnStopPending) {
   mOnStopPending = false;
   // NOTE(review): mParserContext is dereferenced here without the null
   // check used for isFinalChunk above; presumably mOnStopPending implies a
   // live context -- confirm against the code that sets the flag.
   mParserContext->mStreamListenerState = eOnStop;
   mParserContext->mScanner.SetIncremental(false);

   if (sinkDeathGrip) {
     sinkDeathGrip->WillParse();
   }
   result = ResumeParse(true, true);
 }
 mProcessingNetworkData = false;

 // Any non-NS_OK result is replaced by the parser's recorded state.
 if (result != NS_OK) {
   result = mInternalState;
 }

 return result;
}
    422 
    423 /**
    424 *  Stops parsing temporarily. That is, it will prevent the
    425 *  parser from building up content model while scripts
    426 *  are being loaded (either an external script from a web
    427 *  page, or any number of extension content scripts).
    428 */
    429 NS_IMETHODIMP_(void)
    430 nsParser::BlockParser() { mBlocked++; }
    431 
    432 /**
    433 *  Open up the parser for tokenization, building up content
    434 *  model..etc. However, this method does not resume parsing
    435 *  automatically. It's the callers' responsibility to restart
    436 *  the parsing engine.
    437 */
    438 NS_IMETHODIMP_(void)
    439 nsParser::UnblockParser() {
    440  MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
    441  if (MOZ_LIKELY(mBlocked > 0)) {
    442    mBlocked--;
    443  }
    444 }
    445 
    446 NS_IMETHODIMP_(void)
    447 nsParser::ContinueInterruptedParsingAsync() {
    448  MOZ_ASSERT(mSink);
    449  if (MOZ_LIKELY(mSink)) {
    450    mSink->ContinueInterruptedParsingAsync();
    451  }
    452 }
    453 
    454 /**
    455 * Call this to query whether the parser is enabled or not.
    456 */
    457 NS_IMETHODIMP_(bool)
    458 nsParser::IsParserEnabled() { return !mBlocked; }
    459 
    460 /**
    461 * Call this to query whether the parser thinks it's done with parsing.
    462 */
    463 NS_IMETHODIMP_(bool)
    464 nsParser::IsComplete() {
    465  return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
    466 }
    467 
    468 void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) {
    469  // Ignore any revoked continue events...
    470  if (mContinueEvent != ev) return;
    471 
    472  mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
    473  mContinueEvent = nullptr;
    474 
    475  ContinueInterruptedParsing();
    476 }
    477 
// The following methods are fixed stubs in this parser: the queries always
// answer false and the script-nesting hooks do nothing.

// Always false.
bool nsParser::IsInsertionPointDefined() { return false; }

// No-op; this parser keeps no script nesting level.
void nsParser::IncrementScriptNestingLevel() {}

// No-op; this parser keeps no script nesting level.
void nsParser::DecrementScriptNestingLevel() {}

// Always false, consistent with the no-op increment/decrement above.
bool nsParser::HasNonzeroScriptNestingLevel() const { return false; }

// Always false.
bool nsParser::IsScriptCreated() { return false; }

// Always false.
bool nsParser::IsAboutBlankMode() { return false; }
    489 
    490 /**
    491 *  This is the main controlling routine in the parsing process.
    492 *  Note that it may get called multiple times for the same scanner,
    493 *  since this is a pushed based system, and all the tokens may
    494 *  not have been consumed by the scanner during a given invocation
    495 *  of this method.
    496 */
    497 NS_IMETHODIMP
    498 nsParser::Parse(nsIURI* aURL) {
    499  MOZ_ASSERT(aURL, "Error: Null URL given");
    500 
    501  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    502    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    503    // to avoid introducing unintentional changes to behavior.
    504    return mInternalState;
    505  }
    506 
    507  if (!aURL) {
    508    return NS_ERROR_HTMLPARSER_BADURL;
    509  }
    510 
    511  MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null.");
    512 
    513  mParserContext = MakeUnique<CParserContext>(aURL, mCommand);
    514 
    515  return NS_OK;
    516 }
    517 
    518 /**
    519 * Used by XML fragment parsing below.
    520 *
    521 * @param   aSourceBuffer contains a string-full of real content
    522 */
    523 nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {
    524  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    525    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    526    // to avoid introducing unintentional changes to behavior.
    527    return mInternalState;
    528  }
    529 
    530  // Don't bother if we're never going to parse this.
    531  if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
    532    return NS_OK;
    533  }
    534 
    535  if (!aLastCall && aSourceBuffer.IsEmpty()) {
    536    // Nothing is being passed to the parser so return
    537    // immediately. mUnusedInput will get processed when
    538    // some data is actually passed in.
    539    // But if this is the last call, make sure to finish up
    540    // stuff correctly.
    541    return NS_OK;
    542  }
    543 
    544  // Maintain a reference to ourselves so we don't go away
    545  // till we're completely done.
    546  nsCOMPtr<nsIParser> kungFuDeathGrip(this);
    547 
    548  if (!mParserContext) {
    549    // Only make a new context if we don't have one.
    550    mParserContext =
    551        MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall);
    552 
    553    mUnusedInput.Truncate();
    554  } else if (aLastCall) {
    555    // Set stream listener state to eOnStop, on the final context - Fix
    556    // 68160, to guarantee DidBuildModel() call - Fix 36148
    557    mParserContext->mStreamListenerState = eOnStop;
    558    mParserContext->mScanner.SetIncremental(false);
    559  }
    560 
    561  mParserContext->mScanner.Append(aSourceBuffer);
    562  return ResumeParse(false, false, false);
    563 }
    564 
    565 nsresult nsParser::ParseFragment(const nsAString& aSourceBuffer,
    566                                 nsTArray<nsString>& aTagStack) {
    567  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    568    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    569    // to avoid introducing unintentional changes to behavior.
    570    return mInternalState;
    571  }
    572 
    573  nsresult result = NS_OK;
    574  nsAutoString theContext;
    575  uint32_t theCount = aTagStack.Length();
    576  uint32_t theIndex = 0;
    577 
    578  for (theIndex = 0; theIndex < theCount; theIndex++) {
    579    theContext.Append('<');
    580    theContext.Append(aTagStack[theCount - theIndex - 1]);
    581    theContext.Append('>');
    582  }
    583 
    584  if (theCount == 0) {
    585    // Ensure that the buffer is not empty. Because none of the DTDs care
    586    // about leading whitespace, this doesn't change the result.
    587    theContext.Assign(' ');
    588  }
    589 
    590  // First, parse the context to build up the DTD's tag stack. Note that we
    591  // pass false for the aLastCall parameter.
    592  result = Parse(theContext, false);
    593  if (NS_FAILED(result)) {
    594    return result;
    595  }
    596 
    597  if (!mSink) {
    598    // Parse must have failed in the XML case and so the sink was killed.
    599    return NS_ERROR_HTMLPARSER_STOPPARSING;
    600  }
    601 
    602  nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
    603  NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
    604 
    605  fragSink->WillBuildContent();
    606  // Now, parse the actual content. Note that this is the last call
    607  // for HTML content, but for XML, we will want to build and parse
    608  // the end tags.  However, if tagStack is empty, it's the last call
    609  // for XML as well.
    610  if (theCount == 0) {
    611    result = Parse(aSourceBuffer, true);
    612    fragSink->DidBuildContent();
    613  } else {
    614    // Add an end tag chunk, so expat will read the whole source buffer,
    615    // and not worry about ']]' etc.
    616    result = Parse(aSourceBuffer + u"</"_ns, false);
    617    fragSink->DidBuildContent();
    618 
    619    if (NS_SUCCEEDED(result)) {
    620      nsAutoString endContext;
    621      for (theIndex = 0; theIndex < theCount; theIndex++) {
    622        // we already added an end tag chunk above
    623        if (theIndex > 0) {
    624          endContext.AppendLiteral("</");
    625        }
    626 
    627        nsString& thisTag = aTagStack[theIndex];
    628        // was there an xmlns=?
    629        int32_t endOfTag = thisTag.FindChar(char16_t(' '));
    630        if (endOfTag == -1) {
    631          endContext.Append(thisTag);
    632        } else {
    633          endContext.Append(Substring(thisTag, 0, endOfTag));
    634        }
    635 
    636        endContext.Append('>');
    637      }
    638 
    639      result = Parse(endContext, true);
    640    }
    641  }
    642 
    643  mParserContext.reset();
    644 
    645  return result;
    646 }
    647 
    648 /**
    649 *  This routine is called to cause the parser to continue parsing its
    650 *  underlying stream.  This call allows the parse process to happen in
    651 *  chunks, such as when the content is push based, and we need to parse in
    652 *  pieces.
    653 *
    654 *  An interesting change in how the parser gets used has led us to add extra
    655 *  processing to this method.  The case occurs when the parser is blocked in
    656 *  one context, and gets a parse(string) call in another context.  In this
    657 *  case, the parserContexts are linked. No problem.
    658 *
    659 *  The problem is that Parse(string) assumes that it can proceed unabated,
    660 *  but if the parser is already blocked that assumption is false. So we
    661 *  needed to add a mechanism here to allow the parser to continue to process
*  (the pop and free) contexts until 1) it gets blocked again; 2) it runs
    663 *  out of contexts.
    664 *
    665 *
*  @param   allowIteration : set to true if non-script resumption is requested
    667 *  @param   aIsFinalChunk : tells us when the last chunk of data is provided.
    668 *  @return  error code -- 0 if ok, non-zero if error.
    669 */
// Drives the tokenize/build-model loop over whatever input the scanner holds.
//
// Nothing is parsed while the parser is blocked or has been told to stop
// parsing; in that case the function returns NS_OK. Otherwise
// WillBuildModel() is run first, then the loop alternates between the expat
// driver's ResumeParse() (tokenizing) and BuildModel() until the input is
// exhausted, the parse is interrupted, blocked or stopped, or an error occurs.
//
// Note: allowIteration and aCanInterrupt are not referenced in this body.
//
// @param aIsFinalChunk passed to the expat driver; also decides whether an
//                      interruption schedules a continue event.
// @return NS_OK for normal completion, interruption, blocking and stop;
//         the WillBuildModel() failure code;
//         NS_ERROR_HTMLPARSER_UNRESOLVEDDTD when there is no expat driver;
//         otherwise the failing BuildModel() result.
nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
                              bool aCanInterrupt) {
 if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
   // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
   // to avoid introducing unintentional changes to behavior.
   return mInternalState;
 }

 nsresult result = NS_OK;

 if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
   result = WillBuildModel();
   if (NS_FAILED(result)) {
     // Setup failed: stop feeding the tokenizer on any later call.
     mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
     return result;
   }

   if (mExpatDriver) {
     mSink->WillResume();
     bool theIterationIsOk = true;

     while (result == NS_OK && theIterationIsOk) {
       if (!mUnusedInput.IsEmpty()) {
         // -- Ref: Bug# 22485 --
         // Insert the unused input into the source buffer
         // as if it was read from the input stream.
         // Adding UngetReadable() per vidur!!
         mParserContext->mScanner.UngetReadable(mUnusedInput);
         mUnusedInput.Truncate(0);
       }

       // Only allow parsing to be interrupted in the subsequent call to
       // build model.
       nsresult theTokenizerResult;
       if (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) {
         // Remember the scanner position so a failed tokenizer pass can be
         // rewound.
         mParserContext->mScanner.Mark();
         if (mParserContext->mDocType == eXML &&
             mParserContext->mParserCommand != eViewSource) {
           theTokenizerResult = mExpatDriver->ResumeParse(
               mParserContext->mScanner, aIsFinalChunk);
           if (NS_FAILED(theTokenizerResult)) {
             mParserContext->mScanner.RewindToMark();
             if (NS_ERROR_HTMLPARSER_STOPPARSING == theTokenizerResult) {
               // The driver asked to stop: terminate and let go of the sink.
               theTokenizerResult = Terminate();
               mSink = nullptr;
             }
           }
         } else {
           // Nothing to do for non-XML. Note that this should only be
           // about:blank at this point, we're also checking for view-source
           // above, but that shouldn't end up here anymore.
           theTokenizerResult = NS_ERROR_HTMLPARSER_EOF;
         }
       } else {
         theTokenizerResult = NS_OK;
       }

       result = mExpatDriver->BuildModel();
       // Once all data has been delivered nobody else will call us again, so
       // an interruption has to schedule its own continuation.
       if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
         PostContinueEvent();
       }

       // Keep looping only while there is input left and we were not
       // interrupted.
       theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
                          result != NS_ERROR_HTMLPARSER_INTERRUPTED;

       // Make sure not to stop parsing too early. Therefore, before shutting
       // down the parser, it's important to check whether the input buffer
       // has been scanned to completion (theTokenizerResult should be kEOF).
       // kEOF -> End of buffer.

       // If we're told the parser has been blocked, we disable all further
       // parsing (and cache any data coming in) until the parser is
       // re-enabled.
       if (NS_ERROR_HTMLPARSER_BLOCK == result) {
         mSink->WillInterrupt();
         return NS_OK;
       }
       if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
         // Note: Parser Terminate() calls DidBuildModel.
         if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
           DidBuildModel();
           mInternalState = result;
         }

         return NS_OK;
       }
       // End of input (or an interruption) after the stream has stopped:
       // finish the document. DidBuildModel() itself does nothing while a
       // continue event is still pending.
       if (((NS_OK == result &&
             theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
            result == NS_ERROR_HTMLPARSER_INTERRUPTED) &&
           mParserContext->mStreamListenerState == eOnStop) {
         DidBuildModel();
         return NS_OK;
       }

       // More data is still expected: report the pause to the sink and treat
       // an interruption as success.
       if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
           result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
         result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
         mSink->WillInterrupt();
       }
     }
   } else {
     mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
   }
 }

 return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
}
    777 
    778 /*******************************************************************
    779  These methods are used to talk to the netlib system...
    780 *******************************************************************/
    781 
    782 nsresult nsParser::OnStartRequest(nsIRequest* request) {
    783  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    784    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    785    // to avoid introducing unintentional changes to behavior.
    786    return mInternalState;
    787  }
    788 
    789  MOZ_ASSERT(eNone == mParserContext->mStreamListenerState,
    790             "Parser's nsIStreamListener API was not setup "
    791             "correctly in constructor.");
    792 
    793  mParserContext->mStreamListenerState = eOnStart;
    794  mParserContext->mAutoDetectStatus = eUnknownDetect;
    795  mParserContext->mRequest = request;
    796 
    797  mExpatDriver = nullptr;
    798 
    799  nsresult rv;
    800  nsAutoCString contentType;
    801  nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
    802  if (channel) {
    803    rv = channel->GetContentType(contentType);
    804    if (NS_SUCCEEDED(rv)) {
    805      mParserContext->SetMimeType(contentType);
    806    }
    807  }
    808 
    809  rv = NS_OK;
    810 
    811  return rv;
    812 }
    813 
    814 static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes,
    815                                             int32_t aLen,
    816                                             nsCString& oCharset) {
    817  // This code is rather pointless to have. Might as well reuse expat as
    818  // seen in nsHtml5StreamParser. -- hsivonen
    819  oCharset.Truncate();
    820  if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) &&
    821      ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) {
    822    int32_t i;
    823    bool versionFound = false, encodingFound = false;
    824    for (i = 6; i < aLen && !encodingFound; ++i) {
    825      // end of XML declaration?
    826      if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) &&
    827          (((char*)aBytes)[i + 1] == '>')) {
    828        break;
    829      }
    830      // Version is required.
    831      if (!versionFound) {
    832        // Want to avoid string comparisons, hence looking for 'n'
    833        // and only if found check the string leading to it. Not
    834        // foolproof, but fast.
    835        // The shortest string allowed before this is  (strlen==13):
    836        // <?xml version
    837        if ((((char*)aBytes)[i] == 'n') && (i >= 12) &&
    838            (0 == strncmp("versio", (char*)(aBytes + i - 6), 6))) {
    839          // Fast forward through version
    840          char q = 0;
    841          for (++i; i < aLen; ++i) {
    842            char qi = ((char*)aBytes)[i];
    843            if (qi == '\'' || qi == '"') {
    844              if (q && q == qi) {
    845                //  ending quote
    846                versionFound = true;
    847                break;
    848              } else {
    849                // Starting quote
    850                q = qi;
    851              }
    852            }
    853          }
    854        }
    855      } else {
    856        // encoding must follow version
    857        // Want to avoid string comparisons, hence looking for 'g'
    858        // and only if found check the string leading to it. Not
    859        // foolproof, but fast.
    860        // The shortest allowed string before this (strlen==26):
    861        // <?xml version="1" encoding
    862        if ((((char*)aBytes)[i] == 'g') && (i >= 25) &&
    863            (0 == strncmp("encodin", (char*)(aBytes + i - 7), 7))) {
    864          int32_t encStart = 0;
    865          char q = 0;
    866          for (++i; i < aLen; ++i) {
    867            char qi = ((char*)aBytes)[i];
    868            if (qi == '\'' || qi == '"') {
    869              if (q && q == qi) {
    870                int32_t count = i - encStart;
    871                // encoding value is invalid if it is UTF-16
    872                if (count > 0 &&
    873                    PL_strncasecmp("UTF-16", (char*)(aBytes + encStart),
    874                                   count)) {
    875                  oCharset.Assign((char*)(aBytes + encStart), count);
    876                }
    877                encodingFound = true;
    878                break;
    879              } else {
    880                encStart = i + 1;
    881                q = qi;
    882              }
    883            }
    884          }
    885        }
    886      }  // if (!versionFound)
    887    }  // for
    888  }
    889  return !oCharset.IsEmpty();
    890 }
    891 
    892 inline char GetNextChar(nsACString::const_iterator& aStart,
    893                        nsACString::const_iterator& aEnd) {
    894  NS_ASSERTION(aStart != aEnd, "end of buffer");
    895  return (++aStart != aEnd) ? *aStart : '\0';
    896 }
    897 
    898 typedef struct {
    899  bool mNeedCharsetCheck;
    900  nsParser* mParser;
    901  nsScanner* mScanner;
    902  nsIRequest* mRequest;
    903 } ParserWriteStruct;
    904 
    905 /*
    906 * This function is invoked as a result of a call to a stream's
    907 * ReadSegments() method. It is called for each contiguous buffer
    908 * of data in the underlying stream or pipe. Using ReadSegments
    909 * allows us to avoid copying data to read out of the stream.
    910 */
    911 static nsresult ParserWriteFunc(nsIInputStream* in, void* closure,
    912                                const char* fromRawSegment, uint32_t toOffset,
    913                                uint32_t count, uint32_t* writeCount) {
    914  nsresult result;
    915  ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
    916  const unsigned char* buf =
    917      reinterpret_cast<const unsigned char*>(fromRawSegment);
    918  uint32_t theNumRead = count;
    919 
    920  if (!pws) {
    921    return NS_ERROR_FAILURE;
    922  }
    923 
    924  if (pws->mNeedCharsetCheck) {
    925    pws->mNeedCharsetCheck = false;
    926    int32_t source;
    927    auto preferred = pws->mParser->GetDocumentCharset(source);
    928 
    929    // This code was bogus when I found it. It expects the BOM or the XML
    930    // declaration to be entirely in the first network buffer. -- hsivonen
    931    const Encoding* encoding;
    932    std::tie(encoding, std::ignore) = Encoding::ForBOM(Span(buf, count));
    933    if (encoding) {
    934      // The decoder will swallow the BOM. The UTF-16 will re-sniff for
    935      // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
    936      // or "UTF-16BE".
    937      preferred = WrapNotNull(encoding);
    938      source = kCharsetFromByteOrderMark;
    939    } else if (source < kCharsetFromChannel) {
    940      nsAutoCString declCharset;
    941 
    942      if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
    943        encoding = Encoding::ForLabel(declCharset);
    944        if (encoding) {
    945          preferred = WrapNotNull(encoding);
    946          source = kCharsetFromMetaTag;
    947        }
    948      }
    949    }
    950 
    951    pws->mParser->SetDocumentCharset(preferred, source, false);
    952    pws->mParser->SetSinkCharset(preferred);
    953  }
    954 
    955  result = pws->mScanner->Append(fromRawSegment, theNumRead);
    956  if (NS_SUCCEEDED(result)) {
    957    *writeCount = count;
    958  }
    959 
    960  return result;
    961 }
    962 
    963 nsresult nsParser::OnDataAvailable(nsIRequest* request,
    964                                   nsIInputStream* pIStream,
    965                                   uint64_t sourceOffset, uint32_t aLength) {
    966  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
    967    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
    968    // to avoid introducing unintentional changes to behavior.
    969    return mInternalState;
    970  }
    971 
    972  MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState ||
    973              eOnDataAvail == mParserContext->mStreamListenerState),
    974             "Error: OnStartRequest() must be called before OnDataAvailable()");
    975  MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream),
    976             "Must have a buffered input stream");
    977 
    978  nsresult rv = NS_OK;
    979 
    980  if (mParserContext->mRequest == request) {
    981    mParserContext->mStreamListenerState = eOnDataAvail;
    982 
    983    uint32_t totalRead;
    984    ParserWriteStruct pws;
    985    pws.mNeedCharsetCheck = true;
    986    pws.mParser = this;
    987    pws.mScanner = &mParserContext->mScanner;
    988    pws.mRequest = request;
    989 
    990    rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
    991    if (NS_FAILED(rv)) {
    992      return rv;
    993    }
    994 
    995    // If there are scripts executing, this is probably due to a synchronous
    996    // XMLHttpRequest, see bug 460706 and 1938290.
    997    if (IsScriptExecuting()) {
    998      ContinueParsingDocumentAfterCurrentScript();
    999      return rv;
   1000    }
   1001 
   1002    if (!mProcessingNetworkData) {
   1003      nsCOMPtr<nsIParser> kungFuDeathGrip(this);
   1004      nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
   1005      mProcessingNetworkData = true;
   1006      if (sinkDeathGrip) {
   1007        sinkDeathGrip->WillParse();
   1008      }
   1009      rv = ResumeParse();
   1010      // Check if someone spun the event loop while we were parsing (XML
   1011      // script...) If so, and OnStop was called during the spin, process it
   1012      // now.
   1013      if ((mParserContext->mRequest == request) && mOnStopPending) {
   1014        mOnStopPending = false;
   1015        mParserContext->mStreamListenerState = eOnStop;
   1016        mParserContext->mScanner.SetIncremental(false);
   1017 
   1018        if (sinkDeathGrip) {
   1019          sinkDeathGrip->WillParse();
   1020        }
   1021        rv = ResumeParse(true, true);
   1022      }
   1023      mProcessingNetworkData = false;
   1024    }
   1025  } else {
   1026    rv = NS_ERROR_UNEXPECTED;
   1027  }
   1028 
   1029  return rv;
   1030 }
   1031 
   1032 /**
   1033 *  This is called by the networking library once the last block of data
   1034 *  has been collected from the net.
   1035 */
   1036 nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {
   1037  if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
   1038    // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
   1039    // to avoid introducing unintentional changes to behavior.
   1040    return mInternalState;
   1041  }
   1042 
   1043  nsresult rv = NS_OK;
   1044 
   1045  mStreamStatus = status;
   1046 
   1047  // If there are scripts executing, this is probably due to a synchronous
   1048  // XMLHttpRequest, see bug 460706 and 1938290.
   1049  if (IsScriptExecuting()) {
   1050    // We'll have to handle this later
   1051    mOnStopPending = true;
   1052    ContinueParsingDocumentAfterCurrentScript();
   1053    return rv;
   1054  }
   1055 
   1056  if (!mProcessingNetworkData && NS_SUCCEEDED(rv)) {
   1057    if (mParserContext->mRequest == request) {
   1058      mParserContext->mStreamListenerState = eOnStop;
   1059      mParserContext->mScanner.SetIncremental(false);
   1060    }
   1061    mProcessingNetworkData = true;
   1062    if (mSink) {
   1063      mSink->WillParse();
   1064    }
   1065    rv = ResumeParse(true, true);
   1066    mProcessingNetworkData = false;
   1067  } else {
   1068    // We'll have to handle this later
   1069    mOnStopPending = true;
   1070  }
   1071 
   1072  // If the parser isn't enabled, we don't finish parsing till
   1073  // it is reenabled.
   1074 
   1075  return rv;
   1076 }
   1077 
   1078 /**
   1079 * Get this as nsIStreamListener
   1080 */
   1081 nsIStreamListener* nsParser::GetStreamListener() { return this; }