nsHtml5Parser.h (10121B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef NS_HTML5_PARSER 7 #define NS_HTML5_PARSER 8 9 #include "mozilla/UniquePtr.h" 10 #include "nsIParser.h" 11 #include "nsDeque.h" 12 #include "nsIContentSink.h" 13 #include "nsIRequest.h" 14 #include "nsIChannel.h" 15 #include "nsCOMArray.h" 16 #include "nsContentSink.h" 17 #include "nsCycleCollectionParticipant.h" 18 #include "nsHtml5OwningUTF16Buffer.h" 19 #include "nsHtml5TreeOpExecutor.h" 20 #include "nsHtml5StreamParser.h" 21 #include "nsHtml5AtomTable.h" 22 #include "nsWeakReference.h" 23 #include "nsHtml5StreamListener.h" 24 #include "nsCharsetSource.h" 25 26 class nsHtml5Parser final : public nsIParser, 27 public nsSupportsWeakReference, 28 public nsIStreamListener { 29 public: 30 NS_DECL_CYCLE_COLLECTING_ISUPPORTS 31 32 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser) 33 34 nsHtml5Parser(); 35 36 // about:blank-only 37 NS_IMETHOD OnStartRequest(nsIRequest* aRequest) override; 38 39 // about:blank-only and exists only for interface compat. 40 NS_IMETHOD OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInStream, 41 uint64_t aSourceOffset, uint32_t aLength) override; 42 43 // about:blank-only and exists only for interface compat. 44 NS_IMETHOD OnStopRequest(nsIRequest* aRequest, nsresult aStatus) override; 45 46 /* Start nsIParser */ 47 /** 48 * No-op for backwards compat. 49 */ 50 NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override; 51 52 /** 53 * Returns the tree op executor for backwards compat. 54 */ 55 NS_IMETHOD_(nsIContentSink*) GetContentSink() override; 56 57 /** 58 * Always returns "view" for backwards compat. 59 */ 60 NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override; 61 62 /** 63 * No-op for backwards compat. 64 */ 65 NS_IMETHOD_(void) SetCommand(const char* aCommand) override; 66 67 /** 68 * No-op for backwards compat. 69 */ 70 NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override; 71 72 /** 73 * Call this method once you've created a parser, and want to instruct it 74 * about what charset to load 75 * 76 * @param aEncoding the charset of a document 77 * @param aCharsetSource the source of the charset 78 */ 79 virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding, 80 int32_t aSource, 81 bool aForceAutoDetection) override; 82 83 /** 84 * Get the channel associated with this parser 85 * @param aChannel out param that will contain the result 86 * @return NS_OK if successful or NS_NOT_AVAILABLE if not 87 */ 88 nsresult GetChannel(nsIChannel** aChannel); 89 90 /** 91 * Get the stream parser for this parser 92 */ 93 virtual nsIStreamListener* GetStreamListener() override; 94 95 /** 96 * Don't call. For interface compat only. 97 */ 98 NS_IMETHOD ContinueInterruptedParsing() override; 99 100 /** 101 * Blocks the parser. 102 */ 103 NS_IMETHOD_(void) BlockParser() override; 104 105 /** 106 * Unblocks the parser. 107 */ 108 NS_IMETHOD_(void) UnblockParser() override; 109 110 /** 111 * Asynchronously continues parsing. 112 */ 113 NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override; 114 115 /** 116 * Query whether the parser is enabled (i.e. not blocked) or not. 117 */ 118 NS_IMETHOD_(bool) IsParserEnabled() override; 119 120 /** 121 * Query whether the parser is closed (i.e. document.closed() is called) or 122 * not. 123 */ 124 NS_IMETHOD_(bool) IsParserClosed() override; 125 126 /** 127 * Query whether the parser thinks it's done with parsing. 128 */ 129 NS_IMETHOD_(bool) IsComplete() override; 130 131 /** 132 * Set up request observer. 133 * 134 * @param aURL used for View Source title 135 */ 136 NS_IMETHOD Parse(nsIURI* aURL) override; 137 138 /** 139 * document.write and document.close 140 * 141 * @param aSourceBuffer the argument of document.write (empty for .close()) 142 * @param aKey a key unique to the script element that caused this call 143 * @param aLastCall true if .close() false if .write() 144 */ 145 nsresult Parse(const nsAString& aSourceBuffer, void* aKey, bool aLastCall); 146 147 /** 148 * Stops the parser prematurely 149 */ 150 NS_IMETHOD Terminate() override; 151 152 /** 153 * True if the insertion point (per HTML5) is defined. 154 */ 155 virtual bool IsInsertionPointDefined() override; 156 157 /** 158 * Call immediately before starting to evaluate a parser-inserted script or 159 * in general when the spec says to increment the script nesting level. 160 */ 161 void IncrementScriptNestingLevel() final; 162 163 /** 164 * Call immediately after having evaluated a parser-inserted script or 165 * generally want to restore to the state before the last 166 * IncrementScriptNestingLevel call. 167 */ 168 void DecrementScriptNestingLevel() final; 169 170 /** 171 * True if this is an HTML5 parser whose script nesting level (in 172 * the sense of 173 * <https://html.spec.whatwg.org/multipage/parsing.html#script-nesting-level>) 174 * is nonzero. 175 */ 176 bool HasNonzeroScriptNestingLevel() const final; 177 178 /** 179 * Marks the HTML5 parser as not a script-created parser: Prepares the 180 * parser to be able to read a stream. 181 * 182 * @param aCommand the parser command (Yeah, this is bad API design. Let's 183 * make this better when retiring nsIParser) 184 */ 185 void MarkAsNotScriptCreated(const char* aCommand); 186 187 /** 188 * True if this is a script-created HTML5 parser. 189 */ 190 virtual bool IsScriptCreated() override; 191 192 /** 193 * True iff this is an about:blank-mode HTML5 parser 194 * (i.e. a parser for non-initial about:blank). 195 */ 196 virtual bool IsAboutBlankMode() override; 197 198 /* End nsIParser */ 199 200 // Not from an external interface 201 // Non-inherited methods 202 203 public: 204 /** 205 * Initializes the parser to load from a channel. 206 */ 207 virtual nsresult Initialize(mozilla::dom::Document* aDoc, nsIURI* aURI, 208 nsISupports* aContainer, nsIChannel* aChannel); 209 210 inline nsHtml5Tokenizer* GetTokenizer() { return mTokenizer.get(); } 211 212 void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState, 213 int32_t aLine); 214 215 void DropStreamParser() { 216 if (GetStreamParser()) { 217 GetStreamParser()->DropTimer(); 218 mStreamListener->DropDelegate(); 219 mStreamListener = nullptr; 220 } 221 } 222 223 void StartTokenizer(bool aScriptingEnabled); 224 225 void ContinueAfterFailedCharsetSwitch(); 226 227 nsHtml5StreamParser* GetStreamParser() { 228 if (!mStreamListener) { 229 return nullptr; 230 } 231 return mStreamListener->GetDelegate(); 232 } 233 234 void PermanentlyUndefineInsertionPoint() { 235 mInsertionPointPermanentlyUndefined = true; 236 } 237 238 /** 239 * Parse until pending data is exhausted or a script blocks the parser 240 */ 241 nsresult ParseUntilBlocked(); 242 243 /** 244 * Start our executor. This is meant to be used from document.open() _only_ 245 * and does some work similar to what nsHtml5StreamParser::OnStartRequest does 246 * for normal parses. 247 */ 248 nsresult StartExecutor(); 249 250 private: 251 virtual ~nsHtml5Parser(); 252 253 // State variables 254 255 /** 256 * This parser is parsing (non-initial) about:blank for viewing (not View 257 * Source or data) 258 */ 259 bool mAboutBlankMode; 260 261 /** 262 * Whether the last character tokenized was a carriage return (for CRLF) 263 */ 264 bool mLastWasCR; 265 266 /** 267 * Whether the last character tokenized was a carriage return (for CRLF) 268 * when preparsing document.write. 269 */ 270 bool mDocWriteSpeculativeLastWasCR; 271 272 /** 273 * The parser is blocking on the load of an external script from a web 274 * page, or any number of extension content scripts. 275 */ 276 uint32_t mBlocked; 277 278 /** 279 * Whether the document.write() speculator is already active. 280 */ 281 bool mDocWriteSpeculatorActive; 282 283 /** 284 * The number of IncrementScriptNestingLevel calls we've seen without a 285 * matching DecrementScriptNestingLevel. 286 */ 287 int32_t mScriptNestingLevel; 288 289 /** 290 * True if Terminate() has been called. 291 */ 292 bool mTerminationStarted; 293 294 /** 295 * True if document.close() has been called. 296 */ 297 bool mDocumentClosed; 298 299 bool mInDocumentWrite; 300 301 /** 302 * This is set when the tokenizer has seen EOF. The purpose is to 303 * keep the insertion point undefined between the time the 304 * parser has reached the point where it can't accept more input 305 * and the time the document's mParser is set to nullptr. 306 * Scripts can run during this time period due to an update 307 * batch ending and due to various end-of-parse events firing. 308 * (Setting mParser on the document to nullptr at the point 309 * where this flag gets set to true would break things that for 310 * legacy reasons assume that mParser on the document stays 311 * non-null though the end-of-parse events.) 312 */ 313 bool mInsertionPointPermanentlyUndefined; 314 315 // Portable parser objects 316 /** 317 * The first buffer in the pending UTF-16 buffer queue 318 */ 319 RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer; 320 321 /** 322 * The last buffer in the pending UTF-16 buffer queue. Always points 323 * to a sentinel object with nullptr as its parser key. 324 */ 325 nsHtml5OwningUTF16Buffer* mLastBuffer; // weak ref; 326 327 /** 328 * The tree operation executor 329 */ 330 RefPtr<nsHtml5TreeOpExecutor> mExecutor; 331 332 /** 333 * The HTML5 tree builder 334 */ 335 const mozilla::UniquePtr<nsHtml5TreeBuilder> mTreeBuilder; 336 337 /** 338 * The HTML5 tokenizer 339 */ 340 const mozilla::UniquePtr<nsHtml5Tokenizer> mTokenizer; 341 342 /** 343 * Another HTML5 tree builder for preloading document.written content. 344 */ 345 mozilla::UniquePtr<nsHtml5TreeBuilder> mDocWriteSpeculativeTreeBuilder; 346 347 /** 348 * Another HTML5 tokenizer for preloading document.written content. 349 */ 350 mozilla::UniquePtr<nsHtml5Tokenizer> mDocWriteSpeculativeTokenizer; 351 352 /** 353 * The stream listener holding the stream parser. 354 */ 355 RefPtr<nsHtml5StreamListener> mStreamListener; 356 357 /** 358 * 359 */ 360 int32_t mRootContextLineNumber; 361 362 /** 363 * Whether it's OK to transfer parsing back to the stream parser 364 */ 365 bool mReturnToStreamParserPermitted; 366 367 /** 368 * The scoped atom table 369 */ 370 nsHtml5AtomTable mAtomTable; 371 }; 372 #endif