nsParser.h (9096B)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
 * MODULE NOTES:
 *
 *  This class does two primary jobs:
 *    1) It iterates the tokens provided during the
 *       tokenization process, identifying where elements
 *       begin and end (doing validation and normalization).
 *    2) It controls and coordinates with an instance of
 *       the IContentSink interface, to coordinate the
 *       production of the content model.
 *
 *  The basic operation of this class assumes that an HTML
 *  document is non-normalized. Therefore, we don't process
 *  the document in a normalized way. Don't bother to look
 *  for methods like: doHead() or doBody().
 *
 *  Instead, in order to be backward compatible, we must
 *  scan the set of tokens and perform this basic set of
 *  operations:
 *    1) Determine the token type (easy, since the tokens know)
 *    2) Determine the appropriate section of the HTML document
 *       each token belongs in (HTML,HEAD,BODY,FRAMESET).
 *    3) Insert content into our document (via the sink) into
 *       the correct section.
 *    4) In the case of tags that belong in the BODY, we must
 *       ensure that our underlying document state reflects
 *       the appropriate context for our tag.
 *
 *       For example, if we see a <TR>, we must ensure our
 *       document contains a table into which the row can
 *       be placed. This may result in "implicit containers"
 *       created to ensure a well-formed document.
38 * 39 */ 40 41 #ifndef NS_PARSER__ 42 #define NS_PARSER__ 43 44 #include "nsIParser.h" 45 #include "nsDeque.h" 46 #include "CParserContext.h" 47 #include "nsHTMLTags.h" 48 #include "nsIContentSink.h" 49 #include "nsCOMArray.h" 50 #include "nsCycleCollectionParticipant.h" 51 #include "nsWeakReference.h" 52 #include "mozilla/Maybe.h" 53 #include "mozilla/UniquePtr.h" 54 55 class nsExpatDriver; 56 class nsIRunnable; 57 58 #ifdef _MSC_VER 59 # pragma warning(disable : 4275) 60 #endif 61 62 class nsParser final : public nsIParser, 63 public nsIStreamListener, 64 public nsSupportsWeakReference { 65 /** 66 * Destructor 67 * @update gess5/11/98 68 */ 69 virtual ~nsParser(); 70 71 public: 72 /** 73 * Called on module init 74 */ 75 static nsresult Init(); 76 77 /** 78 * Called on module shutdown 79 */ 80 static void Shutdown(); 81 82 NS_DECL_CYCLE_COLLECTING_ISUPPORTS 83 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser) 84 85 /** 86 * default constructor 87 * @update gess5/11/98 88 */ 89 nsParser(); 90 91 /** 92 * Select given content sink into parser for parser output 93 * @update gess5/11/98 94 * @param aSink is the new sink to be used by parser 95 * @return old sink, or nullptr 96 */ 97 NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override; 98 99 /** 100 * retrive the sink set into the parser 101 * @update gess5/11/98 102 * @param aSink is the new sink to be used by parser 103 * @return old sink, or nullptr 104 */ 105 NS_IMETHOD_(nsIContentSink*) GetContentSink(void) override; 106 107 /** 108 * Call this method once you've created a parser, and want to instruct it 109 * about the command which caused the parser to be constructed. For example, 110 * this allows us to select a DTD which can do, say, view-source. 
111 * 112 * @update gess 3/25/98 113 * @param aCommand -- ptrs to string that contains command 114 * @return nada 115 */ 116 NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override; 117 NS_IMETHOD_(void) SetCommand(const char* aCommand) override; 118 NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override; 119 120 /** 121 * Call this method once you've created a parser, and want to instruct it 122 * about what charset to load 123 * 124 * @update ftang 4/23/99 125 * @param aCharset- the charset of a document 126 * @param aCharsetSource- the source of the charset 127 * @param aChannelHadCharset- ignored 128 * @return nada 129 */ 130 virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset, 131 int32_t aSource, 132 bool aForceAutoDetection) override; 133 134 NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource) { 135 aSource = mCharsetSource; 136 return mCharset; 137 } 138 139 /** 140 * Cause parser to parse input from given URL 141 */ 142 NS_IMETHOD Parse(nsIURI* aURL) override; 143 144 /** 145 * This method gets called when you want to parse a fragment of XML surrounded 146 * by the context |aTagStack|. It requires that the parser have been given a 147 * fragment content sink. 148 * 149 * @param aSourceBuffer The XML that hasn't been parsed yet. 150 * @param aTagStack The context of the source buffer. 151 */ 152 nsresult ParseFragment(const nsAString& aSourceBuffer, 153 nsTArray<nsString>& aTagStack); 154 155 NS_IMETHOD ContinueInterruptedParsing() override; 156 NS_IMETHOD_(void) BlockParser() override; 157 NS_IMETHOD_(void) UnblockParser() override; 158 NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override; 159 NS_IMETHOD Terminate(void) override; 160 161 /** 162 * Call this to query whether the parser is enabled or not. 163 * 164 * @update vidur 4/12/99 165 * @return current state 166 */ 167 NS_IMETHOD_(bool) IsParserEnabled() override; 168 169 /** 170 * Call this to query whether the parser thinks it's done with parsing. 
171 * 172 * @update rickg 5/12/01 173 * @return complete state 174 */ 175 NS_IMETHOD_(bool) IsComplete() override; 176 177 /** 178 * This method gets called (automatically) during incremental parsing 179 * @update gess5/11/98 180 * @return TRUE if all went well, otherwise FALSE 181 */ 182 virtual nsresult ResumeParse(bool allowIteration = true, 183 bool aIsFinalChunk = false, 184 bool aCanInterrupt = true); 185 186 //********************************************* 187 // These methods are callback methods used by 188 // net lib to let us know about our inputstream. 189 //********************************************* 190 // nsIRequestObserver methods: 191 NS_DECL_NSIREQUESTOBSERVER 192 193 // nsIStreamListener methods: 194 NS_DECL_NSISTREAMLISTENER 195 196 /** 197 * Get the nsIStreamListener for this parser 198 */ 199 virtual nsIStreamListener* GetStreamListener() override; 200 201 void SetSinkCharset(NotNull<const Encoding*> aCharset); 202 203 /** 204 * Return true. 205 */ 206 virtual bool IsInsertionPointDefined() override; 207 208 /** 209 * No-op. 210 */ 211 void IncrementScriptNestingLevel() final; 212 213 /** 214 * No-op. 215 */ 216 void DecrementScriptNestingLevel() final; 217 218 bool HasNonzeroScriptNestingLevel() const final; 219 220 /** 221 * Always false. 222 */ 223 virtual bool IsScriptCreated() override; 224 225 /** 226 * Always false. 227 */ 228 virtual bool IsAboutBlankMode() override; 229 230 /** 231 * This is called when the final chunk has been 232 * passed to the parser and the content sink has 233 * interrupted token processing. It schedules 234 * a ParserContinue PL_Event which will ask the parser 235 * to HandleParserContinueEvent when it is handled. 236 * @update kmcclusk6/1/2001 237 */ 238 nsresult PostContinueEvent(); 239 240 /** 241 * Fired when the continue parse event is triggered. 
242 * @update kmcclusk 5/18/98 243 */ 244 void HandleParserContinueEvent(class nsParserContinueEvent*); 245 246 void Reset() { 247 Cleanup(); 248 mUnusedInput.Truncate(); 249 Initialize(); 250 } 251 252 bool IsScriptExecuting() { return mSink && mSink->IsScriptExecuting(); } 253 254 void ContinueParsingDocumentAfterCurrentScript() { 255 if (mSink) { 256 mSink->ContinueParsingDocumentAfterCurrentScript(); 257 } 258 } 259 260 // Returns Nothing() if we haven't determined yet what the parser is being 261 // used for. Else returns whether this parser is used for parsing XML. 262 mozilla::Maybe<bool> IsForParsingXML() { 263 if (!mParserContext || mParserContext->mDTDMode == eDTDMode_autodetect) { 264 return mozilla::Nothing(); 265 } 266 267 return mozilla::Some(mParserContext->mDocType == eXML); 268 } 269 270 protected: 271 void Initialize(); 272 void Cleanup(); 273 274 /** 275 * 276 * @update gess5/18/98 277 * @param 278 * @return 279 */ 280 nsresult WillBuildModel(); 281 282 /** 283 * Called when parsing is done. 284 */ 285 void DidBuildModel(); 286 287 private: 288 /** 289 * Pushes XML fragment parsing data to expat without an input stream. 290 */ 291 nsresult Parse(const nsAString& aSourceBuffer, bool aLastCall); 292 293 protected: 294 //********************************************* 295 // And now, some data members... 296 //********************************************* 297 298 mozilla::UniquePtr<CParserContext> mParserContext; 299 // mExpatDriver probably should be UniquePtr, but not changing 300 // for now due to cycle collection. 
301 RefPtr<nsExpatDriver> mExpatDriver; 302 nsCOMPtr<nsIContentSink> mSink; 303 nsIRunnable* mContinueEvent; // weak ref 304 305 eParserCommands mCommand; 306 nsresult mInternalState; 307 nsresult mStreamStatus; 308 int32_t mCharsetSource; 309 310 uint16_t mFlags; 311 uint32_t mBlocked; 312 313 nsString mUnusedInput; 314 NotNull<const Encoding*> mCharset; 315 nsCString mCommandStr; 316 317 bool mProcessingNetworkData; 318 bool mOnStopPending; 319 }; 320 321 #endif