tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsHtml5Parser.h (10121B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef NS_HTML5_PARSER
      7 #define NS_HTML5_PARSER
      8 
      9 #include "mozilla/UniquePtr.h"
     10 #include "nsIParser.h"
     11 #include "nsDeque.h"
     12 #include "nsIContentSink.h"
     13 #include "nsIRequest.h"
     14 #include "nsIChannel.h"
     15 #include "nsCOMArray.h"
     16 #include "nsContentSink.h"
     17 #include "nsCycleCollectionParticipant.h"
     18 #include "nsHtml5OwningUTF16Buffer.h"
     19 #include "nsHtml5TreeOpExecutor.h"
     20 #include "nsHtml5StreamParser.h"
     21 #include "nsHtml5AtomTable.h"
     22 #include "nsWeakReference.h"
     23 #include "nsHtml5StreamListener.h"
     24 #include "nsCharsetSource.h"
     25 
     26 class nsHtml5Parser final : public nsIParser,  // nsIParser entry points are declared below
     27                            public nsSupportsWeakReference,  // weak-reference support
     28                            public nsIStreamListener {  // stream callbacks (about:blank-only)
     29 public:
     30  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
     31 
     32  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser)
     33 
     34  nsHtml5Parser();
     35 
     36  // about:blank-only
     37  NS_IMETHOD OnStartRequest(nsIRequest* aRequest) override;
     38 
     39  // about:blank-only and exists only for interface compat.
     40  NS_IMETHOD OnDataAvailable(nsIRequest* aRequest, nsIInputStream* aInStream,
     41                             uint64_t aSourceOffset, uint32_t aLength) override;
     42 
     43  // about:blank-only and exists only for interface compat.
     44  NS_IMETHOD OnStopRequest(nsIRequest* aRequest, nsresult aStatus) override;
     45 
     46  /* Start nsIParser */
     47  /**
     48   * No-op for backwards compat.
     49   */
     50  NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;
     51 
     52  /**
     53   * Returns the tree op executor for backwards compat.
     54   */
     55  NS_IMETHOD_(nsIContentSink*) GetContentSink() override;
     56 
     57  /**
     58   * Always returns "view" for backwards compat.
     59   */
     60  NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;
     61 
     62  /**
     63   * No-op for backwards compat.
     64   */
     65  NS_IMETHOD_(void) SetCommand(const char* aCommand) override;
     66 
     67  /**
     68   * No-op for backwards compat.
     69   */
     70  NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;
     71 
     72  /**
     73   *  Call this method once you've created a parser, and want to instruct it
     74   *  about what charset to load.
     75   *  @param   aEncoding the charset of a document
     76   *  @param   aSource the source of the charset
     77   *  @param   aForceAutoDetection presumably forces detection (TODO confirm)
     78   */
     79  virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
     80                                  int32_t aSource,
     81                                  bool aForceAutoDetection) override;
     82 
     83  /**
     84   * Get the channel associated with this parser
     85   * @param aChannel out param that will contain the result
     86   * @return NS_OK if successful or NS_ERROR_NOT_AVAILABLE if not
     87   */
     88  nsresult GetChannel(nsIChannel** aChannel);
     89 
     90  /**
     91   * Get the stream listener for this parser
     92   */
     93  virtual nsIStreamListener* GetStreamListener() override;
     94 
     95  /**
     96   * Don't call. For interface compat only.
     97   */
     98  NS_IMETHOD ContinueInterruptedParsing() override;
     99 
    100  /**
    101   * Blocks the parser.
    102   */
    103  NS_IMETHOD_(void) BlockParser() override;
    104 
    105  /**
    106   * Unblocks the parser.
    107   */
    108  NS_IMETHOD_(void) UnblockParser() override;
    109 
    110  /**
    111   * Asynchronously continues parsing.
    112   */
    113  NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;
    114 
    115  /**
    116   * Query whether the parser is enabled (i.e. not blocked) or not.
    117   */
    118  NS_IMETHOD_(bool) IsParserEnabled() override;
    119 
    120  /**
    121   * Query whether the parser is closed (i.e. document.close() has been
    122   * called) or not.
    123   */
    124  NS_IMETHOD_(bool) IsParserClosed() override;
    125 
    126  /**
    127   * Query whether the parser thinks it's done with parsing.
    128   */
    129  NS_IMETHOD_(bool) IsComplete() override;
    130 
    131  /**
    132   * Set up request observer.
    133   *
    134   * @param   aURL used for View Source title
    135   */
    136  NS_IMETHOD Parse(nsIURI* aURL) override;
    137 
    138  /**
    139   * document.write and document.close
    140   *
    141   * @param   aSourceBuffer the argument of document.write (empty for .close())
    142   * @param   aKey a key unique to the script element that caused this call
    143   * @param   aLastCall true if .close() false if .write()
    144   */
    145  nsresult Parse(const nsAString& aSourceBuffer, void* aKey, bool aLastCall);
    146 
    147  /**
    148   * Stops the parser prematurely
    149   */
    150  NS_IMETHOD Terminate() override;
    151 
    152  /**
    153   * True if the insertion point (per HTML5) is defined.
    154   */
    155  virtual bool IsInsertionPointDefined() override;
    156 
    157  /**
    158   * Call immediately before starting to evaluate a parser-inserted script or
    159   * in general when the spec says to increment the script nesting level.
    160   */
    161  void IncrementScriptNestingLevel() final;
    162 
    163  /**
    164   * Call immediately after having evaluated a parser-inserted script, or
    165   * more generally whenever the state from before the last
    166   * IncrementScriptNestingLevel call should be restored.
    167   */
    168  void DecrementScriptNestingLevel() final;
    169 
    170  /**
    171   * True if this is an HTML5 parser whose script nesting level (in
    172   * the sense of
    173   * <https://html.spec.whatwg.org/multipage/parsing.html#script-nesting-level>)
    174   * is nonzero.
    175   */
    176  bool HasNonzeroScriptNestingLevel() const final;
    177 
    178  /**
    179   * Marks the HTML5 parser as not a script-created parser: Prepares the
    180   * parser to be able to read a stream.
    181   *
    182   * @param aCommand the parser command (Yeah, this is bad API design. Let's
    183   * make this better when retiring nsIParser)
    184   */
    185  void MarkAsNotScriptCreated(const char* aCommand);
    186 
    187  /**
    188   * True if this is a script-created HTML5 parser.
    189   */
    190  virtual bool IsScriptCreated() override;
    191 
    192  /**
    193   * True iff this is an about:blank-mode HTML5 parser
    194   * (i.e. a parser for non-initial about:blank).
    195   */
    196  virtual bool IsAboutBlankMode() override;
    197 
    198  /* End nsIParser  */
    199 
    200  // Not from an external interface
    201  // Non-inherited methods
    202 
    203 public:
    204  /**
    205   * Initializes the parser to load from a channel.
    206   */
    207  virtual nsresult Initialize(mozilla::dom::Document* aDoc, nsIURI* aURI,
    208                              nsISupports* aContainer, nsIChannel* aChannel);
    209  // Returns the raw pointer held by mTokenizer; ownership stays with the parser.
    210  inline nsHtml5Tokenizer* GetTokenizer() { return mTokenizer.get(); }
    211 
    212  void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState,
    213                                     int32_t aLine);
    214  // If a stream parser exists: drop its timer, drop the listener's delegate and release mStreamListener.
    215  void DropStreamParser() {
    216    if (GetStreamParser()) {
    217      GetStreamParser()->DropTimer();
    218      mStreamListener->DropDelegate();
    219      mStreamListener = nullptr;
    220    }
    221  }
    222 
    223  void StartTokenizer(bool aScriptingEnabled);
    224 
    225  void ContinueAfterFailedCharsetSwitch();
    226  // Returns mStreamListener's delegate, or nullptr when there is no stream listener.
    227  nsHtml5StreamParser* GetStreamParser() {
    228    if (!mStreamListener) {
    229      return nullptr;
    230    }
    231    return mStreamListener->GetDelegate();
    232  }
    233  // Sets mInsertionPointPermanentlyUndefined to true.
    234  void PermanentlyUndefineInsertionPoint() {
    235    mInsertionPointPermanentlyUndefined = true;
    236  }
    237 
    238  /**
    239   * Parse until pending data is exhausted or a script blocks the parser
    240   */
    241  nsresult ParseUntilBlocked();
    242 
    243  /**
    244   * Start our executor.  This is meant to be used from document.open() _only_
    245   * and does some work similar to what nsHtml5StreamParser::OnStartRequest does
    246   * for normal parses.
    247   */
    248  nsresult StartExecutor();
    249 
    250 private:
    251  virtual ~nsHtml5Parser();
    252 
    253  // State variables
    254 
    255  /**
    256   * This parser is parsing (non-initial) about:blank for viewing (not View
    257   * Source or data)
    258   */
    259  bool mAboutBlankMode;
    260 
    261  /**
    262   * Whether the last character tokenized was a carriage return (for CRLF)
    263   */
    264  bool mLastWasCR;
    265 
    266  /**
    267   * Whether the last character tokenized was a carriage return (for CRLF)
    268   * when preparsing document.write.
    269   */
    270  bool mDocWriteSpeculativeLastWasCR;
    271 
    272  /**
    273   * The parser is blocking on the load of an external script from a web
    274   * page, or any number of extension content scripts.
    275   */
    276  uint32_t mBlocked;
    277 
    278  /**
    279   * Whether the document.write() speculator is already active.
    280   */
    281  bool mDocWriteSpeculatorActive;
    282 
    283  /**
    284   * The number of IncrementScriptNestingLevel calls we've seen without a
    285   * matching DecrementScriptNestingLevel.
    286   */
    287  int32_t mScriptNestingLevel;
    288 
    289  /**
    290   * True if Terminate() has been called.
    291   */
    292  bool mTerminationStarted;
    293 
    294  /**
    295   * True if document.close() has been called.
    296   */
    297  bool mDocumentClosed;
    298  // NOTE(review): undocumented; presumably true while inside a document.write() call -- confirm.
    299  bool mInDocumentWrite;
    300 
    301  /**
    302   * This is set when the tokenizer has seen EOF. The purpose is to
    303   * keep the insertion point undefined between the time the
    304   * parser has reached the point where it can't accept more input
    305   * and the time the document's mParser is set to nullptr.
    306   * Scripts can run during this time period due to an update
    307   * batch ending and due to various end-of-parse events firing.
    308   * (Setting mParser on the document to nullptr at the point
    309   * where this flag gets set to true would break things that for
    310   * legacy reasons assume that mParser on the document stays
    311   * non-null through the end-of-parse events.)
    312   */
    313  bool mInsertionPointPermanentlyUndefined;
    314 
    315  // Portable parser objects
    316  /**
    317   * The first buffer in the pending UTF-16 buffer queue
    318   */
    319  RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer;
    320 
    321  /**
    322   * The last buffer in the pending UTF-16 buffer queue. Always points
    323   * to a sentinel object with nullptr as its parser key.
    324   */
    325  nsHtml5OwningUTF16Buffer* mLastBuffer;  // weak ref
    326 
    327  /**
    328   * The tree operation executor
    329   */
    330  RefPtr<nsHtml5TreeOpExecutor> mExecutor;
    331 
    332  /**
    333   * The HTML5 tree builder
    334   */
    335  const mozilla::UniquePtr<nsHtml5TreeBuilder> mTreeBuilder;
    336 
    337  /**
    338   * The HTML5 tokenizer
    339   */
    340  const mozilla::UniquePtr<nsHtml5Tokenizer> mTokenizer;
    341 
    342  /**
    343   * Another HTML5 tree builder for preloading document.written content.
    344   */
    345  mozilla::UniquePtr<nsHtml5TreeBuilder> mDocWriteSpeculativeTreeBuilder;
    346 
    347  /**
    348   * Another HTML5 tokenizer for preloading document.written content.
    349   */
    350  mozilla::UniquePtr<nsHtml5Tokenizer> mDocWriteSpeculativeTokenizer;
    351 
    352  /**
    353   * The stream listener holding the stream parser.
    354   */
    355  RefPtr<nsHtml5StreamListener> mStreamListener;
    356 
    357  /**
    358   * NOTE(review): undocumented; presumably the root context's line number (confirm).
    359   */
    360  int32_t mRootContextLineNumber;
    361 
    362  /**
    363   * Whether it's OK to transfer parsing back to the stream parser
    364   */
    365  bool mReturnToStreamParserPermitted;
    366 
    367  /**
    368   * The scoped atom table
    369   */
    370  nsHtml5AtomTable mAtomTable;
    371 };
    372 #endif