tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsExpatDriver.h (11031B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef NS_EXPAT_DRIVER__
      7 #define NS_EXPAT_DRIVER__
      8 
      9 #include "expat_config.h"
     10 #include "moz_expat.h"
     11 #include "nsCOMPtr.h"
     12 #include "nsString.h"
     13 #include "nsIInputStream.h"
     14 #include "nsIParser.h"
     15 #include "nsCycleCollectionParticipant.h"
     16 #include "nsScanner.h"
     17 
     18 #include "rlbox_expat.h"
     19 #include "nsRLBoxExpatDriver.h"
     20 #include "mozilla/UniquePtr.h"
     21 
     22 class nsIExpatSink;
     23 struct nsCatalogData;
     24 class RLBoxExpatSandboxData;
     25 namespace mozilla {
     26 template <typename, size_t>
     27 class Array;
     28 }
     29 
     30 class nsExpatDriver : public nsISupports {
         // Driver around an Expat XML parser that is held through RLBox types (see
         // mSandboxPoolData and mExpatParser below). The Handle* methods are the
         // entry points for the individual Expat parse events.
         // NOTE(review): the events are presumably forwarded to mSink; the
         // implementation is not part of this header -- confirm there.
         //
         // The destructor is declared before the first access specifier and is
         // therefore private.
     31  virtual ~nsExpatDriver();
     32 
     33 public:
         // Declarations generated by the XPCOM ISupports / cycle-collection macros.
     34  NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
     35  NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver)
     36 
     37  nsExpatDriver();
     38 
         // NOTE(review): presumably aURI is the URI of the document to parse and
         // aSink the content sink kept in mOriginalSink -- confirm against the
         // implementation.
     39  nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink);
     40 
     41  void DidBuildModel();
     42  nsresult BuildModel();
     43  void Terminate();
     44 
         // NOTE(review): presumably feeds the data available in aScanner to Expat;
         // aIsFinalChunk presumably marks the last data for the document -- confirm.
     45  nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk);
     46 
         // Handlers for the individual Expat events. The element handlers are
         // static and take the sandbox plus tainted_expat arguments; the other
         // handlers are member functions taking plain char16_t strings.
         // NOTE(review): the ...ForSystemPrincipal variants are presumably the ones
         // selected when RLBoxExpatSandboxData::AttachDriver is called with its
         // system-principal flag set -- confirm against the implementation.
     47  int HandleExternalEntityRef(const char16_t* aOpenEntityNames,
     48                              const char16_t* aBase, const char16_t* aSystemId,
     49                              const char16_t* aPublicId);
     50  static void HandleStartElement(rlbox_sandbox_expat& aSandbox,
     51                                 tainted_expat<void*> aUserData,
     52                                 tainted_expat<const char16_t*> aName,
     53                                 tainted_expat<const char16_t**> aAtts);
     54  static void HandleStartElementForSystemPrincipal(
     55      rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
     56      tainted_expat<const char16_t*> aName,
     57      tainted_expat<const char16_t**> aAtts);
     58  static void HandleEndElement(rlbox_sandbox_expat& aSandbox,
     59                               tainted_expat<void*> aUserData,
     60                               tainted_expat<const char16_t*> aName);
     61  static void HandleEndElementForSystemPrincipal(
     62      rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
     63      tainted_expat<const char16_t*> aName);
     64  nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength);
     65  nsresult HandleComment(const char16_t* aName);
     66  nsresult HandleProcessingInstruction(const char16_t* aTarget,
     67                                       const char16_t* aData);
     68  nsresult HandleXMLDeclaration(const char16_t* aVersion,
     69                                const char16_t* aEncoding, int32_t aStandalone);
     70  nsresult HandleDefault(const char16_t* aData, const uint32_t aLength);
     71  nsresult HandleStartCdataSection();
     72  nsresult HandleEndCdataSection();
     73  nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
     74                                  const char16_t* aSysid,
     75                                  const char16_t* aPubid,
     76                                  bool aHasInternalSubset);
     77  nsresult HandleEndDoctypeDecl();
     78 
     79 private:
     80  // Load up an external stream to get external entity information
     81  nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
     82                                          const char16_t* aURLStr,
     83                                          nsIURI* aBaseURI,
     84                                          nsIInputStream** aStream,
     85                                          nsIURI** aAbsURI);
     86 
         // Finality of the data handed to ParseChunk; see the aIsFinal parameter
         // documented there.
     87  enum class ChunkOrBufferIsFinal {
           // Not the last chunk in a row passed to ParseChunk.
     88    None,
           // The last chunk in a row passed to ParseChunk.
     89    FinalChunk,
           // The last chunk and the last buffer: there will be no more calls to
           // ParseChunk for the document being parsed.
     90    FinalChunkAndBuffer,
     91  };
     92 
     93  /**
     94   * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
     95   * aLength should be 0. The result of the call will be stored in
     96   * mInternalState. Expat will parse as much of the buffer as it can and store
     97   * the rest in its internal buffer.
     98   *
     99   * @param aBuffer the buffer to pass to Expat. May be null.
    100   * @param aLength the length of the buffer to pass to Expat (in number of
    101   *                char16_t's). Must be 0 if aBuffer is null and > 0 if
    102   *                aBuffer is not null.
    103   * @param aIsFinal whether this is the last chunk in a row passed to
    104   *                 ParseChunk, and if so whether it's the last chunk and
    105   *                 buffer passed to ParseChunk (meaning there will be no more
    106   *                 calls to ParseChunk for the document being parsed).
    107   * @param aConsumed [out] the number of char16_t code units that Expat
    108   *                        consumed. This doesn't include the code units that
    109   *                        Expat stored in its buffer but didn't parse yet.
    110   * @param aLastLineLength [out] the length of the last line that Expat has
    111   *                              consumed. This will only be computed if
    112   *                              aIsFinal is not None or mInternalState is set
    113   *                              to a failure.
    114   */
    115  void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
    116                  ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
    117                  XML_Size* aLastLineLength);
    118  /**
    119   * Wrapper for ParseChunk. If the buffer is too large to be copied into the
    120   * sandbox all at once, splits it into chunks and invokes ParseChunk in a
    121   * loop.
          * NOTE(review): no ParseBuffer is declared in this class, so ParseChunk is
          * taken to be the wrapped function -- confirm against the implementation.
    122   *
    123   * @param aBuffer the buffer to pass to Expat. May be null.
    124   * @param aLength the length of the buffer to pass to Expat (in number of
    125   *                char16_t's). Must be 0 if aBuffer is null and > 0 if
    126   *                aBuffer is not null.
    127   * @param aIsFinal whether there will definitely not be any more new buffers
    128   *                 passed in for parsing
          * @param aPassedToExpat [out] NOTE(review): presumably the number of
          *                             char16_t code units handed to Expat over
          *                             all chunks -- confirm against the
          *                             implementation.
    129   * @param aConsumed [out] the number of char16_t code units that Expat
    130   *                        consumed. This doesn't include the code units that
    131   *                        Expat stored in its buffer but didn't parse yet.
    132   * @param aLastLineLength [out] the length of the last line that Expat has
    133   *                              consumed.
    134   */
    135  void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
    136                           bool aIsFinal, uint32_t* aPassedToExpat,
    137                           uint32_t* aConsumed, XML_Size* aLastLineLength);
    138 
    139  nsresult HandleError();
    140 
    141  void MaybeStopParser(nsresult aState);
    142 
         // True when mInternalState is NS_ERROR_HTMLPARSER_BLOCK or
         // NS_ERROR_HTMLPARSER_INTERRUPTED.
    143  bool BlockedOrInterrupted() {
    144    return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
    145           mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
    146  }
    147 
    148  // Expat allows us to set the base URI for entities. It doesn't use the base
    149  // URI itself, but just passes it along to all the entity handlers (just the
    150  // external entity reference handler for us). It does expect the base URI as a
    151  // null-terminated string, with the same character type as the parsed buffers
    152  // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
    153  // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
    154  // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
    155  // Most of the time this base URI is unused (the external entity handler is
    156  // rarely called), but when it is we also convert it back to a nsIURI, so we
    157  // convert the string back to UTF-8.
    158  //
    159  // We'd rather not do any of these conversions and copies, so we use a (hacky)
    160  // workaround. We store all base URIs in an array of nsIURIs. Instead of
    161  // passing the real URI to Expat as a string, we pass it a null-terminated
    162  // 2-character buffer. The first character of that buffer stores the index of
    163  // the corresponding nsIURI in the array (incremented by 1 because 0 is used
    164  // to terminate a string). The entity handler can then use the index from the
    165  // base URI that Expat passes it to look up the right nsIURI from the array.
    166  //
    167  // GetExpatBaseURI pushes the nsIURI to the array, and creates the
    168  // two-character buffer for it.
    169  //
    170  // GetBaseURI looks up the right nsIURI in the array, based on the index from
    171  // the two-character buffer.
    172  using ExpatBaseURI = mozilla::Array<XML_Char, 2>;
    173  ExpatBaseURI GetExpatBaseURI(nsIURI* aURI);
    174  nsIURI* GetBaseURI(const XML_Char* aBase) const;
    175 
         // Accessors for the sandbox data and the sandbox itself.
         // NOTE(review): presumably both are reached through mSandboxPoolData --
         // confirm against the implementation.
    176  RLBoxExpatSandboxData* SandboxData() const;
    177  rlbox_sandbox_expat* Sandbox() const;
    178 
    179  // Destroy expat parser and return sandbox to pool
    180  void Destroy();
    181 
         // Uniquely owned pool data for the sandbox, and the Expat parser handle as
         // a tainted_expat value.
    182  mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData;
    183  tainted_expat<XML_Parser> mExpatParser;
    184 
    185  nsString mLastLine;
    186  nsString mCDataText;
    187  // Various parts of a doctype
    188  nsString mDoctypeName;
    189  nsString mSystemID;
    190  nsString mPublicID;
    191  nsString mInternalSubset;
         // State flags. NOTE(review): meanings are presumed from the names (inside
         // a CDATA section / internal subset / external DTD, final call to Expat
         // already made) -- confirm against the implementation.
    192  bool mInCData;
    193  bool mInInternalSubset;
    194  bool mInExternalDTD;
    195  bool mMadeFinalCallToExpat;
    196 
    197  // Used to track if we're in the parser.
    198  bool mInParser;
    199 
         // Receives the result of ParseChunk (see its documentation above) and is
         // the value tested by BlockedOrInterrupted.
    200  nsresult mInternalState;
    201 
    202  // The length of the data in Expat's buffer (in number of char16_t code
         // units).
    203  uint32_t mExpatBuffered;
    204 
         // NOTE(review): presumably the current element nesting depth -- confirm.
    205  uint16_t mTagDepth;
    206 
    207  // These sinks all refer to the same conceptual object. mOriginalSink is
    208  // identical with the nsIContentSink* passed to WillBuildModel, and exists
    209  // only to avoid QI-ing back to nsIContentSink*.
         // NOTE(review): no WillBuildModel is declared in this class; the
         // nsIContentSink* is presumably the one passed to Initialize -- confirm.
    210  nsCOMPtr<nsIContentSink> mOriginalSink;
    211  nsCOMPtr<nsIExpatSink> mSink;
    212 
    213  const nsCatalogData* mCatalogData;  // weak
         // NOTE(review): presumably the array of base URIs described in the
         // comment on GetExpatBaseURI / GetBaseURI above -- confirm.
    214  nsTArray<nsCOMPtr<nsIURI>> mURIs;
    215 
    216  // Used for error reporting.
    217  uint64_t mInnerWindowID;
    218 };
    219 
    220 class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase {
         // Data kept for one Expat sandbox: uniquely owns the rlbox_sandbox_expat
         // (mSandbox) and holds the sandbox callback handles declared below.
         // RLBoxExpatSandboxPool and nsExpatDriver are friends and can therefore
         // reach the private members directly.
    221  friend class RLBoxExpatSandboxPool;
    222  friend class nsExpatDriver;
    223 
    224 public:
         // Forwards aSize to the RLBoxSandboxDataBase constructor and invokes
         // MOZ_COUNT_CTOR for this class.
         // NOTE(review): aSize is presumably the size of the sandbox -- confirm
         // against RLBoxSandboxDataBase.
    225  explicit RLBoxExpatSandboxData(uint64_t aSize)
    226      : mozilla::RLBoxSandboxDataBase(aSize) {
    227    MOZ_COUNT_CTOR(RLBoxExpatSandboxData);
    228  }
         // Declared here, defined elsewhere.
    229  ~RLBoxExpatSandboxData();
         // Returns the raw pointer held by mSandbox; ownership stays with this
         // object.
    230  rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); }
    231  // After getting a sandbox from the pool we need to register the
    232  // Handle{Start,End}Element callbacks and associate the driver with the
    233  // sandbox.
         // NOTE(review): IsSystemPrincipal presumably selects the
         // ...ForSystemPrincipal element handlers of nsExpatDriver, and aDriver is
         // presumably the nsExpatDriver passed as an untyped pointer -- confirm
         // against the implementation.
    234  void AttachDriver(bool IsSystemPrincipal, void* aDriver);
         // Counterpart of AttachDriver.
         // NOTE(review): presumably unregisters the per-driver callbacks and
         // clears the driver association -- confirm against the implementation.
    235  void DetachDriver();
    236 
    237 private:
    238  mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox;
    239  // Common expat callbacks that persist across calls to {Attach,Detach}Driver,
    240  // and consequently across sandbox reuses.
    241  sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration;
    242  sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData;
    243  sandbox_callback_expat<XML_ProcessingInstructionHandler>
    244      mHandleProcessingInstruction;
    245  sandbox_callback_expat<XML_DefaultHandler> mHandleDefault;
    246  sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef;
    247  sandbox_callback_expat<XML_CommentHandler> mHandleComment;
    248  sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection;
    249  sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection;
    250  sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl;
    251  sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl;
    252  // Expat callbacks specific to each driver, and thus (re)set across sandbox
    253  // reuses.
    254  sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement;
    255  sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement;
    256 };
    257 
    258 #endif