nsExpatDriver.h (11031B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef NS_EXPAT_DRIVER__ 7 #define NS_EXPAT_DRIVER__ 8 9 #include "expat_config.h" 10 #include "moz_expat.h" 11 #include "nsCOMPtr.h" 12 #include "nsString.h" 13 #include "nsIInputStream.h" 14 #include "nsIParser.h" 15 #include "nsCycleCollectionParticipant.h" 16 #include "nsScanner.h" 17 18 #include "rlbox_expat.h" 19 #include "nsRLBoxExpatDriver.h" 20 #include "mozilla/UniquePtr.h" 21 22 class nsIExpatSink; 23 struct nsCatalogData; 24 class RLBoxExpatSandboxData; 25 namespace mozilla { 26 template <typename, size_t> 27 class Array; 28 } 29 30 class nsExpatDriver : public nsISupports { 31 virtual ~nsExpatDriver(); 32 33 public: 34 NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL 35 NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver) 36 37 nsExpatDriver(); 38 39 nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink); 40 41 void DidBuildModel(); 42 nsresult BuildModel(); 43 void Terminate(); 44 45 nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk); 46 47 int HandleExternalEntityRef(const char16_t* aOpenEntityNames, 48 const char16_t* aBase, const char16_t* aSystemId, 49 const char16_t* aPublicId); 50 static void HandleStartElement(rlbox_sandbox_expat& aSandbox, 51 tainted_expat<void*> aUserData, 52 tainted_expat<const char16_t*> aName, 53 tainted_expat<const char16_t**> aAtts); 54 static void HandleStartElementForSystemPrincipal( 55 rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData, 56 tainted_expat<const char16_t*> aName, 57 tainted_expat<const char16_t**> aAtts); 58 static void HandleEndElement(rlbox_sandbox_expat& aSandbox, 59 tainted_expat<void*> aUserData, 60 tainted_expat<const char16_t*> aName); 61 static void HandleEndElementForSystemPrincipal( 62 rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData, 63 tainted_expat<const char16_t*> aName); 64 nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength); 65 nsresult HandleComment(const char16_t* aName); 66 nsresult HandleProcessingInstruction(const char16_t* aTarget, 67 const char16_t* aData); 68 nsresult HandleXMLDeclaration(const char16_t* aVersion, 69 const char16_t* aEncoding, int32_t aStandalone); 70 nsresult HandleDefault(const char16_t* aData, const uint32_t aLength); 71 nsresult HandleStartCdataSection(); 72 nsresult HandleEndCdataSection(); 73 nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName, 74 const char16_t* aSysid, 75 const char16_t* aPubid, 76 bool aHasInternalSubset); 77 nsresult HandleEndDoctypeDecl(); 78 79 private: 80 // Load up an external stream to get external entity information 81 nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr, 82 const char16_t* aURLStr, 83 nsIURI* aBaseURI, 84 nsIInputStream** aStream, 85 nsIURI** aAbsURI); 86 87 enum class ChunkOrBufferIsFinal { 88 None, 89 FinalChunk, 90 FinalChunkAndBuffer, 91 }; 92 93 /** 94 * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and 95 * aLength should be 0. The result of the call will be stored in 96 * mInternalState. Expat will parse as much of the buffer as it can and store 97 * the rest in its internal buffer. 98 * 99 * @param aBuffer the buffer to pass to Expat. May be null. 100 * @param aLength the length of the buffer to pass to Expat (in number of 101 * char16_t's). Must be 0 if aBuffer is null and > 0 if 102 * aBuffer is not null. 103 * @param aIsFinal whether this is the last chunk in a row passed to 104 * ParseChunk, and if so whether it's the last chunk and 105 * buffer passed to ParseChunk (meaning there will be no more 106 * calls to ParseChunk for the document being parsed). 107 * @param aConsumed [out] the number of PRUnichars that Expat consumed. This 108 * doesn't include the PRUnichars that Expat stored in 109 * its buffer but didn't parse yet. 110 * @param aLastLineLength [out] the length of the last line that Expat has 111 * consumed. This will only be computed if 112 * aIsFinal is not None or mInternalState is set 113 * to a failure. 114 */ 115 void ParseChunk(const char16_t* aBuffer, uint32_t aLength, 116 ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed, 117 XML_Size* aLastLineLength); 118 /** 119 * Wrapper for ParseBuffer. If the buffer is too large to be copied into the 120 * sandbox all at once, splits it into chunks and invokes ParseBuffer in a 121 * loop. 122 * 123 * @param aBuffer the buffer to pass to Expat. May be null. 124 * @param aLength the length of the buffer to pass to Expat (in number of 125 * char16_t's). Must be 0 if aBuffer is null and > 0 if 126 * aBuffer is not null. 127 * @param aIsFinal whether there will definitely not be any more new buffers 128 * passed in to ParseBuffer 129 * @param aConsumed [out] the number of PRUnichars that Expat consumed. This 130 * doesn't include the PRUnichars that Expat stored in 131 * its buffer but didn't parse yet. 132 * @param aLastLineLength [out] the length of the last line that Expat has 133 * consumed. 134 */ 135 void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength, 136 bool aIsFinal, uint32_t* aPassedToExpat, 137 uint32_t* aConsumed, XML_Size* aLastLineLength); 138 139 nsresult HandleError(); 140 141 void MaybeStopParser(nsresult aState); 142 143 bool BlockedOrInterrupted() { 144 return mInternalState == NS_ERROR_HTMLPARSER_BLOCK || 145 mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED; 146 } 147 148 // Expat allows us to set the base URI for entities. It doesn't use the base 149 // URI itself, but just passes it along to all the entity handlers (just the 150 // external entity reference handler for us). It does expect the base URI as a 151 // null-terminated string, with the same character type as the parsed buffers 152 // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do 153 // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we 154 // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool). 155 // Most of the time this base URI is unused (the external entity handler is 156 // rarely called), but when it is we also convert it back to a nsIURI, so we 157 // convert the string back to UTF-8. 158 // 159 // We'd rather not do any of these conversions and copies, so we use a (hacky) 160 // workaround. We store all base URIs in an array of nsIURIs. Instead of 161 // passing the real URI to Expat as a string, we pass it a null-terminated 162 // 2-character buffer. The first character of that buffer stores the index of 163 // the corresponding nsIURI in the array (incremented with 1 because 0 is used 164 // to terminate a string). The entity handler can then use the index from the 165 // base URI that Expat passes it to look up the right nsIURI from the array. 166 // 167 // GetExpatBaseURI pushes the nsIURI to the array, and creates the 168 // two-character buffer for it. 169 // 170 // GetBaseURI looks up the right nsIURI in the array, based on the index from 171 // the two-character buffer. 172 using ExpatBaseURI = mozilla::Array<XML_Char, 2>; 173 ExpatBaseURI GetExpatBaseURI(nsIURI* aURI); 174 nsIURI* GetBaseURI(const XML_Char* aBase) const; 175 176 RLBoxExpatSandboxData* SandboxData() const; 177 rlbox_sandbox_expat* Sandbox() const; 178 179 // Destroy expat parser and return sandbox to pool 180 void Destroy(); 181 182 mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData; 183 tainted_expat<XML_Parser> mExpatParser; 184 185 nsString mLastLine; 186 nsString mCDataText; 187 // Various parts of a doctype 188 nsString mDoctypeName; 189 nsString mSystemID; 190 nsString mPublicID; 191 nsString mInternalSubset; 192 bool mInCData; 193 bool mInInternalSubset; 194 bool mInExternalDTD; 195 bool mMadeFinalCallToExpat; 196 197 // Used to track if we're in the parser. 198 bool mInParser; 199 200 nsresult mInternalState; 201 202 // The length of the data in Expat's buffer (in number of PRUnichars). 203 uint32_t mExpatBuffered; 204 205 uint16_t mTagDepth; 206 207 // These sinks all refer the same conceptual object. mOriginalSink is 208 // identical with the nsIContentSink* passed to WillBuildModel, and exists 209 // only to avoid QI-ing back to nsIContentSink*. 210 nsCOMPtr<nsIContentSink> mOriginalSink; 211 nsCOMPtr<nsIExpatSink> mSink; 212 213 const nsCatalogData* mCatalogData; // weak 214 nsTArray<nsCOMPtr<nsIURI>> mURIs; 215 216 // Used for error reporting. 217 uint64_t mInnerWindowID; 218 }; 219 220 class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase { 221 friend class RLBoxExpatSandboxPool; 222 friend class nsExpatDriver; 223 224 public: 225 explicit RLBoxExpatSandboxData(uint64_t aSize) 226 : mozilla::RLBoxSandboxDataBase(aSize) { 227 MOZ_COUNT_CTOR(RLBoxExpatSandboxData); 228 } 229 ~RLBoxExpatSandboxData(); 230 rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); } 231 // After getting a sandbox from the pool we need to register the 232 // Handle{Start,End}Element callbacks and associate the driver with the 233 // sandbox. 234 void AttachDriver(bool IsSystemPrincipal, void* aDriver); 235 void DetachDriver(); 236 237 private: 238 mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox; 239 // Common expat callbacks that persist across calls to {Attach,Detach}Driver, 240 // and consequently across sandbox reuses. 241 sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration; 242 sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData; 243 sandbox_callback_expat<XML_ProcessingInstructionHandler> 244 mHandleProcessingInstruction; 245 sandbox_callback_expat<XML_DefaultHandler> mHandleDefault; 246 sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef; 247 sandbox_callback_expat<XML_CommentHandler> mHandleComment; 248 sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection; 249 sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection; 250 sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl; 251 sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl; 252 // Expat callbacks specific to each driver, and thus (re)set across sandbox 253 // reuses. 254 sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement; 255 sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement; 256 }; 257 258 #endif