nsXMLContentSerializer.h (16455B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* 8 * nsIContentSerializer implementation that can be used with an 9 * nsIDocumentEncoder to convert an XML DOM to an XML string that 10 * could be parsed into more or less the original DOM. 11 */ 12 13 #ifndef nsXMLContentSerializer_h__ 14 #define nsXMLContentSerializer_h__ 15 16 #include "mozilla/Attributes.h" 17 #include "nsCOMPtr.h" 18 #include "nsIContentSerializer.h" 19 #include "nsISupportsUtils.h" 20 #include "nsString.h" 21 #include "nsTArray.h" 22 23 #define kIndentStr u" "_ns 24 #define kEndTag u"</"_ns 25 26 class nsAtom; 27 class nsINode; 28 29 namespace mozilla { 30 class Encoding; 31 } 32 33 class nsXMLContentSerializer : public nsIContentSerializer { 34 public: 35 nsXMLContentSerializer(); 36 37 NS_DECL_ISUPPORTS 38 39 NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn, 40 const mozilla::Encoding* aEncoding, bool aIsCopying, 41 bool aRewriteEncodingDeclaration, 42 bool* aNeedsPreformatScanning, nsAString& aOutput) override; 43 44 NS_IMETHOD AppendText(mozilla::dom::Text* aText, int32_t aStartOffset, 45 int32_t aEndOffset) override; 46 47 NS_IMETHOD AppendCDATASection(mozilla::dom::Text* aCDATASection, 48 int32_t aStartOffset, 49 int32_t aEndOffset) override; 50 51 NS_IMETHOD AppendProcessingInstruction( 52 mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset, 53 int32_t aEndOffset) override; 54 55 NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment, 56 int32_t aStartOffset, int32_t aEndOffset) override; 57 58 NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) override; 59 60 NS_IMETHOD AppendElementStart( 61 mozilla::dom::Element* aElement, 62 mozilla::dom::Element* aOriginalElement) override; 63 64 NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement, 65 mozilla::dom::Element* aOriginalElement) override; 66 67 NS_IMETHOD FlushAndFinish() override { return NS_OK; } 68 69 NS_IMETHOD Finish() override; 70 71 NS_IMETHOD GetOutputLength(uint32_t& aLength) const override; 72 73 NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override; 74 75 NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override { 76 return NS_OK; 77 } 78 NS_IMETHOD ForgetElementForPreformat( 79 mozilla::dom::Element* aElement) override { 80 return NS_OK; 81 } 82 83 protected: 84 virtual ~nsXMLContentSerializer(); 85 86 /** 87 * Appends a char16_t character and increments the column position 88 */ 89 [[nodiscard]] bool AppendToString(const char16_t aChar, 90 nsAString& aOutputStr); 91 92 /** 93 * Appends a nsAString string and increments the column position 94 */ 95 [[nodiscard]] bool AppendToString(const nsAString& aStr, 96 nsAString& aOutputStr); 97 98 /** 99 * Appends a string by replacing all line-endings 100 * by mLineBreak, except in the case of raw output. 101 * It increments the column position. 102 */ 103 [[nodiscard]] bool AppendToStringConvertLF(const nsAString& aStr, 104 nsAString& aOutputStr); 105 106 /** 107 * Appends a string by wrapping it when necessary. 108 * It updates the column position. 109 */ 110 [[nodiscard]] bool AppendToStringWrapped(const nsAString& aStr, 111 nsAString& aOutputStr); 112 113 /** 114 * Appends a string by formating and wrapping it when necessary 115 * It updates the column position. 116 */ 117 [[nodiscard]] bool AppendToStringFormatedWrapped(const nsAString& aStr, 118 nsAString& aOutputStr); 119 120 // used by AppendToStringWrapped 121 [[nodiscard]] bool AppendWrapped_WhitespaceSequence( 122 nsAString::const_char_iterator& aPos, 123 const nsAString::const_char_iterator aEnd, 124 const nsAString::const_char_iterator aSequenceStart, 125 nsAString& aOutputStr); 126 127 // used by AppendToStringFormatedWrapped 128 [[nodiscard]] bool AppendFormatedWrapped_WhitespaceSequence( 129 nsAString::const_char_iterator& aPos, 130 const nsAString::const_char_iterator aEnd, 131 const nsAString::const_char_iterator aSequenceStart, 132 bool& aMayIgnoreStartOfLineWhitespaceSequence, nsAString& aOutputStr); 133 134 // used by AppendToStringWrapped and AppendToStringFormatedWrapped 135 [[nodiscard]] bool AppendWrapped_NonWhitespaceSequence( 136 nsAString::const_char_iterator& aPos, 137 const nsAString::const_char_iterator aEnd, 138 const nsAString::const_char_iterator aSequenceStart, 139 bool& aMayIgnoreStartOfLineWhitespaceSequence, 140 bool& aSequenceStartAfterAWhiteSpace, nsAString& aOutputStr); 141 142 /** 143 * add mLineBreak to the string 144 * It updates the column position and other flags. 145 */ 146 [[nodiscard]] bool AppendNewLineToString(nsAString& aOutputStr); 147 148 /** 149 * Appends a string by translating entities 150 * It doesn't increment the column position 151 */ 152 [[nodiscard]] virtual bool AppendAndTranslateEntities(const nsAString& aStr, 153 nsAString& aOutputStr); 154 155 /** 156 * Helper for virtual AppendAndTranslateEntities that does the actualy work. 157 * 158 * Do not call this directly. Call it via the template helper below. 159 */ 160 private: 161 [[nodiscard]] static bool AppendAndTranslateEntities( 162 const nsAString& aStr, nsAString& aOutputStr, 163 const uint8_t aEntityTable[], uint16_t aMaxTableIndex, 164 const char* const aStringTable[]); 165 166 protected: 167 /** 168 * Helper for calling AppendAndTranslateEntities in a way that guarantees we 169 * don't mess up our aEntityTable sizing. This is a bit more complicated than 170 * it could be, becaue sometimes we don't want to use all of aEntityTable, so 171 * we have to allow passing the amount to use independently. But we can 172 * statically ensure it's not too big. 173 * 174 * The first integer template argument, which callers need to specify 175 * explicitly, is the index of the last entry in aEntityTable that should be 176 * considered for encoding as an entity reference. The second integer 177 * argument will be deduced from the actual table passed in. 178 * 179 * aEntityTable contains as values indices into aStringTable. Those represent 180 * the strings that should be used to replace the characters that are used to 181 * index into aEntityTable. aStringTable[0] should be nullptr, and characters 182 * that do not need replacement should map to 0 in aEntityTable. 183 */ 184 template <uint16_t LargestIndex, uint16_t TableLength> 185 [[nodiscard]] bool AppendAndTranslateEntities( 186 const nsAString& aStr, nsAString& aOutputStr, 187 const uint8_t (&aEntityTable)[TableLength], 188 const char* const aStringTable[]) { 189 static_assert(LargestIndex < TableLength, 190 "Largest allowed index must be smaller than table length"); 191 return AppendAndTranslateEntities(aStr, aOutputStr, aEntityTable, 192 LargestIndex, aStringTable); 193 } 194 195 /** 196 * Max index that can be used with some of our entity tables. 197 */ 198 static const uint16_t kGTVal = 62; 199 200 /** 201 * retrieve the text content of the node and append it to the given string 202 * It doesn't increment the column position 203 */ 204 nsresult AppendTextData(mozilla::dom::Text* aText, int32_t aStartOffset, 205 int32_t aEndOffset, nsAString& aStr, 206 bool aTranslateEntities); 207 208 virtual nsresult PushNameSpaceDecl(const nsAString& aPrefix, 209 const nsAString& aURI, nsIContent* aOwner); 210 void PopNameSpaceDeclsFor(nsIContent* aOwner); 211 212 /** 213 * The problem that ConfirmPrefix fixes is that anyone can insert nodes 214 * through the DOM that have a namespace URI and a random or empty or 215 * previously existing prefix that's totally unrelated to the prefixes 216 * declared at that point through xmlns attributes. So what ConfirmPrefix 217 * does is ensure that we can map aPrefix to the namespace URI aURI (for 218 * example, that the prefix is not already mapped to some other namespace). 219 * aPrefix will be adjusted, if necessary, so the value of the prefix 220 * _after_ this call is what should be serialized. 221 * @param aPrefix the prefix that may need adjusting 222 * @param aURI the namespace URI we want aPrefix to point to 223 * @param aElement the element we're working with (needed for proper default 224 * namespace handling) 225 * @param aIsAttribute true if we're confirming a prefix for an attribute. 226 * @return true if we need to push the (prefix, uri) pair on the namespace 227 * stack (note that this can happen even if the prefix is 228 * empty). 229 */ 230 bool ConfirmPrefix(nsAString& aPrefix, const nsAString& aURI, 231 nsIContent* aElement, bool aIsAttribute); 232 /** 233 * GenerateNewPrefix generates a new prefix and writes it to aPrefix 234 */ 235 void GenerateNewPrefix(nsAString& aPrefix); 236 237 uint32_t ScanNamespaceDeclarations(mozilla::dom::Element* aContent, 238 mozilla::dom::Element* aOriginalElement, 239 const nsAString& aTagNamespaceURI); 240 241 [[nodiscard]] virtual bool SerializeAttributes( 242 mozilla::dom::Element* aContent, mozilla::dom::Element* aOriginalElement, 243 nsAString& aTagPrefix, const nsAString& aTagNamespaceURI, 244 nsAtom* aTagName, nsAString& aStr, uint32_t aSkipAttr, bool aAddNSAttr); 245 246 [[nodiscard]] bool SerializeAttr(const nsAString& aPrefix, 247 const nsAString& aName, 248 const nsAString& aValue, nsAString& aStr, 249 bool aDoEscapeEntities); 250 251 bool IsJavaScript(nsIContent* aContent, nsAtom* aAttrNameAtom, 252 int32_t aAttrNamespaceID, const nsAString& aValueString); 253 254 /** 255 * This method can be redefined to check if the element can be serialized. 256 * It is called when the serialization of the start tag is asked 257 * (AppendElementStart) 258 * In this method you can also force the formating 259 * by setting aForceFormat to true. 260 * @return boolean true if the element can be output 261 */ 262 virtual bool CheckElementStart(mozilla::dom::Element* aElement, 263 bool& aForceFormat, nsAString& aStr, 264 nsresult& aResult); 265 266 /** 267 * This method is responsible for appending the '>' at the end of the start 268 * tag, possibly preceded by '/' and maybe a ' ' before that too. 269 * 270 * aElement and aOriginalElement are the same as the corresponding arguments 271 * to AppendElementStart. 272 */ 273 [[nodiscard]] bool AppendEndOfElementStart( 274 mozilla::dom::Element* aEleemnt, mozilla::dom::Element* aOriginalElement, 275 nsAString& aStr); 276 277 /** 278 * This method can be redefine to serialize additional things just after 279 * the serialization of the start tag. 280 * (called at the end of AppendElementStart) 281 */ 282 [[nodiscard]] virtual bool AfterElementStart(nsIContent* aContent, 283 nsIContent* aOriginalElement, 284 nsAString& aStr) { 285 return true; 286 }; 287 288 /** 289 * This method can be redefined to check if the element can be serialized. 290 * It is called when the serialization of the end tag is asked 291 * (AppendElementEnd) 292 * In this method you can also force the formating 293 * by setting aForceFormat to true. 294 * @return boolean true if the element can be output 295 */ 296 virtual bool CheckElementEnd(mozilla::dom::Element* aElement, 297 mozilla::dom::Element* aOriginalElement, 298 bool& aForceFormat, nsAString& aStr); 299 300 /** 301 * This method can be redefine to serialize additional things just after 302 * the serialization of the end tag. 303 * (called at the end of AppendElementStart) 304 */ 305 virtual void AfterElementEnd(nsIContent* aContent, nsAString& aStr) {}; 306 307 /** 308 * Returns true if a line break should be inserted before an element open tag 309 */ 310 virtual bool LineBreakBeforeOpen(int32_t aNamespaceID, nsAtom* aName); 311 312 /** 313 * Returns true if a line break should be inserted after an element open tag 314 */ 315 virtual bool LineBreakAfterOpen(int32_t aNamespaceID, nsAtom* aName); 316 317 /** 318 * Returns true if a line break should be inserted after an element close tag 319 */ 320 virtual bool LineBreakBeforeClose(int32_t aNamespaceID, nsAtom* aName); 321 322 /** 323 * Returns true if a line break should be inserted after an element close tag 324 */ 325 virtual bool LineBreakAfterClose(int32_t aNamespaceID, nsAtom* aName); 326 327 /** 328 * add intendation. Call only in the case of formating and if the current 329 * position is at 0. It updates the column position. 330 */ 331 [[nodiscard]] bool AppendIndentation(nsAString& aStr); 332 333 [[nodiscard]] bool IncrIndentation(nsAtom* aName); 334 void DecrIndentation(nsAtom* aName); 335 336 // Functions to check for newlines that needs to be added between nodes in 337 // the root of a document. See mAddNewlineForRootNode 338 [[nodiscard]] bool MaybeAddNewlineForRootNode(nsAString& aStr); 339 void MaybeFlagNewlineForRootNode(nsINode* aNode); 340 341 // Functions to check if we enter in or leave from a preformated content 342 virtual void MaybeEnterInPreContent(nsIContent* aNode); 343 virtual void MaybeLeaveFromPreContent(nsIContent* aNode); 344 345 bool ShouldMaintainPreLevel() const; 346 int32_t PreLevel() const { 347 MOZ_ASSERT(ShouldMaintainPreLevel()); 348 return mPreLevel; 349 } 350 int32_t& PreLevel() { 351 MOZ_ASSERT(ShouldMaintainPreLevel()); 352 return mPreLevel; 353 } 354 355 bool MaybeSerializeIsValue(mozilla::dom::Element* aElement, nsAString& aStr); 356 357 int32_t mPrefixIndex; 358 359 struct NameSpaceDecl { 360 nsString mPrefix; 361 nsString mURI; 362 nsIContent* mOwner; 363 }; 364 365 nsTArray<NameSpaceDecl> mNameSpaceStack; 366 367 // nsIDocumentEncoder flags 368 MOZ_INIT_OUTSIDE_CTOR uint32_t mFlags; 369 370 // characters to use for line break 371 nsString mLineBreak; 372 373 // The charset that was passed to Init() 374 nsCString mCharset; 375 376 // current column position on the current line 377 uint32_t mColPos; 378 379 // true = pretty formating should be done (OutputFormated flag) 380 MOZ_INIT_OUTSIDE_CTOR bool mDoFormat; 381 382 // true = no formatting,(OutputRaw flag) 383 // no newline convertion and no rewrap long lines even if OutputWrap is set. 384 MOZ_INIT_OUTSIDE_CTOR bool mDoRaw; 385 386 // true = wrapping should be done (OutputWrap flag) 387 MOZ_INIT_OUTSIDE_CTOR bool mDoWrap; 388 389 // true = we can break lines (OutputDisallowLineBreaking flag) 390 MOZ_INIT_OUTSIDE_CTOR bool mAllowLineBreaking; 391 392 // number of maximum column in a line, in the wrap mode 393 MOZ_INIT_OUTSIDE_CTOR uint32_t mMaxColumn; 394 395 // current indent value 396 nsString mIndent; 397 398 // this is the indentation level after the indentation reached 399 // the maximum length of indentation 400 int32_t mIndentOverflow; 401 402 // says if the indentation has been already added on the current line 403 bool mIsIndentationAddedOnCurrentLine; 404 405 // the string which is currently added is in an attribute 406 bool mInAttribute; 407 408 // true = a newline character should be added. It's only 409 // useful when serializing root nodes. see MaybeAddNewlineForRootNode and 410 // MaybeFlagNewlineForRootNode 411 bool mAddNewlineForRootNode; 412 413 // Indicates that a space will be added if and only if content is 414 // continued on the same line while serializing source. Otherwise, 415 // the newline character acts as the whitespace and no space is needed. 416 // used when mDoFormat = true 417 bool mAddSpace; 418 419 // says that if the next string to add contains a newline character at the 420 // begining, then this newline character should be ignored, because a 421 // such character has already been added into the output string 422 bool mMayIgnoreLineBreakSequence; 423 424 bool mBodyOnly; 425 int32_t mInBody; 426 427 // Non-owning. 428 nsAString* mOutput; 429 430 private: 431 // number of nested elements which have preformated content 432 MOZ_INIT_OUTSIDE_CTOR int32_t mPreLevel; 433 434 static const uint8_t kEntities[]; 435 static const uint8_t kAttrEntities[]; 436 static const char* const kEntityStrings[]; 437 }; 438 439 nsresult NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer); 440 441 #endif