tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsXMLContentSerializer.h (16455B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 /*
      8 * nsIContentSerializer implementation that can be used with an
      9 * nsIDocumentEncoder to convert an XML DOM to an XML string that
     10 * could be parsed into more or less the original DOM.
     11 */
     12 
     13 #ifndef nsXMLContentSerializer_h__
     14 #define nsXMLContentSerializer_h__
     15 
     16 #include "mozilla/Attributes.h"
     17 #include "nsCOMPtr.h"
     18 #include "nsIContentSerializer.h"
     19 #include "nsISupportsUtils.h"
     20 #include "nsString.h"
     21 #include "nsTArray.h"
     22 
     23 #define kIndentStr u"  "_ns  // char16_t literal of two spaces; NOTE(review): presumably one indentation step - confirm at use sites
     24 #define kEndTag u"</"_ns  // char16_t literal: '<' followed by '/'; NOTE(review): presumably the opener of an end tag - confirm at use sites
     25 
     26 class nsAtom;
     27 class nsINode;
     28 
     29 namespace mozilla {
     30 class Encoding;
     31 }
     32 
     33 class nsXMLContentSerializer : public nsIContentSerializer {
     34 public:
     35  nsXMLContentSerializer();
     36 
     37  NS_DECL_ISUPPORTS
     38 
         // nsIContentSerializer overrides (every method below is marked override).
     39  NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
     40                  const mozilla::Encoding* aEncoding, bool aIsCopying,
     41                  bool aRewriteEncodingDeclaration,
     42                  bool* aNeedsPreformatScanning, nsAString& aOutput) override;
     43 
     44  NS_IMETHOD AppendText(mozilla::dom::Text* aText, int32_t aStartOffset,
     45                        int32_t aEndOffset) override;
     46 
     47  NS_IMETHOD AppendCDATASection(mozilla::dom::Text* aCDATASection,
     48                                int32_t aStartOffset,
     49                                int32_t aEndOffset) override;
     50 
     51  NS_IMETHOD AppendProcessingInstruction(
     52      mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset,
     53      int32_t aEndOffset) override;
     54 
     55  NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
     56                           int32_t aStartOffset, int32_t aEndOffset) override;
     57 
     58  NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) override;
     59 
     60  NS_IMETHOD AppendElementStart(
     61      mozilla::dom::Element* aElement,
     62      mozilla::dom::Element* aOriginalElement) override;
     63 
     64  NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
     65                              mozilla::dom::Element* aOriginalElement) override;
     66 
         // Inline no-op in this class: always returns NS_OK.
     67  NS_IMETHOD FlushAndFinish() override { return NS_OK; }
     68 
     69  NS_IMETHOD Finish() override;
     70 
     71  NS_IMETHOD GetOutputLength(uint32_t& aLength) const override;
     72 
     73  NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
     74 
         // The two preformat hooks below are inline no-ops in this class: both
         // ignore aElement and always return NS_OK.
     75  NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override {
     76    return NS_OK;
     77  }
     78  NS_IMETHOD ForgetElementForPreformat(
     79      mozilla::dom::Element* aElement) override {
     80    return NS_OK;
     81  }
     82 
     83 protected:
     84  virtual ~nsXMLContentSerializer();
     85 
     86  /**
     87   * Appends a char16_t character and increments the column position
     88   */
     89  [[nodiscard]] bool AppendToString(const char16_t aChar,
     90                                    nsAString& aOutputStr);
     91 
     92  /**
     93   * Appends a nsAString string and increments the column position
     94   */
     95  [[nodiscard]] bool AppendToString(const nsAString& aStr,
     96                                    nsAString& aOutputStr);
     97 
     98  /**
     99   * Appends a string by replacing all line-endings
    100   * by mLineBreak, except in the case of raw output.
    101   * It increments the column position.
    102   */
    103  [[nodiscard]] bool AppendToStringConvertLF(const nsAString& aStr,
    104                                             nsAString& aOutputStr);
    105 
    106  /**
    107   * Appends a string by wrapping it when necessary.
    108   * It updates the column position.
    109   */
    110  [[nodiscard]] bool AppendToStringWrapped(const nsAString& aStr,
    111                                           nsAString& aOutputStr);
    112 
    113  /**
    114   * Appends a string by formatting and wrapping it when necessary.
    115   * It updates the column position.
    116   */
    117  [[nodiscard]] bool AppendToStringFormatedWrapped(const nsAString& aStr,
    118                                                   nsAString& aOutputStr);
    119 
    120  // used by AppendToStringWrapped
    121  [[nodiscard]] bool AppendWrapped_WhitespaceSequence(
    122      nsAString::const_char_iterator& aPos,
    123      const nsAString::const_char_iterator aEnd,
    124      const nsAString::const_char_iterator aSequenceStart,
    125      nsAString& aOutputStr);
    126 
    127  // used by AppendToStringFormatedWrapped
    128  [[nodiscard]] bool AppendFormatedWrapped_WhitespaceSequence(
    129      nsAString::const_char_iterator& aPos,
    130      const nsAString::const_char_iterator aEnd,
    131      const nsAString::const_char_iterator aSequenceStart,
    132      bool& aMayIgnoreStartOfLineWhitespaceSequence, nsAString& aOutputStr);
    133 
    134  // used by AppendToStringWrapped and AppendToStringFormatedWrapped
    135  [[nodiscard]] bool AppendWrapped_NonWhitespaceSequence(
    136      nsAString::const_char_iterator& aPos,
    137      const nsAString::const_char_iterator aEnd,
    138      const nsAString::const_char_iterator aSequenceStart,
    139      bool& aMayIgnoreStartOfLineWhitespaceSequence,
    140      bool& aSequenceStartAfterAWhiteSpace, nsAString& aOutputStr);
    141 
    142  /**
    143   * Adds mLineBreak to the string.
    144   * It updates the column position and other flags.
    145   */
    146  [[nodiscard]] bool AppendNewLineToString(nsAString& aOutputStr);
    147 
    148  /**
    149   * Appends a string by translating entities.
    150   * It doesn't increment the column position.
    151   */
    152  [[nodiscard]] virtual bool AppendAndTranslateEntities(const nsAString& aStr,
    153                                                        nsAString& aOutputStr);
    154 
    155  /**
    156   * Helper for virtual AppendAndTranslateEntities that does the actual work.
    157   *
    158   * Do not call this directly.  Call it via the template helper below.
    159   */
    160 private:
    161  [[nodiscard]] static bool AppendAndTranslateEntities(
    162      const nsAString& aStr, nsAString& aOutputStr,
    163      const uint8_t aEntityTable[], uint16_t aMaxTableIndex,
    164      const char* const aStringTable[]);
    165 
    166 protected:
    167  /**
    168   * Helper for calling AppendAndTranslateEntities in a way that guarantees we
    169   * don't mess up our aEntityTable sizing.  This is a bit more complicated than
    170   * it could be, because sometimes we don't want to use all of aEntityTable, so
    171   * we have to allow passing the amount to use independently.  But we can
    172   * statically ensure it's not too big.
    173   *
    174   * The first integer template argument, which callers need to specify
    175   * explicitly, is the index of the last entry in aEntityTable that should be
    176   * considered for encoding as an entity reference.  The second integer
    177   * argument will be deduced from the actual table passed in.
    178   *
    179   * aEntityTable contains as values indices into aStringTable.  Those represent
    180   * the strings that should be used to replace the characters that are used to
    181   * index into aEntityTable.  aStringTable[0] should be nullptr, and characters
    182   * that do not need replacement should map to 0 in aEntityTable.
    183   */
    184  template <uint16_t LargestIndex, uint16_t TableLength>
    185  [[nodiscard]] bool AppendAndTranslateEntities(
    186      const nsAString& aStr, nsAString& aOutputStr,
    187      const uint8_t (&aEntityTable)[TableLength],
    188      const char* const aStringTable[]) {
             // Compile-time bound check: the requested last index must lie inside
             // the table whose length was deduced from the array reference.
    189    static_assert(LargestIndex < TableLength,
    190                  "Largest allowed index must be smaller than table length");
    191    return AppendAndTranslateEntities(aStr, aOutputStr, aEntityTable,
    192                                      LargestIndex, aStringTable);
    193  }
    194 
    195  /**
    196   * Max index that can be used with some of our entity tables (62 is '>').
    197   */
    198  static const uint16_t kGTVal = 62;
    199 
    200  /**
    201   * Retrieves the text content of the node and appends it to the given string.
    202   * It doesn't increment the column position.
    203   */
    204  nsresult AppendTextData(mozilla::dom::Text* aText, int32_t aStartOffset,
    205                          int32_t aEndOffset, nsAString& aStr,
    206                          bool aTranslateEntities);
    207 
         // NOTE(review): presumably these push/pop entries of mNameSpaceStack for
         // the given owner node - confirm against the definitions in the .cpp.
    208  virtual nsresult PushNameSpaceDecl(const nsAString& aPrefix,
    209                                     const nsAString& aURI, nsIContent* aOwner);
    210  void PopNameSpaceDeclsFor(nsIContent* aOwner);
    211 
    212  /**
    213   * The problem that ConfirmPrefix fixes is that anyone can insert nodes
    214   * through the DOM that have a namespace URI and a random or empty or
    215   * previously existing prefix that's totally unrelated to the prefixes
    216   * declared at that point through xmlns attributes.  So what ConfirmPrefix
    217   * does is ensure that we can map aPrefix to the namespace URI aURI (for
    218   * example, that the prefix is not already mapped to some other namespace).
    219   * aPrefix will be adjusted, if necessary, so the value of the prefix
    220   * _after_ this call is what should be serialized.
    221   * @param aPrefix the prefix that may need adjusting
    222   * @param aURI the namespace URI we want aPrefix to point to
    223   * @param aElement the element we're working with (needed for proper default
    224   *                 namespace handling)
    225   * @param aIsAttribute true if we're confirming a prefix for an attribute.
    226   * @return true if we need to push the (prefix, uri) pair on the namespace
    227   *                 stack (note that this can happen even if the prefix is
    228   *                 empty).
    229   */
    230  bool ConfirmPrefix(nsAString& aPrefix, const nsAString& aURI,
    231                     nsIContent* aElement, bool aIsAttribute);
    232  /**
    233   * GenerateNewPrefix generates a new prefix and writes it to aPrefix
    234   */
    235  void GenerateNewPrefix(nsAString& aPrefix);
    236 
    237  uint32_t ScanNamespaceDeclarations(mozilla::dom::Element* aContent,
    238                                     mozilla::dom::Element* aOriginalElement,
    239                                     const nsAString& aTagNamespaceURI);
    240 
    241  [[nodiscard]] virtual bool SerializeAttributes(
    242      mozilla::dom::Element* aContent, mozilla::dom::Element* aOriginalElement,
    243      nsAString& aTagPrefix, const nsAString& aTagNamespaceURI,
    244      nsAtom* aTagName, nsAString& aStr, uint32_t aSkipAttr, bool aAddNSAttr);
    245 
    246  [[nodiscard]] bool SerializeAttr(const nsAString& aPrefix,
    247                                   const nsAString& aName,
    248                                   const nsAString& aValue, nsAString& aStr,
    249                                   bool aDoEscapeEntities);
    250 
    251  bool IsJavaScript(nsIContent* aContent, nsAtom* aAttrNameAtom,
    252                    int32_t aAttrNamespaceID, const nsAString& aValueString);
    253 
    254  /**
    255   * This method can be redefined to check if the element can be serialized.
    256   * It is called when the serialization of the start tag is asked
    257   * (AppendElementStart)
    258   * In this method you can also force the formatting
    259   * by setting aForceFormat to true.
    260   * @return boolean  true if the element can be output
    261   */
    262  virtual bool CheckElementStart(mozilla::dom::Element* aElement,
    263                                 bool& aForceFormat, nsAString& aStr,
    264                                 nsresult& aResult);
    265 
    266  /**
    267   * This method is responsible for appending the '>' at the end of the start
    268   * tag, possibly preceded by '/' and maybe a ' ' before that too.
    269   *
    270   * aElement and aOriginalElement are the same as the corresponding arguments
    271   * to AppendElementStart.
    272   */
    273  [[nodiscard]] bool AppendEndOfElementStart(
    274      mozilla::dom::Element* aEleemnt, mozilla::dom::Element* aOriginalElement,
    275      nsAString& aStr);
    276 
    277  /**
    278   * This method can be redefined to serialize additional things just after
    279   * the serialization of the start tag.
    280   * (called at the end of AppendElementStart)
    281   */
    282  [[nodiscard]] virtual bool AfterElementStart(nsIContent* aContent,
    283                                               nsIContent* aOriginalElement,
    284                                               nsAString& aStr) {
             // Default implementation appends nothing and reports success.
    285    return true;
    286  };
    287 
    288  /**
    289   * This method can be redefined to check if the element can be serialized.
    290   * It is called when the serialization of the end tag is asked
    291   * (AppendElementEnd)
    292   * In this method you can also force the formatting
    293   * by setting aForceFormat to true.
    294   * @return boolean  true if the element can be output
    295   */
    296  virtual bool CheckElementEnd(mozilla::dom::Element* aElement,
    297                               mozilla::dom::Element* aOriginalElement,
    298                               bool& aForceFormat, nsAString& aStr);
    299 
    300  /**
    301   * This method can be redefined to serialize additional things just after
    302   * the serialization of the end tag. The default implementation is empty.
    303   * (called at the end of AppendElementEnd - TODO confirm in the .cpp)
    304   */
    305  virtual void AfterElementEnd(nsIContent* aContent, nsAString& aStr) {};
    306 
    307  /**
    308   * Returns true if a line break should be inserted before an element open tag
    309   */
    310  virtual bool LineBreakBeforeOpen(int32_t aNamespaceID, nsAtom* aName);
    311 
    312  /**
    313   * Returns true if a line break should be inserted after an element open tag
    314   */
    315  virtual bool LineBreakAfterOpen(int32_t aNamespaceID, nsAtom* aName);
    316 
    317  /**
    318   * Returns true if a line break should be inserted before an element close tag
    319   */
    320  virtual bool LineBreakBeforeClose(int32_t aNamespaceID, nsAtom* aName);
    321 
    322  /**
    323   * Returns true if a line break should be inserted after an element close tag
    324   */
    325  virtual bool LineBreakAfterClose(int32_t aNamespaceID, nsAtom* aName);
    326 
    327  /**
    328   * Adds indentation. Call only in the case of formatting and if the current
    329   * position is at 0. It updates the column position.
    330   */
    331  [[nodiscard]] bool AppendIndentation(nsAString& aStr);
    332 
    333  [[nodiscard]] bool IncrIndentation(nsAtom* aName);
    334  void DecrIndentation(nsAtom* aName);
    335 
    336  // Functions to check for newlines that need to be added between nodes in
    337  // the root of a document. See mAddNewlineForRootNode
    338  [[nodiscard]] bool MaybeAddNewlineForRootNode(nsAString& aStr);
    339  void MaybeFlagNewlineForRootNode(nsINode* aNode);
    340 
    341  // Functions to check if we enter in or leave from a preformatted content
    342  virtual void MaybeEnterInPreContent(nsIContent* aNode);
    343  virtual void MaybeLeaveFromPreContent(nsIContent* aNode);
    344 
         // Accessors for the private mPreLevel counter. Both overloads assert
         // ShouldMaintainPreLevel() first; the non-const one returns a mutable
         // reference so callers can modify the counter in place.
    345  bool ShouldMaintainPreLevel() const;
    346  int32_t PreLevel() const {
    347    MOZ_ASSERT(ShouldMaintainPreLevel());
    348    return mPreLevel;
    349  }
    350  int32_t& PreLevel() {
    351    MOZ_ASSERT(ShouldMaintainPreLevel());
    352    return mPreLevel;
    353  }
    354 
    355  bool MaybeSerializeIsValue(mozilla::dom::Element* aElement, nsAString& aStr);
    356 
    357  int32_t mPrefixIndex;
    358 
         // Element type of mNameSpaceStack: a prefix string, a URI string and the
         // nsIContent recorded as mOwner (held as a raw pointer).
    359  struct NameSpaceDecl {
    360    nsString mPrefix;
    361    nsString mURI;
    362    nsIContent* mOwner;
    363  };
    364 
    365  nsTArray<NameSpaceDecl> mNameSpaceStack;
    366 
    367  // nsIDocumentEncoder flags
    368  MOZ_INIT_OUTSIDE_CTOR uint32_t mFlags;
    369 
    370  // characters to use for line break
    371  nsString mLineBreak;
    372 
    373  // The charset that was passed to Init()
    374  nsCString mCharset;
    375 
    376  // current column position on the current line
    377  uint32_t mColPos;
    378 
    379  // true = pretty formatting should be done (OutputFormatted flag)
    380  MOZ_INIT_OUTSIDE_CTOR bool mDoFormat;
    381 
    382  // true = no formatting (OutputRaw flag):
    383  // no newline conversion and no rewrapping of long lines even if OutputWrap is set.
    384  MOZ_INIT_OUTSIDE_CTOR bool mDoRaw;
    385 
    386  // true = wrapping should be done (OutputWrap flag)
    387  MOZ_INIT_OUTSIDE_CTOR bool mDoWrap;
    388 
    389  // true = we can break lines (OutputDisallowLineBreaking flag)
    390  MOZ_INIT_OUTSIDE_CTOR bool mAllowLineBreaking;
    391 
    392  // maximum number of columns in a line, in the wrap mode
    393  MOZ_INIT_OUTSIDE_CTOR uint32_t mMaxColumn;
    394 
    395  // current indent value
    396  nsString mIndent;
    397 
    398  // this is the indentation level after the indentation reached
    399  // the maximum length of indentation
    400  int32_t mIndentOverflow;
    401 
    402  // says if the indentation has been already added on the current line
    403  bool mIsIndentationAddedOnCurrentLine;
    404 
    405  // the string which is currently added is in an attribute
    406  bool mInAttribute;
    407 
    408  // true = a newline character should be added. It's only
    409  // useful when serializing root nodes. see MaybeAddNewlineForRootNode and
    410  // MaybeFlagNewlineForRootNode
    411  bool mAddNewlineForRootNode;
    412 
    413  // Indicates that a space will be added if and only if content is
    414  // continued on the same line while serializing source.  Otherwise,
    415  // the newline character acts as the whitespace and no space is needed.
    416  // used when mDoFormat = true
    417  bool mAddSpace;
    418 
    419  // says that if the next string to add contains a newline character at the
    420  // beginning, then this newline character should be ignored, because such
    421  // a character has already been added into the output string
    422  bool mMayIgnoreLineBreakSequence;
    423 
    424  bool mBodyOnly;
    425  int32_t mInBody;
    426 
    427  // Non-owning.
    428  nsAString* mOutput;
    429 
    430 private:
    431  // number of nested elements which have preformatted content
    432  MOZ_INIT_OUTSIDE_CTOR int32_t mPreLevel;
    433 
         // Static lookup tables, defined out of line. NOTE(review): presumably the
         // entity/string tables fed to AppendAndTranslateEntities - confirm.
    434  static const uint8_t kEntities[];
    435  static const uint8_t kAttrEntities[];
    436  static const char* const kEntityStrings[];
    437 };
    438 
    439 nsresult NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer);  // NOTE(review): presumably a factory that returns a new nsXMLContentSerializer through aSerializer; definition not visible here - confirm
    440 
    441 #endif