tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsHtml5Highlighter.h (10827B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 #ifndef nsHtml5Highlighter_h
      5 #define nsHtml5Highlighter_h
      6 
      7 #include "nsCOMPtr.h"
      8 #include "nsHtml5TreeOperation.h"
      9 #include "nsHtml5UTF16Buffer.h"
     10 #include "nsHtml5TreeOperation.h"
     11 #include "nsAHtml5TreeOpSink.h"
     12 
     13 #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
        // NOTE(review): presumably the number of element handles held by one
        // nsHtml5Highlighter::mHandles array -- confirm at the use sites.
     14 
     15 /**
     16 * A state machine for generating HTML for display in View Source based on
     17 * the transitions the tokenizer makes on the source being viewed.
     18 */
     19 class nsHtml5Highlighter {
     20 public:
     21  /**
     22   * The constructor.
     23   *
     24   * @param aOpSink the sink for the tree ops generated by this highlighter
     25   */
     26  explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);
     27 
     28  /**
     29   * The destructor.
     30   */
     31  ~nsHtml5Highlighter();
     32 
     33  /**
     34   * Set the op sink (for speculation).
          *
          * @param aOpSink the new sink for the tree ops
     35   */
     36  void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
     37 
     38  /**
     39   * Reset state to after generated head but before processing any of the input
     40   * stream.
     41   */
     42  void Rewind();
     43 
     44  /**
     45   * Starts the generated document.
          *
          * @param aTitle presumably the title of the generated document (TODO:
          *        confirm against the definition)
     46   */
     47  void Start(const nsAutoString& aTitle);
     48 
     49  /**
     50   * Updates the charset source via the op queue.
          *
          * @param aCharsetSource the new charset source
     51   */
     52  void UpdateCharsetSource(nsCharsetSource aCharsetSource);
     53 
     54  /**
     55   * Report a tokenizer state transition.
     56   *
     57   * @param aState the state being transitioned to
     58   * @param aReconsume whether this is a reconsuming transition
     59   * @param aPos the tokenizer's current position into the buffer
          * @return a tokenizer state; NOTE(review): presumably the state the
          *         tokenizer should continue in -- confirm against the definition
     60   */
     61  int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
     62 
     63  /**
     64   * Report end of file.
     65   *
     66   * Returns `true` normally and `false` on OOM.
     67   */
     68  [[nodiscard]] bool End();
     69 
     70  /**
     71   * Set the current buffer being tokenized.
          *
          * @param aBuffer the buffer being tokenized
     72   */
     73  void SetBuffer(nsHtml5UTF16Buffer* aBuffer);
     74 
     75  /**
     76   * Let go of the buffer being tokenized but first, flush text from it.
     77   *
     78   * @param aPos the first UTF-16 code unit not to flush
     79   */
     80  void DropBuffer(int32_t aPos);
     81 
     82  /**
     83   * Query whether there are so many ops in the queue
     84   * that they should be flushed now.
     85   *
     86   * @return true if FlushOps() should be called now
     87   */
     88  bool ShouldFlushOps();
     89 
     90  /**
     91   * Flush the tree ops into the sink.
     92   *
     93   * @return Ok(true) if there were ops to flush, Ok(false)
     94   *         if there were no ops to flush and Err() on OOM.
     95   */
     96  mozilla::Result<bool, nsresult> FlushOps();
     97 
     98  /**
     99   * Linkify the current attribute value if the attribute name is one of
    100   * known URL attributes. (When executing tree ops, javascript: URLs will
    101   * not be linkified, though.)
    102   *
    103   * @param aName the name of the attribute
    104   * @param aValue the value of the attribute
    105   */
    106  void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
    107                                  nsHtml5String aValue);
    108 
    109  /**
    110   * Inform the highlighter that the tokenizer successfully completed a
    111   * named character reference.
    112   */
    113  void CompletedNamedCharacterReference();
    114 
    115  /**
    116   * Adds an error annotation to the node that's currently on top of
    117   * mStack.
    118   *
    119   * @param aMsgId the id of the message in the property file
    120   */
    121  void AddErrorToCurrentNode(const char* aMsgId);
    122 
    123  /**
    124   * Adds an error annotation to the node that corresponds to the most
    125   * recently opened markup declaration/tag span, character reference or
    126   * run of text.
    127   *
    128   * @param aMsgId the id of the message in the property file
    129   */
    130  void AddErrorToCurrentRun(const char* aMsgId);
    131 
    132  /**
    133   * Adds an error annotation to the node that corresponds to the most
    134   * recently opened markup declaration/tag span, character reference or
    135   * run of text with one atom to use when formatting the message.
    136   *
    137   * @param aMsgId the id of the message in the property file
    138   * @param aName the atom
    139   */
    140  void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);
    141 
    142  /**
    143   * Adds an error annotation to the node that corresponds to the most
    144   * recently opened markup declaration/tag span, character reference or
    145   * run of text with two atoms to use when formatting the message.
    146   *
    147   * @param aMsgId the id of the message in the property file
    148   * @param aName the first atom
    149   * @param aOther the second atom
    150   */
    151  void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);
    152 
    153  /**
    154   * Adds an error annotation to the node that corresponds to the most
    155   * recent potentially character reference-starting ampersand.
    156   *
    157   * @param aMsgId the id of the message in the property file
    158   */
    159  void AddErrorToCurrentAmpersand(const char* aMsgId);
    160 
    161  /**
    162   * Adds an error annotation to the node that corresponds to the most
    163   * recent potentially self-closing slash.
    164   *
    165   * @param aMsgId the id of the message in the property file
    166   */
    167  void AddErrorToCurrentSlash(const char* aMsgId);
    168 
    169  /**
    170   * Enqueues a tree op for adding base to the urls with the view-source:
    171   * URL scheme.
          *
    172   * @param aValue the base URL to add
    173   */
    174  void AddBase(nsHtml5String aValue);
    175 
    176  /** Starts the body */
    177  void StartBodyContents();
    178 
    179 private:
    180  /**
    181   * Starts a wrapper around a run of characters.
    182   */
    183  void StartCharacters();
    184 
    185  /**
    186   * Starts a span with no class.
    187   */
    188  void StartSpan();
    189 
    190  /**
    191   * Starts a <span> and sets the class attribute on it.
    192   *
    193   * @param aClass the class to set (MUST be a static string that does not
    194   *        need to be released!)
    195   */
    196  void StartSpan(const char16_t* aClass);
    197 
    198  /**
    199   * End the current <span> or <a> in the highlighter output.
    200   */
    201  void EndSpanOrA();
    202 
    203  /**
          * Ends a wrapper around a run of characters.
          *
          * NOTE(review): the name suggests that this also starts a markup run;
          * confirm against the definition.
          */
    204  void EndCharactersAndStartMarkupRun();
    205 
    206  /**
    207   * Starts an <a>.
    208   */
    209  void StartA();
    210 
    211  /**
    212   * Flushes characters up to but not including the current one.
    213   */
    214  void FlushChars();
    215 
    216  /**
    217   * Flushes characters up to and including the current one.
    218   */
    219  void FlushCurrent();
    220 
    221  /**
    222   * Finishes highlighting a tag in the input data by closing the open
    223   * <span> and <a> elements in the highlighter output and then starts
    224   * another <span> for potentially highlighting characters potentially
    225   * appearing next.
    226   */
    227  void FinishTag();
    228 
    229  /**
    230   * Adds a class attribute to the current node.
    231   *
    232   * @param aClass the class to set (MUST be a static string that does not
    233   *        need to be released!)
    234   */
    235  void AddClass(const char16_t* aClass);
    236 
    237  /**
    238   * Allocates a handle for an element.
    239   *
    240   * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
    241   * in nsHtml5TreeBuilderHSupplement.h.
    242   *
    243   * @return the handle
    244   */
    245  nsIContent** AllocateContentHandle();
    246 
    247  /**
    248   * Enqueues an element creation tree operation.
    249   *
    250   * @param aName the name of the element
    251   * @param aAttributes the attribute holder (ownership will be taken) or
    252   *        nullptr for no attributes
    253   * @param aIntendedParent the intended parent node for the created element
    254   * @param aCreator the content creator function
    255   * @return the handle for the element that will be created
    256   */
    257  nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
    258                             nsIContent** aIntendedParent,
    259                             mozilla::dom::HTMLContentCreatorFunction aCreator);
    260 
    261  /**
    262   * Gets the handle for the current node. May be called only after the
    263   * root element has been set.
    264   *
    265   * @return the handle for the current node
    266   */
    267  nsIContent** CurrentNode();
    268 
    269  /**
    270   * Create an element and push it (its handle) on the stack.
    271   *
    272   * @param aName the name of the element
    273   * @param aAttributes the attribute holder (ownership will be taken) or
    274   *        nullptr for no attributes
    275   * @param aCreator the content creator function
    276   */
    277  void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
    278            mozilla::dom::HTMLContentCreatorFunction aCreator);
    279 
    280  /** Pushes a <span id="line<lineno>"> */
    281  void PushCurrentLineContainer();
    282 
    283  /**
    284   * Pops all inlines from the stack, pushes a pre, and pushes all inlines back
    285   * with the same attributes.
    286   */
    287  void NewLine();
    288 
    289  /**
    290   * Pops the current node off the stack.
    291   */
    292  void Pop();
    293 
    294  /**
    295   * Appends text content to the current node.
    296   *
    297   * @param aBuffer the buffer to copy from
    298   * @param aStart the index of the first code unit to copy
    299   * @param aLength the number of code units to copy
    300   */
    301  void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
    302                        int32_t aLength);
    303 
    304  /**
    305   * Enqueues a tree op for adding an href attribute with the view-source:
    306   * URL scheme to the current node.
    307   *
    308   * @param aValue the (potentially relative) URL to link to
    309   */
    310  void AddViewSourceHref(nsHtml5String aValue);
    311 
    312  /**
    313   * The state we are transitioning away from.
    314   */
    315  int32_t mState;
    316 
    317  /**
    318   * The index of the first UTF-16 code unit in mBuffer that hasn't been
    319   * flushed yet.
    320   */
    321  int32_t mCStart;
    322 
    323  /**
    324   * The position of the code unit in mBuffer that caused the current
    325   * transition.
    326   */
    327  int32_t mPos;
    328 
    329  /**
    330   * The current line number.
    331   */
    332  int32_t mLineNumber;
    333 
    334  /**
    335   * The number of inline elements open inside the <pre> excluding the
    336   * span potentially wrapping a run of characters.
    337   */
    338  int32_t mInlinesOpen;
    339 
    340  /**
    341   * Whether there's a span wrapping a run of characters (excluding CDATA
    342   * section) open.
    343   */
    344  bool mInCharacters;
    345 
    346  /**
    347   * The current buffer being tokenized.
    348   */
    349  nsHtml5UTF16Buffer* mBuffer;
    350 
    351  /**
    352   * The outgoing tree op queue.
    353   */
    354  nsTArray<nsHtml5TreeOperation> mOpQueue;
    355 
    356  /**
    357   * The tree op stage for the tree op executor or a speculation when looking
    358   * for meta charset.
    359   *
    360   * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
    361   * object, because this object is owned by the nsHtml5Tokenizer instance that
    362   * is owned by the nsHtml5StreamParser, which keeps the executor alive via
    363   * nsHtml5StreamParser::mExecutorFlusher.
    364   */
    365  nsAHtml5TreeOpSink* mOpSink;
    366 
    367  /**
    368   * The most recently opened markup declaration/tag or run of characters.
    369   */
    370  nsIContent** mCurrentRun;
    371 
    372  /**
    373   * The most recent ampersand in a place where character references were
    374   * allowed.
    375   */
    376  nsIContent** mAmpersand;
    377 
    378  /**
    379   * The most recent slash that might become a self-closing slash.
    380   */
    381  nsIContent** mSlash;
    382 
    383  /**
    384   * Memory for element handles.
    385   */
    386  mozilla::UniquePtr<nsIContent*[]> mHandles;
    387 
    388  /**
    389   * Number of handles used in mHandles.
    390   */
    391  int32_t mHandlesUsed;
    392 
    393  /**
    394   * A holder for old contents of mHandles.
    395   */
    396  nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;
    397 
    398  /**
    399   * The element stack.
    400   */
    401  nsTArray<nsIContent**> mStack;
    402 
    403  /**
    404   * Whether base is already visited once.
    405   */
    406  bool mSeenBase;
    407 };
    408 
    409 #endif  // nsHtml5Highlighter_h