tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ImportScanner.h (3281B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=2 sw=2 et tw=78: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef mozilla_ImportScanner_h
      8 #define mozilla_ImportScanner_h
      9 
     10 /* A simple best-effort scanner for @import rules for the HTML parser */
     11 
     12 #include "nsString.h"
     13 #include "nsTArray.h"
     14 
     15 namespace mozilla {
     16 
     17 struct ImportScanner final {
     18  ImportScanner() = default;
     19 
     20  // Called when a <style> element starts.
     21  //
     22  // Note that this function cannot make assumptions about the internal state,
     23  // as you can nest <svg:style> elements.
     24  void Start();
     25 
     26  // Called when a <style> element ends. Returns the list of URLs scanned.
     27  nsTArray<nsString> Stop();
     28 
     29  // Whether Scan() should be called (false outside <style> and once Done).
     30  bool ShouldScan() const {
     31    return mState != State::OutsideOfStyleElement && mState != State::Done;
     32  }
     33 
     34  // Scan() should be called when text content is parsed, and returns an array
     35  // of found URLs, if any.
     36  //
     37  // Asserts ShouldScan() returns true.
     38  nsTArray<nsString> Scan(Span<const char16_t> aFragment);
     39 
     40 private:
     41  enum class State {
     42    // Initial state, doesn't scan anything until Start() is called.
     43    OutsideOfStyleElement,
     44    // In an idle state during the stylesheet scanning, either at the
     45    // beginning or after parsing a rule.
     46    Idle,
     47    // We've seen a '/' character, but not the '*' yet, so we don't know if
     48    // it's a comment.
     49    MaybeAtCommentStart,
     50    // We're inside a comment.
     51    AtComment,
     52    // We've seen a '*' while we're in a comment, but we don't know yet whether
     53    // '/' comes afterwards (thus ending the comment).
     54    MaybeAtCommentEnd,
     55    // We're parsing the '@' rule name.
     56    AtRuleName,
     57    // We're parsing the '@' rule value.
     58    AtRuleValue,
     59    // We're parsing the '@' rule value and we've seen the delimiter (quote or
     60    // url() function) that encloses the url.
     61    AtRuleValueDelimited,
     62    // We've seen the url, but haven't seen the ';' finishing the rule yet.
     63    AfterRuleValue,
     64    // We've seen anything that is not an @import or a @charset rule, and thus
     65    // further @import / @charset should not be parsed.
     66    Done,
     67  };
     68 
     69  void ResetState();
     70  void EmitUrl();
     71  [[nodiscard]] State Scan(char16_t aChar);  // Per-character step; presumably returns the next state.
     72 
     73  static constexpr const uint32_t kMaxRuleNameLength = 7;  // Length of "charset"; "import" is shorter.
     74 
     75  State mState = State::OutsideOfStyleElement;  // Current scanner state.
     76  nsAutoStringN<kMaxRuleNameLength> mRuleName;  // Sized for the longest rule name we care about.
     77  nsAutoStringN<128> mRuleValue;  // Presumably the url text accumulated so far; confirm in the .cpp.
     78  nsAutoStringN<128> mAfterRuleValue;  // Presumably text seen after the url; confirm in the .cpp.
     79  nsTArray<nsString> mUrlsFound;  // Presumably the URLs handed back by Scan() / Stop(); confirm.
     80 
     81  // This is conceptually part of the AtRuleValue* / AfterRuleValue states,
     82  // and serves to differentiate between @import (where we actually care about
     83  // the value) and @charset (where we don't). It's just more convenient this
     84  // way than having separate states for them.
     85  bool mInImportRule = false;
     86  // If we're in the AtRuleValueDelimited state, what is the closing character
     87  // that will end the value. This is either a parenthesis (for unquoted
     88  // urls), or a quote, either single or double.
     89  char16_t mUrlValueDelimiterClosingChar = 0;
     90 };
     91 
     92 }  // namespace mozilla
     93 
     94 #endif