tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lexer.js (5775B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 "use strict";
      6 
      7 const EEOFCHARACTERS_NONE = 0x0000;
      8 
      9 // to handle \<EOF> inside strings
     10 const EEOFCHARACTERS_DROPBACKSLASH = 0x0001;
     11 
     12 // to handle \<EOF> outside strings
     13 const EEOFCHARACTERS_REPLACEMENTCHAR = 0x0002;
     14 
     15 // to close comments
     16 const EEOFCHARACTERS_ASTERISK = 0x0004;
     17 const EEOFCHARACTERS_SLASH = 0x0008;
     18 
     19 // to close double-quoted strings
     20 const EEOFCHARACTERS_DOUBLEQUOTE = 0x0010;
     21 
     22 // to close single-quoted strings
     23 const EEOFCHARACTERS_SINGLEQUOTE = 0x0020;
     24 
     25 // to close URLs
     26 const EEOFCHARACTERS_CLOSEPAREN = 0x0040;
     27 
     28 // Bridge the char/string divide.
     29 const APOSTROPHE = "'".charCodeAt(0);
     30 const ASTERISK = "*".charCodeAt(0);
     31 const QUOTATION_MARK = '"'.charCodeAt(0);
     32 const RIGHT_PARENTHESIS = ")".charCodeAt(0);
     33 const SOLIDUS = "/".charCodeAt(0);
     34 
     35 const UCS2_REPLACEMENT_CHAR = 0xfffd;
     36 
     37 const kImpliedEOFCharacters = [
     38  UCS2_REPLACEMENT_CHAR,
     39  ASTERISK,
     40  SOLIDUS,
     41  QUOTATION_MARK,
     42  APOSTROPHE,
     43  RIGHT_PARENTHESIS,
     44  0,
     45 ];
     46 
     47 /**
     48 * Wrapper around InspectorCSSParser.
     49 * Once/if https://github.com/servo/rust-cssparser/pull/374 lands, we can remove this class.
     50 */
     51 class InspectorCSSParserWrapper {
     52  #offset = 0;
     53  #trackEOFChars;
     54  #eofCharacters = EEOFCHARACTERS_NONE;
     55 
     56  /**
     57   *
     58   * @param {string} input: The CSS text to lex
     59   * @param {object} options
     60   * @param {boolean} options.trackEOFChars: Set to true if performEOFFixup will be called.
     61   */
     62  constructor(input, options = {}) {
     63    this.parser = new InspectorCSSParser(input);
     64    this.#trackEOFChars = options.trackEOFChars;
     65  }
     66 
     67  get lineNumber() {
     68    return this.parser.lineNumber;
     69  }
     70 
     71  get columnNumber() {
     72    return this.parser.columnNumber;
     73  }
     74 
     75  nextToken() {
     76    const token = this.parser.nextToken();
     77    if (!token) {
     78      return token;
     79    }
     80 
     81    if (this.#trackEOFChars) {
     82      const { tokenType, text } = token;
     83      const lastChar = text[text.length - 1];
     84      if (tokenType === "Comment" && lastChar !== `/`) {
     85        if (lastChar === `*`) {
     86          this.#eofCharacters = EEOFCHARACTERS_SLASH;
     87        } else {
     88          this.#eofCharacters = EEOFCHARACTERS_ASTERISK | EEOFCHARACTERS_SLASH;
     89        }
     90      } else if (tokenType === "QuotedString" || tokenType === "BadString") {
     91        if (lastChar === "\\") {
     92          this.#eofCharacters =
     93            this.#eofCharacters | EEOFCHARACTERS_DROPBACKSLASH;
     94        }
     95        if (text[0] !== lastChar || text.length === 1) {
     96          this.#eofCharacters =
     97            this.#eofCharacters |
     98            (text[0] === `"`
     99              ? EEOFCHARACTERS_DOUBLEQUOTE
    100              : EEOFCHARACTERS_SINGLEQUOTE);
    101        }
    102      } else {
    103        if (lastChar === "\\") {
    104          this.#eofCharacters = EEOFCHARACTERS_REPLACEMENTCHAR;
    105        }
    106 
    107        // For some reason, we only automatically close `url`, other functions
    108        // will have their opening parenthesis escaped.
    109        if (
    110          (tokenType === "Function" && token.value === "url") ||
    111          tokenType === "BadUrl" ||
    112          (tokenType === "UnquotedUrl" && lastChar !== ")")
    113        ) {
    114          this.#eofCharacters = this.#eofCharacters | EEOFCHARACTERS_CLOSEPAREN;
    115        }
    116 
    117        if (tokenType === "CloseParenthesis") {
    118          this.#eofCharacters =
    119            this.#eofCharacters & ~EEOFCHARACTERS_CLOSEPAREN;
    120        }
    121      }
    122    }
    123 
    124    // At the moment, InspectorCSSParser doesn't expose offsets, so we need to compute
    125    // them manually here.
    126    // We can do that because we are retrieving every token in the input string, and so the
    127    // end offset of the last token is the start offset of the new token.
    128    token.startOffset = this.#offset;
    129    this.#offset += token.text.length;
    130    token.endOffset = this.#offset;
    131    return token;
    132  }
    133 
    134  /**
    135   * When EOF is reached, the last token might be unterminated in some
    136   * ways.  This method takes an input string and appends the needed
    137   * terminators.  In particular:
    138   *
    139   * 1. If EOF occurs mid-string, this will append the correct quote.
    140   * 2. If EOF occurs in a url token, this will append the close paren.
    141   * 3. If EOF occurs in a comment this will append the comment closer.
    142   *
    143   * A trailing backslash might also have been present in the input
    144   * string.  This is handled in different ways, depending on the
    145   * context and arguments.
    146   *
    147   * The existing backslash at the end of inputString is preserved, and a new backslash
    148   * is appended.
    149   * That is, the input |\| is transformed to |\\|, and the
    150   * input |'\| is transformed to |'\\'|.
    151   *
    152   * @param inputString the input string
    153   * @return the input string with the termination characters appended
    154   */
    155  performEOFFixup(inputString) {
    156    let result = inputString;
    157 
    158    let eofChars = this.#eofCharacters;
    159    if (
    160      (eofChars &
    161        (EEOFCHARACTERS_DROPBACKSLASH | EEOFCHARACTERS_REPLACEMENTCHAR)) !=
    162      0
    163    ) {
    164      eofChars &= ~(
    165        EEOFCHARACTERS_DROPBACKSLASH | EEOFCHARACTERS_REPLACEMENTCHAR
    166      );
    167      result += "\\";
    168    }
    169 
    170    if (
    171      (eofChars & EEOFCHARACTERS_DROPBACKSLASH) != 0 &&
    172      !!result.length &&
    173      result.endsWith("\\")
    174    ) {
    175      result = result.slice(0, -1);
    176    }
    177 
    178    // First, ignore EEOFCHARACTERS_DROPBACKSLASH.
    179    let c = eofChars >> 1;
    180 
    181    // All of the remaining EOFCharacters bits represent appended characters,
    182    // and the bits are in the order that they need appending.
    183    for (const p of kImpliedEOFCharacters) {
    184      if (c & 1) {
    185        result += String.fromCharCode(p);
    186      }
    187      c >>= 1;
    188    }
    189 
    190    return result;
    191  }
    192 }
    193 
    194 exports.InspectorCSSParserWrapper = InspectorCSSParserWrapper;