lexer.js (5775B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 "use strict"; 6 7 const EEOFCHARACTERS_NONE = 0x0000; 8 9 // to handle \<EOF> inside strings 10 const EEOFCHARACTERS_DROPBACKSLASH = 0x0001; 11 12 // to handle \<EOF> outside strings 13 const EEOFCHARACTERS_REPLACEMENTCHAR = 0x0002; 14 15 // to close comments 16 const EEOFCHARACTERS_ASTERISK = 0x0004; 17 const EEOFCHARACTERS_SLASH = 0x0008; 18 19 // to close double-quoted strings 20 const EEOFCHARACTERS_DOUBLEQUOTE = 0x0010; 21 22 // to close single-quoted strings 23 const EEOFCHARACTERS_SINGLEQUOTE = 0x0020; 24 25 // to close URLs 26 const EEOFCHARACTERS_CLOSEPAREN = 0x0040; 27 28 // Bridge the char/string divide. 29 const APOSTROPHE = "'".charCodeAt(0); 30 const ASTERISK = "*".charCodeAt(0); 31 const QUOTATION_MARK = '"'.charCodeAt(0); 32 const RIGHT_PARENTHESIS = ")".charCodeAt(0); 33 const SOLIDUS = "/".charCodeAt(0); 34 35 const UCS2_REPLACEMENT_CHAR = 0xfffd; 36 37 const kImpliedEOFCharacters = [ 38 UCS2_REPLACEMENT_CHAR, 39 ASTERISK, 40 SOLIDUS, 41 QUOTATION_MARK, 42 APOSTROPHE, 43 RIGHT_PARENTHESIS, 44 0, 45 ]; 46 47 /** 48 * Wrapper around InspectorCSSParser. 49 * Once/if https://github.com/servo/rust-cssparser/pull/374 lands, we can remove this class. 50 */ 51 class InspectorCSSParserWrapper { 52 #offset = 0; 53 #trackEOFChars; 54 #eofCharacters = EEOFCHARACTERS_NONE; 55 56 /** 57 * 58 * @param {string} input: The CSS text to lex 59 * @param {object} options 60 * @param {boolean} options.trackEOFChars: Set to true if performEOFFixup will be called. 61 */ 62 constructor(input, options = {}) { 63 this.parser = new InspectorCSSParser(input); 64 this.#trackEOFChars = options.trackEOFChars; 65 } 66 67 get lineNumber() { 68 return this.parser.lineNumber; 69 } 70 71 get columnNumber() { 72 return this.parser.columnNumber; 73 } 74 75 nextToken() { 76 const token = this.parser.nextToken(); 77 if (!token) { 78 return token; 79 } 80 81 if (this.#trackEOFChars) { 82 const { tokenType, text } = token; 83 const lastChar = text[text.length - 1]; 84 if (tokenType === "Comment" && lastChar !== `/`) { 85 if (lastChar === `*`) { 86 this.#eofCharacters = EEOFCHARACTERS_SLASH; 87 } else { 88 this.#eofCharacters = EEOFCHARACTERS_ASTERISK | EEOFCHARACTERS_SLASH; 89 } 90 } else if (tokenType === "QuotedString" || tokenType === "BadString") { 91 if (lastChar === "\\") { 92 this.#eofCharacters = 93 this.#eofCharacters | EEOFCHARACTERS_DROPBACKSLASH; 94 } 95 if (text[0] !== lastChar || text.length === 1) { 96 this.#eofCharacters = 97 this.#eofCharacters | 98 (text[0] === `"` 99 ? EEOFCHARACTERS_DOUBLEQUOTE 100 : EEOFCHARACTERS_SINGLEQUOTE); 101 } 102 } else { 103 if (lastChar === "\\") { 104 this.#eofCharacters = EEOFCHARACTERS_REPLACEMENTCHAR; 105 } 106 107 // For some reason, we only automatically close `url`, other functions 108 // will have their opening parenthesis escaped. 109 if ( 110 (tokenType === "Function" && token.value === "url") || 111 tokenType === "BadUrl" || 112 (tokenType === "UnquotedUrl" && lastChar !== ")") 113 ) { 114 this.#eofCharacters = this.#eofCharacters | EEOFCHARACTERS_CLOSEPAREN; 115 } 116 117 if (tokenType === "CloseParenthesis") { 118 this.#eofCharacters = 119 this.#eofCharacters & ~EEOFCHARACTERS_CLOSEPAREN; 120 } 121 } 122 } 123 124 // At the moment, InspectorCSSParser doesn't expose offsets, so we need to compute 125 // them manually here. 126 // We can do that because we are retrieving every token in the input string, and so the 127 // end offset of the last token is the start offset of the new token. 128 token.startOffset = this.#offset; 129 this.#offset += token.text.length; 130 token.endOffset = this.#offset; 131 return token; 132 } 133 134 /** 135 * When EOF is reached, the last token might be unterminated in some 136 * ways. This method takes an input string and appends the needed 137 * terminators. In particular: 138 * 139 * 1. If EOF occurs mid-string, this will append the correct quote. 140 * 2. If EOF occurs in a url token, this will append the close paren. 141 * 3. If EOF occurs in a comment this will append the comment closer. 142 * 143 * A trailing backslash might also have been present in the input 144 * string. This is handled in different ways, depending on the 145 * context and arguments. 146 * 147 * The existing backslash at the end of inputString is preserved, and a new backslash 148 * is appended. 149 * That is, the input |\| is transformed to |\\|, and the 150 * input |'\| is transformed to |'\\'|. 151 * 152 * @param inputString the input string 153 * @return the input string with the termination characters appended 154 */ 155 performEOFFixup(inputString) { 156 let result = inputString; 157 158 let eofChars = this.#eofCharacters; 159 if ( 160 (eofChars & 161 (EEOFCHARACTERS_DROPBACKSLASH | EEOFCHARACTERS_REPLACEMENTCHAR)) != 162 0 163 ) { 164 eofChars &= ~( 165 EEOFCHARACTERS_DROPBACKSLASH | EEOFCHARACTERS_REPLACEMENTCHAR 166 ); 167 result += "\\"; 168 } 169 170 if ( 171 (eofChars & EEOFCHARACTERS_DROPBACKSLASH) != 0 && 172 !!result.length && 173 result.endsWith("\\") 174 ) { 175 result = result.slice(0, -1); 176 } 177 178 // First, ignore EEOFCHARACTERS_DROPBACKSLASH. 179 let c = eofChars >> 1; 180 181 // All of the remaining EOFCharacters bits represent appended characters, 182 // and the bits are in the order that they need appending. 183 for (const p of kImpliedEOFCharacters) { 184 if (c & 1) { 185 result += String.fromCharCode(p); 186 } 187 c >>= 1; 188 } 189 190 return result; 191 } 192 } 193 194 exports.InspectorCSSParserWrapper = InspectorCSSParserWrapper;