tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

analyze-input-string.js (13443B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 "use strict";
      6 
      7 const STATE_NORMAL = Symbol("STATE_NORMAL");
      8 const STATE_QUOTE = Symbol("STATE_QUOTE");
      9 const STATE_DQUOTE = Symbol("STATE_DQUOTE");
     10 const STATE_TEMPLATE_LITERAL = Symbol("STATE_TEMPLATE_LITERAL");
     11 const STATE_ESCAPE_QUOTE = Symbol("STATE_ESCAPE_QUOTE");
     12 const STATE_ESCAPE_DQUOTE = Symbol("STATE_ESCAPE_DQUOTE");
     13 const STATE_ESCAPE_TEMPLATE_LITERAL = Symbol("STATE_ESCAPE_TEMPLATE_LITERAL");
     14 const STATE_SLASH = Symbol("STATE_SLASH");
     15 const STATE_INLINE_COMMENT = Symbol("STATE_INLINE_COMMENT");
     16 const STATE_MULTILINE_COMMENT = Symbol("STATE_MULTILINE_COMMENT");
     17 const STATE_MULTILINE_COMMENT_CLOSE = Symbol("STATE_MULTILINE_COMMENT_CLOSE");
     18 const STATE_QUESTION_MARK = Symbol("STATE_QUESTION_MARK");
     19 
     20 const OPEN_BODY = "{[(".split("");
     21 const CLOSE_BODY = "}])".split("");
     22 const OPEN_CLOSE_BODY = {
     23  "{": "}",
     24  "[": "]",
     25  "(": ")",
     26 };
     27 
     28 const NO_AUTOCOMPLETE_PREFIXES = [
     29  "var",
     30  "const",
     31  "let",
     32  "function",
     33  "class",
     34  "using",
     35 ];
     36 const OPERATOR_CHARS_SET = new Set(";,:=<>+-*%|&^~!".split(""));
     37 
     38 /**
     39 * Analyses a given string to find the last statement that is interesting for
     40 * later completion.
     41 *
     42 * @param   string str
     43 *          A string to analyse.
     44 *
     45 * @returns object
     46 *          If there was an error in the string detected, then a object like
     47 *
     48 *            { err: "ErrorMesssage" }
     49 *
     50 *          is returned, otherwise a object like
     51 *
     52 *            {
     53 *              state: STATE_NORMAL|STATE_QUOTE|STATE_DQUOTE,
     54 *              lastStatement: the last statement in the string,
     55 *              isElementAccess: boolean that indicates if the lastStatement has an open
     56 *                               element access (e.g. `x["match`).
     57 *              isPropertyAccess: boolean indicating if we are accessing property
     58 *                                (e.g `true` in `var a = {b: 1};a.b`)
     59 *              matchProp: The part of the expression that should match the properties
     60 *                         on the mainExpression (e.g. `que` when expression is `document.body.que`)
     61 *              mainExpression: The part of the expression before any property access,
     62 *                              (e.g. `a.b` if expression is `a.b.`)
     63 *              expressionBeforePropertyAccess: The part of the expression before property access
     64 *                                              (e.g `var a = {b: 1};a` if expression is `var a = {b: 1};a.b`)
     65 *            }
     66 */
     67 // eslint-disable-next-line complexity
     68 exports.analyzeInputString = function (str, timeout = 2500) {
     69  // work variables.
     70  const bodyStack = [];
     71  let state = STATE_NORMAL;
     72  let previousNonWhitespaceChar;
     73  let lastStatement = "";
     74  let currentIndex = -1;
     75  let dotIndex;
     76  let pendingWhitespaceChars = "";
     77  const startingTime = Date.now();
     78 
     79  // Use a string iterator in order to handle character with a length >= 2 (e.g. 😎).
     80  for (const c of str) {
     81    // We are possibly dealing with a very large string that would take a long time to
     82    // analyze (and freeze the process). If the function has been running for more than
     83    // a given time, we stop the analysis (this isn't too bad because the only
     84    // consequence is that we won't provide autocompletion items).
     85    if (Date.now() - startingTime > timeout) {
     86      return {
     87        err: "timeout",
     88      };
     89    }
     90 
     91    currentIndex += c.length;
     92    let resetLastStatement = false;
     93    const isWhitespaceChar = c.trim() === "";
     94    switch (state) {
     95      case STATE_SLASH:
     96        if (c == "/") {
     97          state = STATE_INLINE_COMMENT;
     98          break;
     99        } else if (c == "*") {
    100          state = STATE_MULTILINE_COMMENT;
    101          break;
    102        } else {
    103          lastStatement = "";
    104          state = STATE_NORMAL;
    105        }
    106      // fall through
    107 
    108      // Normal JS state.
    109      // eslint-disable-next-line no-fallthrough
    110      case STATE_NORMAL:
    111        if (lastStatement.endsWith("?.") && /\d/.test(c)) {
    112          // If the current char is a number, the engine will consider we're not
    113          // performing an optional chaining, but a ternary (e.g. x ?.4 : 2).
    114          lastStatement = "";
    115        }
    116 
    117        // Storing the index of dot of the input string
    118        if (c === ".") {
    119          dotIndex = currentIndex;
    120        }
    121 
    122        // If the last characters were spaces, and the current one is not.
    123        if (pendingWhitespaceChars && !isWhitespaceChar) {
    124          // If we have a legitimate property/element access, or potential optional
    125          // chaining call, we append the spaces.
    126          if (c === "[" || c === "." || c === "?") {
    127            lastStatement = lastStatement + pendingWhitespaceChars;
    128          } else {
    129            // if not, we can be sure the statement was over, and we can start a new one.
    130            lastStatement = "";
    131          }
    132          pendingWhitespaceChars = "";
    133        }
    134 
    135        if (c == '"') {
    136          state = STATE_DQUOTE;
    137        } else if (c == "'") {
    138          state = STATE_QUOTE;
    139        } else if (c == "`") {
    140          state = STATE_TEMPLATE_LITERAL;
    141        } else if (c == "/") {
    142          state = STATE_SLASH;
    143        } else if (c == "?") {
    144          state = STATE_QUESTION_MARK;
    145        } else if (OPERATOR_CHARS_SET.has(c)) {
    146          // If the character is an operator, we can update the current statement.
    147          resetLastStatement = true;
    148        } else if (isWhitespaceChar) {
    149          // If the previous char isn't a dot or opening bracket, and the current computed
    150          // statement is not a variable/function/class declaration, we track the number
    151          // of consecutive spaces, so we can re-use them at some point (or drop them).
    152          if (
    153            previousNonWhitespaceChar !== "." &&
    154            previousNonWhitespaceChar !== "[" &&
    155            !NO_AUTOCOMPLETE_PREFIXES.includes(lastStatement)
    156          ) {
    157            pendingWhitespaceChars += c;
    158            continue;
    159          }
    160        } else if (OPEN_BODY.includes(c)) {
    161          // When opening a bracket or a parens, we store the current statement, in order
    162          // to be able to retrieve it later.
    163          bodyStack.push({
    164            token: c,
    165            lastStatement,
    166            index: currentIndex,
    167          });
    168          // And we compute a new statement.
    169          resetLastStatement = true;
    170        } else if (CLOSE_BODY.includes(c)) {
    171          const last = bodyStack.pop();
    172          if (!last || OPEN_CLOSE_BODY[last.token] != c) {
    173            return {
    174              err: "syntax error",
    175            };
    176          }
    177          if (c == "}") {
    178            resetLastStatement = true;
    179          } else {
    180            lastStatement = last.lastStatement;
    181          }
    182        }
    183        break;
    184 
    185      // Escaped quote
    186      case STATE_ESCAPE_QUOTE:
    187        state = STATE_QUOTE;
    188        break;
    189      case STATE_ESCAPE_DQUOTE:
    190        state = STATE_DQUOTE;
    191        break;
    192      case STATE_ESCAPE_TEMPLATE_LITERAL:
    193        state = STATE_TEMPLATE_LITERAL;
    194        break;
    195 
    196      // Double quote state > " <
    197      case STATE_DQUOTE:
    198        if (c == "\\") {
    199          state = STATE_ESCAPE_DQUOTE;
    200        } else if (c == "\n") {
    201          return {
    202            err: "unterminated string literal",
    203          };
    204        } else if (c == '"') {
    205          state = STATE_NORMAL;
    206        }
    207        break;
    208 
    209      // Template literal state > ` <
    210      case STATE_TEMPLATE_LITERAL:
    211        if (c == "\\") {
    212          state = STATE_ESCAPE_TEMPLATE_LITERAL;
    213        } else if (c == "`") {
    214          state = STATE_NORMAL;
    215        }
    216        break;
    217 
    218      // Single quote state > ' <
    219      case STATE_QUOTE:
    220        if (c == "\\") {
    221          state = STATE_ESCAPE_QUOTE;
    222        } else if (c == "\n") {
    223          return {
    224            err: "unterminated string literal",
    225          };
    226        } else if (c == "'") {
    227          state = STATE_NORMAL;
    228        }
    229        break;
    230 
    231      case STATE_INLINE_COMMENT:
    232        if (c === "\n") {
    233          state = STATE_NORMAL;
    234          resetLastStatement = true;
    235        }
    236        break;
    237 
    238      case STATE_MULTILINE_COMMENT:
    239        if (c === "*") {
    240          state = STATE_MULTILINE_COMMENT_CLOSE;
    241        }
    242        break;
    243 
    244      case STATE_MULTILINE_COMMENT_CLOSE:
    245        if (c === "/") {
    246          state = STATE_NORMAL;
    247          resetLastStatement = true;
    248        } else {
    249          state = STATE_MULTILINE_COMMENT;
    250        }
    251        break;
    252 
    253      case STATE_QUESTION_MARK:
    254        state = STATE_NORMAL;
    255        if (c === "?") {
    256          // If we have a nullish coalescing operator, we start a new statement
    257          resetLastStatement = true;
    258        } else if (c !== ".") {
    259          // If we're not dealing with optional chaining (?.), it means we have a ternary,
    260          // so we are starting a new statement that includes the current character.
    261          lastStatement = "";
    262        } else {
    263          dotIndex = currentIndex;
    264        }
    265        break;
    266    }
    267 
    268    if (!isWhitespaceChar) {
    269      previousNonWhitespaceChar = c;
    270    }
    271    if (resetLastStatement) {
    272      lastStatement = "";
    273    } else {
    274      lastStatement = lastStatement + c;
    275    }
    276 
    277    // We update all the open stacks lastStatement so they are up-to-date.
    278    bodyStack.forEach(stack => {
    279      if (stack.token !== "}") {
    280        stack.lastStatement = stack.lastStatement + c;
    281      }
    282    });
    283  }
    284 
    285  let isElementAccess = false;
    286  let lastOpeningBracketIndex = -1;
    287  if (bodyStack.length === 1 && bodyStack[0].token === "[") {
    288    lastStatement = bodyStack[0].lastStatement;
    289    lastOpeningBracketIndex = bodyStack[0].index;
    290    isElementAccess = true;
    291 
    292    if (
    293      state === STATE_DQUOTE ||
    294      state === STATE_QUOTE ||
    295      state === STATE_TEMPLATE_LITERAL ||
    296      state === STATE_ESCAPE_QUOTE ||
    297      state === STATE_ESCAPE_DQUOTE ||
    298      state === STATE_ESCAPE_TEMPLATE_LITERAL
    299    ) {
    300      state = STATE_NORMAL;
    301    }
    302  } else if (pendingWhitespaceChars) {
    303    lastStatement = "";
    304  }
    305 
    306  const lastCompletionCharIndex = isElementAccess
    307    ? lastOpeningBracketIndex
    308    : dotIndex;
    309 
    310  const stringBeforeLastCompletionChar = str.slice(0, lastCompletionCharIndex);
    311 
    312  const isPropertyAccess =
    313    lastCompletionCharIndex && lastCompletionCharIndex > 0;
    314 
    315  // Compute `isOptionalAccess`, so that we can use it
    316  // later for computing `expressionBeforePropertyAccess`.
    317  //Check `?.` before `[` for element access ( e.g `a?.["b` or `a  ?. ["b` )
    318  // and `?` before `.` for regular property access ( e.g `a?.b` or `a ?. b` )
    319  const optionalElementAccessRegex = /\?\.\s*$/;
    320  const isOptionalAccess = isElementAccess
    321    ? optionalElementAccessRegex.test(stringBeforeLastCompletionChar)
    322    : isPropertyAccess &&
    323      str.slice(lastCompletionCharIndex - 1, lastCompletionCharIndex + 1) ===
    324        "?.";
    325 
    326  // Get the filtered string for the properties (e.g if `document.qu` then `qu`)
    327  const matchProp = isPropertyAccess
    328    ? str.slice(lastCompletionCharIndex + 1).trimLeft()
    329    : null;
    330 
    331  const expressionBeforePropertyAccess = isPropertyAccess
    332    ? str.slice(
    333        0,
    334        // For optional access, we can take all the chars before the last "?" char.
    335        isOptionalAccess
    336          ? stringBeforeLastCompletionChar.lastIndexOf("?")
    337          : lastCompletionCharIndex
    338      )
    339    : str;
    340 
    341  let mainExpression = lastStatement;
    342  if (isPropertyAccess) {
    343    if (isOptionalAccess) {
    344      // Strip anything before the last `?`.
    345      mainExpression = mainExpression.slice(0, mainExpression.lastIndexOf("?"));
    346    } else {
    347      mainExpression = mainExpression.slice(
    348        0,
    349        -1 * (str.length - lastCompletionCharIndex)
    350      );
    351    }
    352  }
    353 
    354  mainExpression = mainExpression.trim();
    355 
    356  return {
    357    state,
    358    isElementAccess,
    359    isPropertyAccess,
    360    expressionBeforePropertyAccess,
    361    lastStatement,
    362    mainExpression,
    363    matchProp,
    364  };
    365 };
    366 
    367 /**
    368 * Checks whether the analyzed input string is in an appropriate state to autocomplete, e.g. not
    369 * inside a string, or declaring a variable.
    370 *
    371 * @param {object} inputAnalysisState The analyzed string to check
    372 * @returns {boolean} Whether the input should be autocompleted
    373 */
    374 exports.shouldInputBeAutocompleted = function (inputAnalysisState) {
    375  const { err, state, lastStatement } = inputAnalysisState;
    376 
    377  // There was an error analysing the string.
    378  if (err) {
    379    return false;
    380  }
    381 
    382  // If the current state is not STATE_NORMAL, then we are inside string,
    383  // which means that no completion is possible.
    384  if (state != STATE_NORMAL) {
    385    return false;
    386  }
    387 
    388  // Don't complete on just an empty string.
    389  if (lastStatement.trim() == "") {
    390    return false;
    391  }
    392 
    393  if (
    394    NO_AUTOCOMPLETE_PREFIXES.some(prefix =>
    395      lastStatement.startsWith(prefix + " ")
    396    )
    397  ) {
    398    return false;
    399  }
    400 
    401  return true;
    402 };
    403 
    404 /**
    405 * Checks whether the analyzed input string is in an appropriate state to be eagerly evaluated.
    406 *
    407 * @param {object} inputAnalysisState
    408 * @returns {boolean} Whether the input should be eagerly evaluated
    409 */
    410 exports.shouldInputBeEagerlyEvaluated = function ({ lastStatement }) {
    411  const inComputedProperty =
    412    lastStatement.lastIndexOf("[") !== -1 &&
    413    lastStatement.lastIndexOf("[") > lastStatement.lastIndexOf("]");
    414 
    415  const hasPropertyAccess =
    416    lastStatement.includes(".") || lastStatement.includes("[");
    417 
    418  return hasPropertyAccess && !inComputedProperty;
    419 };