tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

txExprLexer.h (4583B)


      1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef MITREXSL_EXPRLEXER_H
      7 #define MITREXSL_EXPRLEXER_H
      8 
      9 #include "nsString.h"
     10 #include "txCore.h"
     11 
     12 /**
     13 * A Token class for the ExprLexer.
     14 *
     15 * This class was ported from XSL:P, an open source Java based
     16 * XSLT processor, written by yours truly.
     17 */
     18 class Token {
     19 public:
     20  /**
     21   * Token types
     22   */
     23  enum Type {
     24    //-- Trivial Tokens
     25    NULL_TOKEN = 1,
     26    LITERAL,
     27    NUMBER,
     28    CNAME,
     29    VAR_REFERENCE,
     30    PARENT_NODE,
     31    SELF_NODE,
     32    R_PAREN,
     33    R_BRACKET,  // 9
     34    /**
     35     * start of tokens for 3.7, bullet 1
     36     * ExprLexer::nextIsOperatorToken bails if the tokens aren't
     37     * consecutive.
     38     */
     39    COMMA,
     40    AT_SIGN,
     41    L_PAREN,
     42    L_BRACKET,
     43    AXIS_IDENTIFIER,
     44 
     45    // These tokens include their following left parenthesis
     46    FUNCTION_NAME_AND_PAREN,  // 15
     47    COMMENT_AND_PAREN,
     48    NODE_AND_PAREN,
     49    PROC_INST_AND_PAREN,
     50    TEXT_AND_PAREN,
     51 
     52    /**
     53     * operators
     54     */
     55    //-- boolean ops
     56    AND_OP,  // 20
     57    OR_OP,
     58 
     59    //-- relational
     60    EQUAL_OP,  // 22
     61    NOT_EQUAL_OP,
     62    LESS_THAN_OP,
     63    GREATER_THAN_OP,
     64    LESS_OR_EQUAL_OP,
     65    GREATER_OR_EQUAL_OP,
     66    //-- additive operators
     67    ADDITION_OP,  // 28
     68    SUBTRACTION_OP,
     69    //-- multiplicative
     70    DIVIDE_OP,  // 30
     71    MULTIPLY_OP,
     72    MODULUS_OP,
     73    //-- path operators
     74    PARENT_OP,  // 33
     75    ANCESTOR_OP,
     76    UNION_OP,
     77    /**
     78     * end of tokens for 3.7, bullet 1 -/
     79     */
     80    //-- Special endtoken
     81    END  // 36
     82  };
     83 
     84  /**
     85   * Constructors
     86   */
     87  using iterator = nsAString::const_char_iterator;
     88 
     89  Token(iterator aStart, iterator aEnd, Type aType)
     90      : mStart(aStart), mEnd(aEnd), mType(aType), mNext(nullptr) {}
     91  Token(iterator aChar, Type aType)
     92      : mStart(aChar), mEnd(aChar + 1), mType(aType), mNext(nullptr) {}
     93 
     94  const nsDependentSubstring Value() { return Substring(mStart, mEnd); }
     95 
     96  iterator mStart, mEnd;
     97  Type mType;
     98  Token* mNext;
     99 };
    100 
    101 /**
    102 * A class for splitting an "Expr" String into tokens and
    103 * performing  basic Lexical Analysis.
    104 *
    105 * This class was ported from XSL:P, an open source Java based XSL processor
    106 */
    107 
    108 class txExprLexer {
    109 public:
    110  txExprLexer();
    111  ~txExprLexer();
    112 
    113  /**
    114   * Parse the given string.
    115   * returns an error result if lexing failed.
    116   * The given string must outlive the use of the lexer, as the
    117   * generated Tokens point to Substrings of it.
    118   * mPosition points to the offending location in case of an error.
    119   */
    120  nsresult parse(const nsAString& aPattern);
    121 
    122  using iterator = nsAString::const_char_iterator;
    123  iterator mPosition;
    124 
    125  /**
    126   * Functions for iterating over the TokenList
    127   */
    128 
    129  Token* nextToken();
    130  Token* peek() {
    131    NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer");
    132    return mCurrentItem;
    133  }
    134  Token* peekAhead() {
    135    NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer");
    136    // Don't peek past the end node
    137    return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext
    138                                                 : mCurrentItem;
    139  }
    140  bool hasMoreTokens() {
    141    NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer");
    142    return (mCurrentItem && mCurrentItem->mType != Token::END);
    143  }
    144 
    145  /**
    146   * Trivial Tokens
    147   */
    148  //-- LF, changed to enum
    149  enum _TrivialTokens {
    150    D_QUOTE = '\"',
    151    S_QUOTE = '\'',
    152    L_PAREN = '(',
    153    R_PAREN = ')',
    154    L_BRACKET = '[',
    155    R_BRACKET = ']',
    156    L_ANGLE = '<',
    157    R_ANGLE = '>',
    158    COMMA = ',',
    159    PERIOD = '.',
    160    ASTERISK = '*',
    161    FORWARD_SLASH = '/',
    162    EQUAL = '=',
    163    BANG = '!',
    164    VERT_BAR = '|',
    165    AT_SIGN = '@',
    166    DOLLAR_SIGN = '$',
    167    PLUS = '+',
    168    HYPHEN = '-',
    169    COLON = ':',
    170    //-- whitespace tokens
    171    SPACE = ' ',
    172    TX_TAB = '\t',
    173    TX_CR = '\n',
    174    TX_LF = '\r'
    175  };
    176 
    177 private:
    178  Token* mCurrentItem;
    179  Token* mFirstItem;
    180  Token* mLastItem;
    181 
    182  int mTokenCount;
    183 
    184  void addToken(Token* aToken);
    185 
    186  /**
    187   * Returns true if the following Token should be an operator.
    188   * This is a helper for the first bullet of [XPath 3.7]
    189   *  Lexical Structure
    190   */
    191  bool nextIsOperatorToken(Token* aToken);
    192 
    193  /**
    194   * Returns true if the given character represents a numeric letter (digit)
    195   * Implemented in ExprLexerChars.cpp
    196   */
    197  static bool isXPathDigit(char16_t ch) { return (ch >= '0' && ch <= '9'); }
    198 };
    199 
    200 #endif