txExprLexer.h (4583B)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef MITREXSL_EXPRLEXER_H 7 #define MITREXSL_EXPRLEXER_H 8 9 #include "nsString.h" 10 #include "txCore.h" 11 12 /** 13 * A Token class for the ExprLexer. 14 * 15 * This class was ported from XSL:P, an open source Java based 16 * XSLT processor, written by yours truly. 17 */ 18 class Token { 19 public: 20 /** 21 * Token types 22 */ 23 enum Type { 24 //-- Trivial Tokens 25 NULL_TOKEN = 1, 26 LITERAL, 27 NUMBER, 28 CNAME, 29 VAR_REFERENCE, 30 PARENT_NODE, 31 SELF_NODE, 32 R_PAREN, 33 R_BRACKET, // 9 34 /** 35 * start of tokens for 3.7, bullet 1 36 * ExprLexer::nextIsOperatorToken bails if the tokens aren't 37 * consecutive. 38 */ 39 COMMA, 40 AT_SIGN, 41 L_PAREN, 42 L_BRACKET, 43 AXIS_IDENTIFIER, 44 45 // These tokens include their following left parenthesis 46 FUNCTION_NAME_AND_PAREN, // 15 47 COMMENT_AND_PAREN, 48 NODE_AND_PAREN, 49 PROC_INST_AND_PAREN, 50 TEXT_AND_PAREN, 51 52 /** 53 * operators 54 */ 55 //-- boolean ops 56 AND_OP, // 20 57 OR_OP, 58 59 //-- relational 60 EQUAL_OP, // 22 61 NOT_EQUAL_OP, 62 LESS_THAN_OP, 63 GREATER_THAN_OP, 64 LESS_OR_EQUAL_OP, 65 GREATER_OR_EQUAL_OP, 66 //-- additive operators 67 ADDITION_OP, // 28 68 SUBTRACTION_OP, 69 //-- multiplicative 70 DIVIDE_OP, // 30 71 MULTIPLY_OP, 72 MODULUS_OP, 73 //-- path operators 74 PARENT_OP, // 33 75 ANCESTOR_OP, 76 UNION_OP, 77 /** 78 * end of tokens for 3.7, bullet 1 -/ 79 */ 80 //-- Special endtoken 81 END // 36 82 }; 83 84 /** 85 * Constructors 86 */ 87 using iterator = nsAString::const_char_iterator; 88 89 Token(iterator aStart, iterator aEnd, Type aType) 90 : mStart(aStart), mEnd(aEnd), mType(aType), mNext(nullptr) {} 91 Token(iterator aChar, Type aType) 92 : mStart(aChar), mEnd(aChar + 1), mType(aType), mNext(nullptr) {} 93 94 const nsDependentSubstring Value() { return Substring(mStart, mEnd); } 95 96 iterator mStart, mEnd; 97 Type mType; 98 Token* mNext; 99 }; 100 101 /** 102 * A class for splitting an "Expr" String into tokens and 103 * performing basic Lexical Analysis. 104 * 105 * This class was ported from XSL:P, an open source Java based XSL processor 106 */ 107 108 class txExprLexer { 109 public: 110 txExprLexer(); 111 ~txExprLexer(); 112 113 /** 114 * Parse the given string. 115 * returns an error result if lexing failed. 116 * The given string must outlive the use of the lexer, as the 117 * generated Tokens point to Substrings of it. 118 * mPosition points to the offending location in case of an error. 119 */ 120 nsresult parse(const nsAString& aPattern); 121 122 using iterator = nsAString::const_char_iterator; 123 iterator mPosition; 124 125 /** 126 * Functions for iterating over the TokenList 127 */ 128 129 Token* nextToken(); 130 Token* peek() { 131 NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer"); 132 return mCurrentItem; 133 } 134 Token* peekAhead() { 135 NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer"); 136 // Don't peek past the end node 137 return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext 138 : mCurrentItem; 139 } 140 bool hasMoreTokens() { 141 NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer"); 142 return (mCurrentItem && mCurrentItem->mType != Token::END); 143 } 144 145 /** 146 * Trivial Tokens 147 */ 148 //-- LF, changed to enum 149 enum _TrivialTokens { 150 D_QUOTE = '\"', 151 S_QUOTE = '\'', 152 L_PAREN = '(', 153 R_PAREN = ')', 154 L_BRACKET = '[', 155 R_BRACKET = ']', 156 L_ANGLE = '<', 157 R_ANGLE = '>', 158 COMMA = ',', 159 PERIOD = '.', 160 ASTERISK = '*', 161 FORWARD_SLASH = '/', 162 EQUAL = '=', 163 BANG = '!', 164 VERT_BAR = '|', 165 AT_SIGN = '@', 166 DOLLAR_SIGN = '$', 167 PLUS = '+', 168 HYPHEN = '-', 169 COLON = ':', 170 //-- whitespace tokens 171 SPACE = ' ', 172 TX_TAB = '\t', 173 TX_CR = '\n', 174 TX_LF = '\r' 175 }; 176 177 private: 178 Token* mCurrentItem; 179 Token* mFirstItem; 180 Token* mLastItem; 181 182 int mTokenCount; 183 184 void addToken(Token* aToken); 185 186 /** 187 * Returns true if the following Token should be an operator. 188 * This is a helper for the first bullet of [XPath 3.7] 189 * Lexical Structure 190 */ 191 bool nextIsOperatorToken(Token* aToken); 192 193 /** 194 * Returns true if the given character represents a numeric letter (digit) 195 * Implemented in ExprLexerChars.cpp 196 */ 197 static bool isXPathDigit(char16_t ch) { return (ch >= '0' && ch <= '9'); } 198 }; 199 200 #endif