txExprLexer.cpp (10517B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 /** 7 * Lexical analyzer for XPath expressions 8 */ 9 10 #include "txExprLexer.h" 11 12 #include "nsError.h" 13 #include "nsGkAtoms.h" 14 #include "nsString.h" 15 #include "txXMLUtils.h" 16 17 /** 18 * Creates a new ExprLexer 19 */ 20 txExprLexer::txExprLexer() 21 : mPosition(nullptr), 22 mCurrentItem(nullptr), 23 mFirstItem(nullptr), 24 mLastItem(nullptr), 25 mTokenCount(0) {} 26 27 /** 28 * Destroys this instance of an txExprLexer 29 */ 30 txExprLexer::~txExprLexer() { 31 //-- delete tokens 32 Token* tok = mFirstItem; 33 while (tok) { 34 Token* temp = tok->mNext; 35 delete tok; 36 tok = temp; 37 } 38 mCurrentItem = nullptr; 39 } 40 41 Token* txExprLexer::nextToken() { 42 if (!mCurrentItem) { 43 MOZ_ASSERT_UNREACHABLE("nextToken called on uninitialized lexer"); 44 return nullptr; 45 } 46 47 if (mCurrentItem->mType == Token::END) { 48 // Do not progress beyond the end token 49 return mCurrentItem; 50 } 51 52 Token* token = mCurrentItem; 53 mCurrentItem = mCurrentItem->mNext; 54 return token; 55 } 56 57 void txExprLexer::addToken(Token* aToken) { 58 if (mLastItem) { 59 mLastItem->mNext = aToken; 60 } 61 if (!mFirstItem) { 62 mFirstItem = aToken; 63 mCurrentItem = aToken; 64 } 65 mLastItem = aToken; 66 ++mTokenCount; 67 } 68 69 /** 70 * Returns true if the following Token should be an operator. 71 * This is a helper for the first bullet of [XPath 3.7] 72 * Lexical Structure 73 */ 74 bool txExprLexer::nextIsOperatorToken(Token* aToken) { 75 if (!aToken || aToken->mType == Token::NULL_TOKEN) { 76 return false; 77 } 78 /* This relies on the tokens having the right order in txExprLexer.h */ 79 return aToken->mType < Token::COMMA || aToken->mType > Token::UNION_OP; 80 } 81 82 /** 83 * Parses the given string into a sequence of Tokens 84 */ 85 nsresult txExprLexer::parse(const nsAString& aPattern) { 86 iterator end; 87 aPattern.BeginReading(mPosition); 88 aPattern.EndReading(end); 89 90 //-- initialize previous token, this will automatically get 91 //-- deleted when it goes out of scope 92 Token nullToken(nullptr, nullptr, Token::NULL_TOKEN); 93 94 Token::Type defType; 95 Token* newToken = nullptr; 96 Token* prevToken = &nullToken; 97 bool isToken; 98 99 while (mPosition < end) { 100 defType = Token::CNAME; 101 isToken = true; 102 103 if (*mPosition == DOLLAR_SIGN) { 104 if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) { 105 return NS_ERROR_XPATH_INVALID_VAR_NAME; 106 } 107 defType = Token::VAR_REFERENCE; 108 } 109 // just reuse the QName parsing, which will use defType 110 // the token to construct 111 112 if (XMLUtils::isLetter(*mPosition)) { 113 // NCName, can get QName or OperatorName; 114 // FunctionName, NodeName, and AxisSpecifier may want whitespace, 115 // and are dealt with below 116 iterator start = mPosition; 117 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { 118 /* just go */ 119 } 120 if (mPosition < end && *mPosition == COLON) { 121 // try QName or wildcard, might need to step back for axis 122 if (++mPosition == end) { 123 return NS_ERROR_XPATH_UNEXPECTED_END; 124 } 125 if (XMLUtils::isLetter(*mPosition)) { 126 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { 127 /* just go */ 128 } 129 } else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) { 130 // eat wildcard for NameTest, bail for var ref at COLON 131 ++mPosition; 132 } else { 133 --mPosition; // step back 134 } 135 } 136 if (nextIsOperatorToken(prevToken)) { 137 nsDependentSubstring op(Substring(start, mPosition)); 138 if (nsGkAtoms::_and->Equals(op)) { 139 defType = Token::AND_OP; 140 } else if (nsGkAtoms::_or->Equals(op)) { 141 defType = Token::OR_OP; 142 } else if (nsGkAtoms::mod->Equals(op)) { 143 defType = Token::MODULUS_OP; 144 } else if (nsGkAtoms::div->Equals(op)) { 145 defType = Token::DIVIDE_OP; 146 } else { 147 // XXX QUESTION: spec is not too precise 148 // badops is sure an error, but is bad:ops, too? We say yes! 149 return NS_ERROR_XPATH_OPERATOR_EXPECTED; 150 } 151 } 152 newToken = new Token(start, mPosition, defType); 153 } else if (isXPathDigit(*mPosition)) { 154 iterator start = mPosition; 155 while (++mPosition < end && isXPathDigit(*mPosition)) { 156 /* just go */ 157 } 158 if (mPosition < end && *mPosition == '.') { 159 while (++mPosition < end && isXPathDigit(*mPosition)) { 160 /* just go */ 161 } 162 } 163 newToken = new Token(start, mPosition, Token::NUMBER); 164 } else { 165 switch (*mPosition) { 166 //-- ignore whitespace 167 case SPACE: 168 case TX_TAB: 169 case TX_CR: 170 case TX_LF: 171 ++mPosition; 172 isToken = false; 173 break; 174 case S_QUOTE: 175 case D_QUOTE: { 176 iterator start = mPosition; 177 while (++mPosition < end && *mPosition != *start) { 178 // eat literal 179 } 180 if (mPosition == end) { 181 mPosition = start; 182 return NS_ERROR_XPATH_UNCLOSED_LITERAL; 183 } 184 newToken = new Token(start + 1, mPosition, Token::LITERAL); 185 ++mPosition; 186 } break; 187 case PERIOD: 188 // period can be .., .(DIGITS)+ or ., check next 189 if (++mPosition == end) { 190 newToken = new Token(mPosition - 1, Token::SELF_NODE); 191 } else if (isXPathDigit(*mPosition)) { 192 iterator start = mPosition - 1; 193 while (++mPosition < end && isXPathDigit(*mPosition)) { 194 /* just go */ 195 } 196 newToken = new Token(start, mPosition, Token::NUMBER); 197 } else if (*mPosition == PERIOD) { 198 ++mPosition; 199 newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE); 200 } else { 201 newToken = new Token(mPosition - 1, Token::SELF_NODE); 202 } 203 break; 204 case COLON: // QNames are dealt above, must be axis ident 205 if (++mPosition >= end || *mPosition != COLON || 206 prevToken->mType != Token::CNAME) { 207 return NS_ERROR_XPATH_BAD_COLON; 208 } 209 prevToken->mType = Token::AXIS_IDENTIFIER; 210 ++mPosition; 211 isToken = false; 212 break; 213 case FORWARD_SLASH: 214 if (++mPosition < end && *mPosition == FORWARD_SLASH) { 215 ++mPosition; 216 newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP); 217 } else { 218 newToken = new Token(mPosition - 1, Token::PARENT_OP); 219 } 220 break; 221 case BANG: // can only be != 222 if (++mPosition < end && *mPosition == EQUAL) { 223 ++mPosition; 224 newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP); 225 break; 226 } 227 // Error ! is not not() 228 return NS_ERROR_XPATH_BAD_BANG; 229 case EQUAL: 230 newToken = new Token(mPosition, Token::EQUAL_OP); 231 ++mPosition; 232 break; 233 case L_ANGLE: 234 if (++mPosition == end) { 235 return NS_ERROR_XPATH_UNEXPECTED_END; 236 } 237 if (*mPosition == EQUAL) { 238 ++mPosition; 239 newToken = 240 new Token(mPosition - 2, mPosition, Token::LESS_OR_EQUAL_OP); 241 } else { 242 newToken = new Token(mPosition - 1, Token::LESS_THAN_OP); 243 } 244 break; 245 case R_ANGLE: 246 if (++mPosition == end) { 247 return NS_ERROR_XPATH_UNEXPECTED_END; 248 } 249 if (*mPosition == EQUAL) { 250 ++mPosition; 251 newToken = 252 new Token(mPosition - 2, mPosition, Token::GREATER_OR_EQUAL_OP); 253 } else { 254 newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP); 255 } 256 break; 257 case HYPHEN: 258 newToken = new Token(mPosition, Token::SUBTRACTION_OP); 259 ++mPosition; 260 break; 261 case ASTERISK: 262 if (nextIsOperatorToken(prevToken)) { 263 newToken = new Token(mPosition, Token::MULTIPLY_OP); 264 } else { 265 newToken = new Token(mPosition, Token::CNAME); 266 } 267 ++mPosition; 268 break; 269 case L_PAREN: 270 if (prevToken->mType == Token::CNAME) { 271 const nsDependentSubstring& val = prevToken->Value(); 272 if (val.EqualsLiteral("comment")) { 273 prevToken->mType = Token::COMMENT_AND_PAREN; 274 } else if (val.EqualsLiteral("node")) { 275 prevToken->mType = Token::NODE_AND_PAREN; 276 } else if (val.EqualsLiteral("processing-instruction")) { 277 prevToken->mType = Token::PROC_INST_AND_PAREN; 278 } else if (val.EqualsLiteral("text")) { 279 prevToken->mType = Token::TEXT_AND_PAREN; 280 } else { 281 prevToken->mType = Token::FUNCTION_NAME_AND_PAREN; 282 } 283 isToken = false; 284 } else { 285 newToken = new Token(mPosition, Token::L_PAREN); 286 } 287 ++mPosition; 288 break; 289 case R_PAREN: 290 newToken = new Token(mPosition, Token::R_PAREN); 291 ++mPosition; 292 break; 293 case L_BRACKET: 294 newToken = new Token(mPosition, Token::L_BRACKET); 295 ++mPosition; 296 break; 297 case R_BRACKET: 298 newToken = new Token(mPosition, Token::R_BRACKET); 299 ++mPosition; 300 break; 301 case COMMA: 302 newToken = new Token(mPosition, Token::COMMA); 303 ++mPosition; 304 break; 305 case AT_SIGN: 306 newToken = new Token(mPosition, Token::AT_SIGN); 307 ++mPosition; 308 break; 309 case PLUS: 310 newToken = new Token(mPosition, Token::ADDITION_OP); 311 ++mPosition; 312 break; 313 case VERT_BAR: 314 newToken = new Token(mPosition, Token::UNION_OP); 315 ++mPosition; 316 break; 317 default: 318 // Error, don't grok character :-( 319 return NS_ERROR_XPATH_ILLEGAL_CHAR; 320 } 321 } 322 if (isToken) { 323 NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE); 324 prevToken = newToken; 325 addToken(newToken); 326 } 327 } 328 329 // add a endToken to the list 330 newToken = new Token(end, end, Token::END); 331 addToken(newToken); 332 333 return NS_OK; 334 }