tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

txExprLexer.cpp (10517B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 /**
      7 * Lexical analyzer for XPath expressions
      8 */
      9 
     10 #include "txExprLexer.h"
     11 
     12 #include "nsError.h"
     13 #include "nsGkAtoms.h"
     14 #include "nsString.h"
     15 #include "txXMLUtils.h"
     16 
     17 /**
     18 * Creates a new ExprLexer
     19 */
     20 txExprLexer::txExprLexer()
     21    : mPosition(nullptr),
     22      mCurrentItem(nullptr),
     23      mFirstItem(nullptr),
     24      mLastItem(nullptr),
     25      mTokenCount(0) {}
     26 
     27 /**
     28 * Destroys this instance of an txExprLexer
     29 */
     30 txExprLexer::~txExprLexer() {
     31  //-- delete tokens
     32  Token* tok = mFirstItem;
     33  while (tok) {
     34    Token* temp = tok->mNext;
     35    delete tok;
     36    tok = temp;
     37  }
     38  mCurrentItem = nullptr;
     39 }
     40 
     41 Token* txExprLexer::nextToken() {
     42  if (!mCurrentItem) {
     43    MOZ_ASSERT_UNREACHABLE("nextToken called on uninitialized lexer");
     44    return nullptr;
     45  }
     46 
     47  if (mCurrentItem->mType == Token::END) {
     48    // Do not progress beyond the end token
     49    return mCurrentItem;
     50  }
     51 
     52  Token* token = mCurrentItem;
     53  mCurrentItem = mCurrentItem->mNext;
     54  return token;
     55 }
     56 
     57 void txExprLexer::addToken(Token* aToken) {
     58  if (mLastItem) {
     59    mLastItem->mNext = aToken;
     60  }
     61  if (!mFirstItem) {
     62    mFirstItem = aToken;
     63    mCurrentItem = aToken;
     64  }
     65  mLastItem = aToken;
     66  ++mTokenCount;
     67 }
     68 
     69 /**
     70 * Returns true if the following Token should be an operator.
     71 * This is a helper for the first bullet of [XPath 3.7]
     72 *  Lexical Structure
     73 */
     74 bool txExprLexer::nextIsOperatorToken(Token* aToken) {
     75  if (!aToken || aToken->mType == Token::NULL_TOKEN) {
     76    return false;
     77  }
     78  /* This relies on the tokens having the right order in txExprLexer.h */
     79  return aToken->mType < Token::COMMA || aToken->mType > Token::UNION_OP;
     80 }
     81 
     82 /**
     83 * Parses the given string into a sequence of Tokens
     84 */
     85 nsresult txExprLexer::parse(const nsAString& aPattern) {
     86  iterator end;
     87  aPattern.BeginReading(mPosition);
     88  aPattern.EndReading(end);
     89 
     90  //-- initialize previous token, this will automatically get
     91  //-- deleted when it goes out of scope
     92  Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
     93 
     94  Token::Type defType;
     95  Token* newToken = nullptr;
     96  Token* prevToken = &nullToken;
     97  bool isToken;
     98 
     99  while (mPosition < end) {
    100    defType = Token::CNAME;
    101    isToken = true;
    102 
    103    if (*mPosition == DOLLAR_SIGN) {
    104      if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
    105        return NS_ERROR_XPATH_INVALID_VAR_NAME;
    106      }
    107      defType = Token::VAR_REFERENCE;
    108    }
    109    // just reuse the QName parsing, which will use defType
    110    // the token to construct
    111 
    112    if (XMLUtils::isLetter(*mPosition)) {
    113      // NCName, can get QName or OperatorName;
    114      //  FunctionName, NodeName, and AxisSpecifier may want whitespace,
    115      //  and are dealt with below
    116      iterator start = mPosition;
    117      while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
    118        /* just go */
    119      }
    120      if (mPosition < end && *mPosition == COLON) {
    121        // try QName or wildcard, might need to step back for axis
    122        if (++mPosition == end) {
    123          return NS_ERROR_XPATH_UNEXPECTED_END;
    124        }
    125        if (XMLUtils::isLetter(*mPosition)) {
    126          while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
    127            /* just go */
    128          }
    129        } else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
    130          // eat wildcard for NameTest, bail for var ref at COLON
    131          ++mPosition;
    132        } else {
    133          --mPosition;  // step back
    134        }
    135      }
    136      if (nextIsOperatorToken(prevToken)) {
    137        nsDependentSubstring op(Substring(start, mPosition));
    138        if (nsGkAtoms::_and->Equals(op)) {
    139          defType = Token::AND_OP;
    140        } else if (nsGkAtoms::_or->Equals(op)) {
    141          defType = Token::OR_OP;
    142        } else if (nsGkAtoms::mod->Equals(op)) {
    143          defType = Token::MODULUS_OP;
    144        } else if (nsGkAtoms::div->Equals(op)) {
    145          defType = Token::DIVIDE_OP;
    146        } else {
    147          // XXX QUESTION: spec is not too precise
    148          // badops is sure an error, but is bad:ops, too? We say yes!
    149          return NS_ERROR_XPATH_OPERATOR_EXPECTED;
    150        }
    151      }
    152      newToken = new Token(start, mPosition, defType);
    153    } else if (isXPathDigit(*mPosition)) {
    154      iterator start = mPosition;
    155      while (++mPosition < end && isXPathDigit(*mPosition)) {
    156        /* just go */
    157      }
    158      if (mPosition < end && *mPosition == '.') {
    159        while (++mPosition < end && isXPathDigit(*mPosition)) {
    160          /* just go */
    161        }
    162      }
    163      newToken = new Token(start, mPosition, Token::NUMBER);
    164    } else {
    165      switch (*mPosition) {
    166          //-- ignore whitespace
    167        case SPACE:
    168        case TX_TAB:
    169        case TX_CR:
    170        case TX_LF:
    171          ++mPosition;
    172          isToken = false;
    173          break;
    174        case S_QUOTE:
    175        case D_QUOTE: {
    176          iterator start = mPosition;
    177          while (++mPosition < end && *mPosition != *start) {
    178            // eat literal
    179          }
    180          if (mPosition == end) {
    181            mPosition = start;
    182            return NS_ERROR_XPATH_UNCLOSED_LITERAL;
    183          }
    184          newToken = new Token(start + 1, mPosition, Token::LITERAL);
    185          ++mPosition;
    186        } break;
    187        case PERIOD:
    188          // period can be .., .(DIGITS)+ or ., check next
    189          if (++mPosition == end) {
    190            newToken = new Token(mPosition - 1, Token::SELF_NODE);
    191          } else if (isXPathDigit(*mPosition)) {
    192            iterator start = mPosition - 1;
    193            while (++mPosition < end && isXPathDigit(*mPosition)) {
    194              /* just go */
    195            }
    196            newToken = new Token(start, mPosition, Token::NUMBER);
    197          } else if (*mPosition == PERIOD) {
    198            ++mPosition;
    199            newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
    200          } else {
    201            newToken = new Token(mPosition - 1, Token::SELF_NODE);
    202          }
    203          break;
    204        case COLON:  // QNames are dealt above, must be axis ident
    205          if (++mPosition >= end || *mPosition != COLON ||
    206              prevToken->mType != Token::CNAME) {
    207            return NS_ERROR_XPATH_BAD_COLON;
    208          }
    209          prevToken->mType = Token::AXIS_IDENTIFIER;
    210          ++mPosition;
    211          isToken = false;
    212          break;
    213        case FORWARD_SLASH:
    214          if (++mPosition < end && *mPosition == FORWARD_SLASH) {
    215            ++mPosition;
    216            newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
    217          } else {
    218            newToken = new Token(mPosition - 1, Token::PARENT_OP);
    219          }
    220          break;
    221        case BANG:  // can only be !=
    222          if (++mPosition < end && *mPosition == EQUAL) {
    223            ++mPosition;
    224            newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
    225            break;
    226          }
    227          // Error ! is not not()
    228          return NS_ERROR_XPATH_BAD_BANG;
    229        case EQUAL:
    230          newToken = new Token(mPosition, Token::EQUAL_OP);
    231          ++mPosition;
    232          break;
    233        case L_ANGLE:
    234          if (++mPosition == end) {
    235            return NS_ERROR_XPATH_UNEXPECTED_END;
    236          }
    237          if (*mPosition == EQUAL) {
    238            ++mPosition;
    239            newToken =
    240                new Token(mPosition - 2, mPosition, Token::LESS_OR_EQUAL_OP);
    241          } else {
    242            newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
    243          }
    244          break;
    245        case R_ANGLE:
    246          if (++mPosition == end) {
    247            return NS_ERROR_XPATH_UNEXPECTED_END;
    248          }
    249          if (*mPosition == EQUAL) {
    250            ++mPosition;
    251            newToken =
    252                new Token(mPosition - 2, mPosition, Token::GREATER_OR_EQUAL_OP);
    253          } else {
    254            newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
    255          }
    256          break;
    257        case HYPHEN:
    258          newToken = new Token(mPosition, Token::SUBTRACTION_OP);
    259          ++mPosition;
    260          break;
    261        case ASTERISK:
    262          if (nextIsOperatorToken(prevToken)) {
    263            newToken = new Token(mPosition, Token::MULTIPLY_OP);
    264          } else {
    265            newToken = new Token(mPosition, Token::CNAME);
    266          }
    267          ++mPosition;
    268          break;
    269        case L_PAREN:
    270          if (prevToken->mType == Token::CNAME) {
    271            const nsDependentSubstring& val = prevToken->Value();
    272            if (val.EqualsLiteral("comment")) {
    273              prevToken->mType = Token::COMMENT_AND_PAREN;
    274            } else if (val.EqualsLiteral("node")) {
    275              prevToken->mType = Token::NODE_AND_PAREN;
    276            } else if (val.EqualsLiteral("processing-instruction")) {
    277              prevToken->mType = Token::PROC_INST_AND_PAREN;
    278            } else if (val.EqualsLiteral("text")) {
    279              prevToken->mType = Token::TEXT_AND_PAREN;
    280            } else {
    281              prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
    282            }
    283            isToken = false;
    284          } else {
    285            newToken = new Token(mPosition, Token::L_PAREN);
    286          }
    287          ++mPosition;
    288          break;
    289        case R_PAREN:
    290          newToken = new Token(mPosition, Token::R_PAREN);
    291          ++mPosition;
    292          break;
    293        case L_BRACKET:
    294          newToken = new Token(mPosition, Token::L_BRACKET);
    295          ++mPosition;
    296          break;
    297        case R_BRACKET:
    298          newToken = new Token(mPosition, Token::R_BRACKET);
    299          ++mPosition;
    300          break;
    301        case COMMA:
    302          newToken = new Token(mPosition, Token::COMMA);
    303          ++mPosition;
    304          break;
    305        case AT_SIGN:
    306          newToken = new Token(mPosition, Token::AT_SIGN);
    307          ++mPosition;
    308          break;
    309        case PLUS:
    310          newToken = new Token(mPosition, Token::ADDITION_OP);
    311          ++mPosition;
    312          break;
    313        case VERT_BAR:
    314          newToken = new Token(mPosition, Token::UNION_OP);
    315          ++mPosition;
    316          break;
    317        default:
    318          // Error, don't grok character :-(
    319          return NS_ERROR_XPATH_ILLEGAL_CHAR;
    320      }
    321    }
    322    if (isToken) {
    323      NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
    324      prevToken = newToken;
    325      addToken(newToken);
    326    }
    327  }
    328 
    329  // add a endToken to the list
    330  newToken = new Token(end, end, Token::END);
    331  addToken(newToken);
    332 
    333  return NS_OK;
    334 }