Token.h (6880B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* 8 * Token-affiliated data structures except for TokenKind (defined in its own 9 * header). 10 */ 11 12 #ifndef frontend_Token_h 13 #define frontend_Token_h 14 15 #include "mozilla/Assertions.h" // MOZ_ASSERT 16 17 #include <compare> // std::strong_ordering 18 #include <stdint.h> // uint32_t 19 20 #include "frontend/ParserAtom.h" // TaggedParserAtomIndex, TrivialTaggedParserAtomIndex 21 #include "frontend/TokenKind.h" // js::frontend::TokenKind 22 #include "js/RegExpFlags.h" // JS::RegExpFlags 23 24 namespace js { 25 26 namespace frontend { 27 28 struct TokenPos { 29 uint32_t begin = 0; // Offset of the token's first code unit. 30 uint32_t end = 0; // Offset of 1 past the token's last code unit. 31 32 TokenPos() = default; 33 TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {} 34 35 // Return a TokenPos that covers left, right, and anything in between. 36 static TokenPos box(const TokenPos& left, const TokenPos& right) { 37 MOZ_ASSERT(left.begin <= left.end); 38 MOZ_ASSERT(left.end <= right.begin); 39 MOZ_ASSERT(right.begin <= right.end); 40 return TokenPos(left.begin, right.end); 41 } 42 43 constexpr bool operator==(const TokenPos& bpos) const = default; 44 45 constexpr auto operator<=>(const TokenPos& bpos) const { 46 return begin <=> bpos.begin; 47 } 48 49 bool encloses(const TokenPos& pos) const { 50 return begin <= pos.begin && pos.end <= end; 51 } 52 }; 53 54 enum DecimalPoint { NoDecimal = false, HasDecimal = true }; 55 56 // The only escapes found in IdentifierName are of the Unicode flavor. 57 enum class IdentifierEscapes { None, SawUnicodeEscape }; 58 59 enum class NameVisibility { Public, Private }; 60 61 class TokenStreamShared; 62 63 struct Token { 64 private: 65 // The lexical grammar of JavaScript has a quirk around the '/' character. 66 // As the spec puts it: 67 // 68 // > There are several situations where the identification of lexical input 69 // > elements is sensitive to the syntactic grammar context that is consuming 70 // > the input elements. This requires multiple goal symbols for the lexical 71 // > grammar. [...] The InputElementRegExp goal symbol is used in all 72 // > syntactic grammar contexts where a RegularExpressionLiteral is permitted 73 // > [...] In all other contexts, InputElementDiv is used as the lexical 74 // > goal symbol. 75 // 76 // https://tc39.github.io/ecma262/#sec-lexical-and-regexp-grammars 77 // 78 // What "sensitive to the syntactic grammar context" means is, the parser has 79 // to tell the TokenStream whether to interpret '/' as division or 80 // RegExp. Because only one or the other (or neither) will be legal at that 81 // point in the program, and only the parser knows which one. 82 // 83 // But there's a problem: the parser often gets a token, puts it back, then 84 // consumes it later; or (equivalently) peeks at a token, leaves it, peeks 85 // again later, then finally consumes it. Of course we don't actually re-scan 86 // the token every time; we cache it in the TokenStream. This leads to the 87 // following rule: 88 // 89 // The parser must not pass SlashIsRegExp when getting/peeking at a token 90 // previously scanned with SlashIsDiv; or vice versa. 91 // 92 // That way, code that asks for a SlashIsRegExp mode will never get a cached 93 // Div token. But this rule is easy to screw up, because tokens are so often 94 // peeked at on Parser.cpp line A and consumed on line B, where |A-B| is 95 // thousands of lines. We therefore enforce it with the frontend's most 96 // annoying assertion (in verifyConsistentModifier), and provide 97 // Modifier::SlashIsInvalid to help avoid tripping it. 98 // 99 // This enum belongs in TokenStream, but C++, so we define it here and 100 // typedef it there. 101 enum Modifier { 102 // Parse `/` and `/=` as the division operators. (That is, use 103 // InputElementDiv as the goal symbol.) 104 SlashIsDiv, 105 106 // Parse `/` as the beginning of a RegExp literal. (That is, use 107 // InputElementRegExp.) 108 SlashIsRegExp, 109 110 // Neither a Div token nor a RegExp token is syntactically valid here. When 111 // the parser calls `getToken(SlashIsInvalid)`, it must be prepared to see 112 // either one (and throw a SyntaxError either way). 113 // 114 // It's OK to use SlashIsInvalid to get a token that was originally scanned 115 // with SlashIsDiv or SlashIsRegExp. The reverse--peeking with 116 // SlashIsInvalid, then getting with another mode--is not OK. If either Div 117 // or RegExp is syntactically valid here, use the appropriate modifier. 118 SlashIsInvalid, 119 }; 120 friend class TokenStreamShared; 121 122 public: 123 /** The type of this token. */ 124 TokenKind type; 125 126 /** The token's position in the overall script. */ 127 TokenPos pos; 128 129 union U { 130 private: 131 friend struct Token; 132 133 TrivialTaggedParserAtomIndex atom; 134 135 struct { 136 /** Numeric literal's value. */ 137 double value; 138 139 /** Does the numeric literal contain a '.'? */ 140 DecimalPoint decimalPoint; 141 } number; 142 143 /** Regular expression flags; use charBuffer to access source chars. */ 144 JS::RegExpFlags reflags; 145 146 public: 147 U() {}; 148 } u; 149 150 #ifdef DEBUG 151 /** The modifier used to get this token. */ 152 Modifier modifier; 153 #endif 154 155 // Mutators 156 157 void setName(TaggedParserAtomIndex name) { 158 MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName); 159 u.atom = TrivialTaggedParserAtomIndex::from(name); 160 } 161 162 void setAtom(TaggedParserAtomIndex atom) { 163 MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead || 164 type == TokenKind::NoSubsTemplate); 165 u.atom = TrivialTaggedParserAtomIndex::from(atom); 166 } 167 168 void setRegExpFlags(JS::RegExpFlags flags) { 169 MOZ_ASSERT(type == TokenKind::RegExp); 170 u.reflags = flags; 171 } 172 173 void setNumber(double n, DecimalPoint decimalPoint) { 174 MOZ_ASSERT(type == TokenKind::Number); 175 u.number.value = n; 176 u.number.decimalPoint = decimalPoint; 177 } 178 179 // Type-safe accessors 180 181 TaggedParserAtomIndex name() const { 182 MOZ_ASSERT(type == TokenKind::Name || type == TokenKind::PrivateName); 183 return u.atom; 184 } 185 186 TaggedParserAtomIndex atom() const { 187 MOZ_ASSERT(type == TokenKind::String || type == TokenKind::TemplateHead || 188 type == TokenKind::NoSubsTemplate); 189 return u.atom; 190 } 191 192 JS::RegExpFlags regExpFlags() const { 193 MOZ_ASSERT(type == TokenKind::RegExp); 194 return u.reflags; 195 } 196 197 double number() const { 198 MOZ_ASSERT(type == TokenKind::Number); 199 return u.number.value; 200 } 201 202 DecimalPoint decimalPoint() const { 203 MOZ_ASSERT(type == TokenKind::Number); 204 return u.number.decimalPoint; 205 } 206 }; 207 208 } // namespace frontend 209 210 } // namespace js 211 212 #endif // frontend_Token_h