demangle.cc (102130B)
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// For reference check out:
// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling

#include "absl/debugging/internal/demangle.h"

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <string>

#include "absl/base/config.h"
#include "absl/debugging/internal/demangle_rust.h"

#if ABSL_INTERNAL_HAS_CXA_DEMANGLE
#include <cxxabi.h>
#endif

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {

// One row of an abbreviation table: pairs a mangled-name abbreviation with
// its human-readable spelling.
typedef struct {
  // Abbreviation as it appears in the mangled name (e.g. "nw").
  const char *abbrev;
  // Human-readable spelling of the abbreviation (e.g. "new").
  const char *real_name;
  // Number of arguments in <expression> context, or 0 if disallowed.
  int arity;
} AbbrevPair;

// List of operators from Itanium C++ ABI.
//
// The table ends with an all-null entry.
static const AbbrevPair kOperatorList[] = {
    // New has special syntax.
    {"nw", "new", 0},
    {"na", "new[]", 0},

    // Special-cased elsewhere to support the optional gs prefix.
    {"dl", "delete", 1},
    {"da", "delete[]", 1},

    {"aw", "co_await", 1},

    // Unary operators.
    {"ps", "+", 1},  // "positive"
    {"ng", "-", 1},  // "negative"
    {"ad", "&", 1},  // "address-of"
    {"de", "*", 1},  // "dereference"
    {"co", "~", 1},

    // Binary arithmetic, bitwise, assignment, shift and comparison operators.
    {"pl", "+", 2},
    {"mi", "-", 2},
    {"ml", "*", 2},
    {"dv", "/", 2},
    {"rm", "%", 2},
    {"an", "&", 2},
    {"or", "|", 2},
    {"eo", "^", 2},
    {"aS", "=", 2},
    {"pL", "+=", 2},
    {"mI", "-=", 2},
    {"mL", "*=", 2},
    {"dV", "/=", 2},
    {"rM", "%=", 2},
    {"aN", "&=", 2},
    {"oR", "|=", 2},
    {"eO", "^=", 2},
    {"ls", "<<", 2},
    {"rs", ">>", 2},
    {"lS", "<<=", 2},
    {"rS", ">>=", 2},
    {"ss", "<=>", 2},
    {"eq", "==", 2},
    {"ne", "!=", 2},
    {"lt", "<", 2},
    {"gt", ">", 2},
    {"le", "<=", 2},
    {"ge", ">=", 2},
    {"nt", "!", 1},
    {"aa", "&&", 2},
    {"oo", "||", 2},
    {"pp", "++", 1},
    {"mm", "--", 1},
    {"cm", ",", 2},
    {"pm", "->*", 2},
    {"pt", "->", 0},  // Special syntax
    {"cl", "()", 0},  // Special syntax
    {"ix", "[]", 2},
    {"qu", "?", 3},
    {"st", "sizeof", 0},  // Special syntax
    {"sz", "sizeof", 1},  // Not a real operator name, but used in expressions.
    {"sZ", "sizeof...", 0},  // Special syntax
    {nullptr, nullptr, 0},
};

// List of builtin types from Itanium C++ ABI.
//
// Invariant: only one- or two-character type abbreviations here.
112 static const AbbrevPair kBuiltinTypeList[] = { 113 {"v", "void", 0}, 114 {"w", "wchar_t", 0}, 115 {"b", "bool", 0}, 116 {"c", "char", 0}, 117 {"a", "signed char", 0}, 118 {"h", "unsigned char", 0}, 119 {"s", "short", 0}, 120 {"t", "unsigned short", 0}, 121 {"i", "int", 0}, 122 {"j", "unsigned int", 0}, 123 {"l", "long", 0}, 124 {"m", "unsigned long", 0}, 125 {"x", "long long", 0}, 126 {"y", "unsigned long long", 0}, 127 {"n", "__int128", 0}, 128 {"o", "unsigned __int128", 0}, 129 {"f", "float", 0}, 130 {"d", "double", 0}, 131 {"e", "long double", 0}, 132 {"g", "__float128", 0}, 133 {"z", "ellipsis", 0}, 134 135 {"De", "decimal128", 0}, // IEEE 754r decimal floating point (128 bits) 136 {"Dd", "decimal64", 0}, // IEEE 754r decimal floating point (64 bits) 137 {"Dc", "decltype(auto)", 0}, 138 {"Da", "auto", 0}, 139 {"Dn", "std::nullptr_t", 0}, // i.e., decltype(nullptr) 140 {"Df", "decimal32", 0}, // IEEE 754r decimal floating point (32 bits) 141 {"Di", "char32_t", 0}, 142 {"Du", "char8_t", 0}, 143 {"Ds", "char16_t", 0}, 144 {"Dh", "float16", 0}, // IEEE 754r half-precision float (16 bits) 145 {nullptr, nullptr, 0}, 146 }; 147 148 // List of substitutions Itanium C++ ABI. 149 static const AbbrevPair kSubstitutionList[] = { 150 {"St", "", 0}, 151 {"Sa", "allocator", 0}, 152 {"Sb", "basic_string", 0}, 153 // std::basic_string<char, std::char_traits<char>,std::allocator<char> > 154 {"Ss", "string", 0}, 155 // std::basic_istream<char, std::char_traits<char> > 156 {"Si", "istream", 0}, 157 // std::basic_ostream<char, std::char_traits<char> > 158 {"So", "ostream", 0}, 159 // std::basic_iostream<char, std::char_traits<char> > 160 {"Sd", "iostream", 0}, 161 {nullptr, nullptr, 0}, 162 }; 163 164 // State needed for demangling. This struct is copied in almost every stack 165 // frame, so every byte counts. 166 typedef struct { 167 int mangled_idx; // Cursor of mangled name. 168 int out_cur_idx; // Cursor of output string. 
169 int prev_name_idx; // For constructors/destructors. 170 unsigned int prev_name_length : 16; // For constructors/destructors. 171 signed int nest_level : 15; // For nested names. 172 unsigned int append : 1; // Append flag. 173 // Note: for some reason MSVC can't pack "bool append : 1" into the same int 174 // with the above two fields, so we use an int instead. Amusingly it can pack 175 // "signed bool" as expected, but relying on that to continue to be a legal 176 // type seems ill-advised (as it's illegal in at least clang). 177 } ParseState; 178 179 static_assert(sizeof(ParseState) == 4 * sizeof(int), 180 "unexpected size of ParseState"); 181 182 // One-off state for demangling that's not subject to backtracking -- either 183 // constant data, data that's intentionally immune to backtracking (steps), or 184 // data that would never be changed by backtracking anyway (recursion_depth). 185 // 186 // Only one copy of this exists for each call to Demangle, so the size of this 187 // struct is nearly inconsequential. 188 typedef struct { 189 const char *mangled_begin; // Beginning of input string. 190 char *out; // Beginning of output string. 191 int out_end_idx; // One past last allowed output character. 192 int recursion_depth; // For stack exhaustion prevention. 193 int steps; // Cap how much work we'll do, regardless of depth. 194 ParseState parse_state; // Backtrackable state copied for most frames. 195 196 // Conditionally compiled support for marking the position of the first 197 // construct Demangle couldn't parse. This preprocessor symbol is intended 198 // for use by Abseil demangler maintainers only; its behavior is not part of 199 // Abseil's public interface. 200 #ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK 201 int high_water_mark; // Input position where parsing failed. 202 bool too_complex; // True if any guard.IsTooComplex() call returned true. 
203 #endif 204 } State; 205 206 namespace { 207 208 #ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK 209 void UpdateHighWaterMark(State *state) { 210 if (state->high_water_mark < state->parse_state.mangled_idx) { 211 state->high_water_mark = state->parse_state.mangled_idx; 212 } 213 } 214 215 void ReportHighWaterMark(State *state) { 216 // Write out the mangled name with the trouble point marked, provided that the 217 // output buffer is large enough and the mangled name did not hit a complexity 218 // limit (in which case the high water mark wouldn't point out an unparsable 219 // construct, only the point where a budget ran out). 220 const size_t input_length = std::strlen(state->mangled_begin); 221 if (input_length + 6 > static_cast<size_t>(state->out_end_idx) || 222 state->too_complex) { 223 if (state->out_end_idx > 0) state->out[0] = '\0'; 224 return; 225 } 226 const size_t high_water_mark = static_cast<size_t>(state->high_water_mark); 227 std::memcpy(state->out, state->mangled_begin, high_water_mark); 228 std::memcpy(state->out + high_water_mark, "--!--", 5); 229 std::memcpy(state->out + high_water_mark + 5, 230 state->mangled_begin + high_water_mark, 231 input_length - high_water_mark); 232 state->out[input_length + 5] = '\0'; 233 } 234 #else 235 void UpdateHighWaterMark(State *) {} 236 void ReportHighWaterMark(State *) {} 237 #endif 238 239 // Prevent deep recursion / stack exhaustion. 240 // Also prevent unbounded handling of complex inputs. 241 class ComplexityGuard { 242 public: 243 explicit ComplexityGuard(State *state) : state_(state) { 244 ++state->recursion_depth; 245 ++state->steps; 246 } 247 ~ComplexityGuard() { --state_->recursion_depth; } 248 249 // 256 levels of recursion seems like a reasonable upper limit on depth. 250 // 128 is not enough to demangle synthetic tests from demangle_unittest.txt: 251 // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..." 
252 static constexpr int kRecursionDepthLimit = 256; 253 254 // We're trying to pick a charitable upper-limit on how many parse steps are 255 // necessary to handle something that a human could actually make use of. 256 // This is mostly in place as a bound on how much work we'll do if we are 257 // asked to demangle an mangled name from an untrusted source, so it should be 258 // much larger than the largest expected symbol, but much smaller than the 259 // amount of work we can do in, e.g., a second. 260 // 261 // Some real-world symbols from an arbitrary binary started failing between 262 // 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set 263 // the limit. 264 // 265 // Spending one second on 2^17 parse steps would require each step to take 266 // 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in 267 // under a second. 268 static constexpr int kParseStepsLimit = 1 << 17; 269 270 bool IsTooComplex() const { 271 if (state_->recursion_depth > kRecursionDepthLimit || 272 state_->steps > kParseStepsLimit) { 273 #ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK 274 state_->too_complex = true; 275 #endif 276 return true; 277 } 278 return false; 279 } 280 281 private: 282 State *state_; 283 }; 284 } // namespace 285 286 // We don't use strlen() in libc since it's not guaranteed to be async 287 // signal safe. 288 static size_t StrLen(const char *str) { 289 size_t len = 0; 290 while (*str != '\0') { 291 ++str; 292 ++len; 293 } 294 return len; 295 } 296 297 // Returns true if "str" has at least "n" characters remaining. 298 static bool AtLeastNumCharsRemaining(const char *str, size_t n) { 299 for (size_t i = 0; i < n; ++i) { 300 if (str[i] == '\0') { 301 return false; 302 } 303 } 304 return true; 305 } 306 307 // Returns true if "str" has "prefix" as a prefix. 
308 static bool StrPrefix(const char *str, const char *prefix) { 309 size_t i = 0; 310 while (str[i] != '\0' && prefix[i] != '\0' && str[i] == prefix[i]) { 311 ++i; 312 } 313 return prefix[i] == '\0'; // Consumed everything in "prefix". 314 } 315 316 static void InitState(State* state, 317 const char* mangled, 318 char* out, 319 size_t out_size) { 320 state->mangled_begin = mangled; 321 state->out = out; 322 state->out_end_idx = static_cast<int>(out_size); 323 state->recursion_depth = 0; 324 state->steps = 0; 325 #ifdef ABSL_INTERNAL_DEMANGLE_RECORDS_HIGH_WATER_MARK 326 state->high_water_mark = 0; 327 state->too_complex = false; 328 #endif 329 330 state->parse_state.mangled_idx = 0; 331 state->parse_state.out_cur_idx = 0; 332 state->parse_state.prev_name_idx = 0; 333 state->parse_state.prev_name_length = 0; 334 state->parse_state.nest_level = -1; 335 state->parse_state.append = true; 336 } 337 338 static inline const char *RemainingInput(State *state) { 339 return &state->mangled_begin[state->parse_state.mangled_idx]; 340 } 341 342 // Returns true and advances "mangled_idx" if we find "one_char_token" 343 // at "mangled_idx" position. It is assumed that "one_char_token" does 344 // not contain '\0'. 345 static bool ParseOneCharToken(State *state, const char one_char_token) { 346 ComplexityGuard guard(state); 347 if (guard.IsTooComplex()) return false; 348 if (RemainingInput(state)[0] == one_char_token) { 349 ++state->parse_state.mangled_idx; 350 UpdateHighWaterMark(state); 351 return true; 352 } 353 return false; 354 } 355 356 // Returns true and advances "mangled_idx" if we find "two_char_token" 357 // at "mangled_idx" position. It is assumed that "two_char_token" does 358 // not contain '\0'. 
359 static bool ParseTwoCharToken(State *state, const char *two_char_token) { 360 ComplexityGuard guard(state); 361 if (guard.IsTooComplex()) return false; 362 if (RemainingInput(state)[0] == two_char_token[0] && 363 RemainingInput(state)[1] == two_char_token[1]) { 364 state->parse_state.mangled_idx += 2; 365 UpdateHighWaterMark(state); 366 return true; 367 } 368 return false; 369 } 370 371 // Returns true and advances "mangled_idx" if we find "three_char_token" 372 // at "mangled_idx" position. It is assumed that "three_char_token" does 373 // not contain '\0'. 374 static bool ParseThreeCharToken(State *state, const char *three_char_token) { 375 ComplexityGuard guard(state); 376 if (guard.IsTooComplex()) return false; 377 if (RemainingInput(state)[0] == three_char_token[0] && 378 RemainingInput(state)[1] == three_char_token[1] && 379 RemainingInput(state)[2] == three_char_token[2]) { 380 state->parse_state.mangled_idx += 3; 381 UpdateHighWaterMark(state); 382 return true; 383 } 384 return false; 385 } 386 387 // Returns true and advances "mangled_idx" if we find a copy of the 388 // NUL-terminated string "long_token" at "mangled_idx" position. 389 static bool ParseLongToken(State *state, const char *long_token) { 390 ComplexityGuard guard(state); 391 if (guard.IsTooComplex()) return false; 392 int i = 0; 393 for (; long_token[i] != '\0'; ++i) { 394 // Note that we cannot run off the end of the NUL-terminated input here. 395 // Inside the loop body, long_token[i] is known to be different from NUL. 396 // So if we read the NUL on the end of the input here, we return at once. 397 if (RemainingInput(state)[i] != long_token[i]) return false; 398 } 399 state->parse_state.mangled_idx += i; 400 UpdateHighWaterMark(state); 401 return true; 402 } 403 404 // Returns true and advances "mangled_cur" if we find any character in 405 // "char_class" at "mangled_cur" position. 
406 static bool ParseCharClass(State *state, const char *char_class) { 407 ComplexityGuard guard(state); 408 if (guard.IsTooComplex()) return false; 409 if (RemainingInput(state)[0] == '\0') { 410 return false; 411 } 412 const char *p = char_class; 413 for (; *p != '\0'; ++p) { 414 if (RemainingInput(state)[0] == *p) { 415 ++state->parse_state.mangled_idx; 416 UpdateHighWaterMark(state); 417 return true; 418 } 419 } 420 return false; 421 } 422 423 static bool ParseDigit(State *state, int *digit) { 424 char c = RemainingInput(state)[0]; 425 if (ParseCharClass(state, "0123456789")) { 426 if (digit != nullptr) { 427 *digit = c - '0'; 428 } 429 return true; 430 } 431 return false; 432 } 433 434 // This function is used for handling an optional non-terminal. 435 static bool Optional(bool /*status*/) { return true; } 436 437 // This function is used for handling <non-terminal>+ syntax. 438 typedef bool (*ParseFunc)(State *); 439 static bool OneOrMore(ParseFunc parse_func, State *state) { 440 if (parse_func(state)) { 441 while (parse_func(state)) { 442 } 443 return true; 444 } 445 return false; 446 } 447 448 // This function is used for handling <non-terminal>* syntax. The function 449 // always returns true and must be followed by a termination token or a 450 // terminating sequence not handled by parse_func (e.g. 451 // ParseOneCharToken(state, 'E')). 452 static bool ZeroOrMore(ParseFunc parse_func, State *state) { 453 while (parse_func(state)) { 454 } 455 return true; 456 } 457 458 // Append "str" at "out_cur_idx". If there is an overflow, out_cur_idx is 459 // set to out_end_idx+1. The output string is ensured to 460 // always terminate with '\0' as long as there is no overflow. 
461 static void Append(State *state, const char *const str, const size_t length) { 462 for (size_t i = 0; i < length; ++i) { 463 if (state->parse_state.out_cur_idx + 1 < 464 state->out_end_idx) { // +1 for '\0' 465 state->out[state->parse_state.out_cur_idx++] = str[i]; 466 } else { 467 // signal overflow 468 state->parse_state.out_cur_idx = state->out_end_idx + 1; 469 break; 470 } 471 } 472 if (state->parse_state.out_cur_idx < state->out_end_idx) { 473 state->out[state->parse_state.out_cur_idx] = 474 '\0'; // Terminate it with '\0' 475 } 476 } 477 478 // We don't use equivalents in libc to avoid locale issues. 479 static bool IsLower(char c) { return c >= 'a' && c <= 'z'; } 480 481 static bool IsAlpha(char c) { 482 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); 483 } 484 485 static bool IsDigit(char c) { return c >= '0' && c <= '9'; } 486 487 // Returns true if "str" is a function clone suffix. These suffixes are used 488 // by GCC 4.5.x and later versions (and our locally-modified version of GCC 489 // 4.4.x) to indicate functions which have been cloned during optimization. 490 // We treat any sequence (.<alpha>+.<digit>+)+ as a function clone suffix. 491 // Additionally, '_' is allowed along with the alphanumeric sequence. 492 static bool IsFunctionCloneSuffix(const char *str) { 493 size_t i = 0; 494 while (str[i] != '\0') { 495 bool parsed = false; 496 // Consume a single [.<alpha> | _]*[.<digit>]* sequence. 497 if (str[i] == '.' && (IsAlpha(str[i + 1]) || str[i + 1] == '_')) { 498 parsed = true; 499 i += 2; 500 while (IsAlpha(str[i]) || str[i] == '_') { 501 ++i; 502 } 503 } 504 if (str[i] == '.' && IsDigit(str[i + 1])) { 505 parsed = true; 506 i += 2; 507 while (IsDigit(str[i])) { 508 ++i; 509 } 510 } 511 if (!parsed) 512 return false; 513 } 514 return true; // Consumed everything in "str". 
515 } 516 517 static bool EndsWith(State *state, const char chr) { 518 return state->parse_state.out_cur_idx > 0 && 519 state->parse_state.out_cur_idx < state->out_end_idx && 520 chr == state->out[state->parse_state.out_cur_idx - 1]; 521 } 522 523 // Append "str" with some tweaks, iff "append" state is true. 524 static void MaybeAppendWithLength(State *state, const char *const str, 525 const size_t length) { 526 if (state->parse_state.append && length > 0) { 527 // Append a space if the output buffer ends with '<' and "str" 528 // starts with '<' to avoid <<<. 529 if (str[0] == '<' && EndsWith(state, '<')) { 530 Append(state, " ", 1); 531 } 532 // Remember the last identifier name for ctors/dtors, 533 // but only if we haven't yet overflown the buffer. 534 if (state->parse_state.out_cur_idx < state->out_end_idx && 535 (IsAlpha(str[0]) || str[0] == '_')) { 536 state->parse_state.prev_name_idx = state->parse_state.out_cur_idx; 537 state->parse_state.prev_name_length = static_cast<unsigned int>(length); 538 } 539 Append(state, str, length); 540 } 541 } 542 543 // Appends a positive decimal number to the output if appending is enabled. 544 static bool MaybeAppendDecimal(State *state, int val) { 545 // Max {32-64}-bit unsigned int is 20 digits. 546 constexpr size_t kMaxLength = 20; 547 char buf[kMaxLength]; 548 549 // We can't use itoa or sprintf as neither is specified to be 550 // async-signal-safe. 551 if (state->parse_state.append) { 552 // We can't have a one-before-the-beginning pointer, so instead start with 553 // one-past-the-end and manipulate one character before the pointer. 554 char *p = &buf[kMaxLength]; 555 do { // val=0 is the only input that should write a leading zero digit. 556 *--p = static_cast<char>((val % 10) + '0'); 557 val /= 10; 558 } while (p > buf && val != 0); 559 560 // 'p' landed on the last character we set. How convenient. 
561 Append(state, p, kMaxLength - static_cast<size_t>(p - buf)); 562 } 563 564 return true; 565 } 566 567 // A convenient wrapper around MaybeAppendWithLength(). 568 // Returns true so that it can be placed in "if" conditions. 569 static bool MaybeAppend(State *state, const char *const str) { 570 if (state->parse_state.append) { 571 size_t length = StrLen(str); 572 MaybeAppendWithLength(state, str, length); 573 } 574 return true; 575 } 576 577 // This function is used for handling nested names. 578 static bool EnterNestedName(State *state) { 579 state->parse_state.nest_level = 0; 580 return true; 581 } 582 583 // This function is used for handling nested names. 584 static bool LeaveNestedName(State *state, int16_t prev_value) { 585 state->parse_state.nest_level = prev_value; 586 return true; 587 } 588 589 // Disable the append mode not to print function parameters, etc. 590 static bool DisableAppend(State *state) { 591 state->parse_state.append = false; 592 return true; 593 } 594 595 // Restore the append mode to the previous state. 596 static bool RestoreAppend(State *state, bool prev_value) { 597 state->parse_state.append = prev_value; 598 return true; 599 } 600 601 // Increase the nest level for nested names. 602 static void MaybeIncreaseNestLevel(State *state) { 603 if (state->parse_state.nest_level > -1) { 604 ++state->parse_state.nest_level; 605 } 606 } 607 608 // Appends :: for nested names if necessary. 609 static void MaybeAppendSeparator(State *state) { 610 if (state->parse_state.nest_level >= 1) { 611 MaybeAppend(state, "::"); 612 } 613 } 614 615 // Cancel the last separator if necessary. 
616 static void MaybeCancelLastSeparator(State *state) { 617 if (state->parse_state.nest_level >= 1 && state->parse_state.append && 618 state->parse_state.out_cur_idx >= 2) { 619 state->parse_state.out_cur_idx -= 2; 620 state->out[state->parse_state.out_cur_idx] = '\0'; 621 } 622 } 623 624 // Returns true if the identifier of the given length pointed to by 625 // "mangled_cur" is anonymous namespace. 626 static bool IdentifierIsAnonymousNamespace(State *state, size_t length) { 627 // Returns true if "anon_prefix" is a proper prefix of "mangled_cur". 628 static const char anon_prefix[] = "_GLOBAL__N_"; 629 return (length > (sizeof(anon_prefix) - 1) && 630 StrPrefix(RemainingInput(state), anon_prefix)); 631 } 632 633 // Forward declarations of our parsing functions. 634 static bool ParseMangledName(State *state); 635 static bool ParseEncoding(State *state); 636 static bool ParseName(State *state); 637 static bool ParseUnscopedName(State *state); 638 static bool ParseNestedName(State *state); 639 static bool ParsePrefix(State *state); 640 static bool ParseUnqualifiedName(State *state); 641 static bool ParseSourceName(State *state); 642 static bool ParseLocalSourceName(State *state); 643 static bool ParseUnnamedTypeName(State *state); 644 static bool ParseNumber(State *state, int *number_out); 645 static bool ParseFloatNumber(State *state); 646 static bool ParseSeqId(State *state); 647 static bool ParseIdentifier(State *state, size_t length); 648 static bool ParseOperatorName(State *state, int *arity); 649 static bool ParseConversionOperatorType(State *state); 650 static bool ParseSpecialName(State *state); 651 static bool ParseCallOffset(State *state); 652 static bool ParseNVOffset(State *state); 653 static bool ParseVOffset(State *state); 654 static bool ParseAbiTags(State *state); 655 static bool ParseCtorDtorName(State *state); 656 static bool ParseDecltype(State *state); 657 static bool ParseType(State *state); 658 static bool ParseCVQualifiers(State *state); 659 
static bool ParseExtendedQualifier(State *state); 660 static bool ParseBuiltinType(State *state); 661 static bool ParseVendorExtendedType(State *state); 662 static bool ParseFunctionType(State *state); 663 static bool ParseBareFunctionType(State *state); 664 static bool ParseOverloadAttribute(State *state); 665 static bool ParseClassEnumType(State *state); 666 static bool ParseArrayType(State *state); 667 static bool ParsePointerToMemberType(State *state); 668 static bool ParseTemplateParam(State *state); 669 static bool ParseTemplateParamDecl(State *state); 670 static bool ParseTemplateTemplateParam(State *state); 671 static bool ParseTemplateArgs(State *state); 672 static bool ParseTemplateArg(State *state); 673 static bool ParseBaseUnresolvedName(State *state); 674 static bool ParseUnresolvedName(State *state); 675 static bool ParseUnresolvedQualifierLevel(State *state); 676 static bool ParseUnionSelector(State* state); 677 static bool ParseFunctionParam(State* state); 678 static bool ParseBracedExpression(State *state); 679 static bool ParseExpression(State *state); 680 static bool ParseInitializer(State *state); 681 static bool ParseExprPrimary(State *state); 682 static bool ParseExprCastValueAndTrailingE(State *state); 683 static bool ParseQRequiresClauseExpr(State *state); 684 static bool ParseRequirement(State *state); 685 static bool ParseTypeConstraint(State *state); 686 static bool ParseLocalName(State *state); 687 static bool ParseLocalNameSuffix(State *state); 688 static bool ParseDiscriminator(State *state); 689 static bool ParseSubstitution(State *state, bool accept_std); 690 691 // Implementation note: the following code is a straightforward 692 // translation of the Itanium C++ ABI defined in BNF with a couple of 693 // exceptions. 
694 // 695 // - Support GNU extensions not defined in the Itanium C++ ABI 696 // - <prefix> and <template-prefix> are combined to avoid infinite loop 697 // - Reorder patterns to shorten the code 698 // - Reorder patterns to give greedier functions precedence 699 // We'll mark "Less greedy than" for these cases in the code 700 // 701 // Each parsing function changes the parse state and returns true on 702 // success, or returns false and doesn't change the parse state (note: 703 // the parse-steps counter increases regardless of success or failure). 704 // To ensure that the parse state isn't changed in the latter case, we 705 // save the original state before we call multiple parsing functions 706 // consecutively with &&, and restore it if unsuccessful. See 707 // ParseEncoding() as an example of this convention. We follow the 708 // convention throughout the code. 709 // 710 // Originally we tried to do demangling without following the full ABI 711 // syntax but it turned out we needed to follow the full syntax to 712 // parse complicated cases like nested template arguments. Note that 713 // implementing a full-fledged demangler isn't trivial (libiberty's 714 // cp-demangle.c has +4300 lines). 715 // 716 // Note that (foo) in <(foo) ...> is a modifier to be ignored. 
717 // 718 // Reference: 719 // - Itanium C++ ABI 720 // <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling> 721 722 // <mangled-name> ::= _Z <encoding> 723 static bool ParseMangledName(State *state) { 724 ComplexityGuard guard(state); 725 if (guard.IsTooComplex()) return false; 726 return ParseTwoCharToken(state, "_Z") && ParseEncoding(state); 727 } 728 729 // <encoding> ::= <(function) name> <bare-function-type> 730 // [`Q` <requires-clause expr>] 731 // ::= <(data) name> 732 // ::= <special-name> 733 // 734 // NOTE: Based on http://shortn/_Hoq9qG83rx 735 static bool ParseEncoding(State *state) { 736 ComplexityGuard guard(state); 737 if (guard.IsTooComplex()) return false; 738 // Since the first two productions both start with <name>, attempt 739 // to parse it only once to avoid exponential blowup of backtracking. 740 // 741 // We're careful about exponential blowup because <encoding> recursively 742 // appears in other productions downstream of its first two productions, 743 // which means that every call to `ParseName` would possibly indirectly 744 // result in two calls to `ParseName` etc. 745 if (ParseName(state)) { 746 if (!ParseBareFunctionType(state)) { 747 return true; // <(data) name> 748 } 749 750 // Parsed: <(function) name> <bare-function-type> 751 // Pending: [`Q` <requires-clause expr>] 752 ParseQRequiresClauseExpr(state); // restores state on failure 753 return true; 754 } 755 756 if (ParseSpecialName(state)) { 757 return true; // <special-name> 758 } 759 return false; 760 } 761 762 // <name> ::= <nested-name> 763 // ::= <unscoped-template-name> <template-args> 764 // ::= <unscoped-name> 765 // ::= <local-name> 766 static bool ParseName(State *state) { 767 ComplexityGuard guard(state); 768 if (guard.IsTooComplex()) return false; 769 if (ParseNestedName(state) || ParseLocalName(state)) { 770 return true; 771 } 772 773 // We reorganize the productions to avoid re-parsing unscoped names. 
774 // - Inline <unscoped-template-name> productions: 775 // <name> ::= <substitution> <template-args> 776 // ::= <unscoped-name> <template-args> 777 // ::= <unscoped-name> 778 // - Merge the two productions that start with unscoped-name: 779 // <name> ::= <unscoped-name> [<template-args>] 780 781 ParseState copy = state->parse_state; 782 // "std<...>" isn't a valid name. 783 if (ParseSubstitution(state, /*accept_std=*/false) && 784 ParseTemplateArgs(state)) { 785 return true; 786 } 787 state->parse_state = copy; 788 789 // Note there's no need to restore state after this since only the first 790 // subparser can fail. 791 return ParseUnscopedName(state) && Optional(ParseTemplateArgs(state)); 792 } 793 794 // <unscoped-name> ::= <unqualified-name> 795 // ::= St <unqualified-name> 796 static bool ParseUnscopedName(State *state) { 797 ComplexityGuard guard(state); 798 if (guard.IsTooComplex()) return false; 799 if (ParseUnqualifiedName(state)) { 800 return true; 801 } 802 803 ParseState copy = state->parse_state; 804 if (ParseTwoCharToken(state, "St") && MaybeAppend(state, "std::") && 805 ParseUnqualifiedName(state)) { 806 return true; 807 } 808 state->parse_state = copy; 809 return false; 810 } 811 812 // <ref-qualifer> ::= R // lvalue method reference qualifier 813 // ::= O // rvalue method reference qualifier 814 static inline bool ParseRefQualifier(State *state) { 815 return ParseCharClass(state, "OR"); 816 } 817 818 // <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix> 819 // <unqualified-name> E 820 // ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix> 821 // <template-args> E 822 static bool ParseNestedName(State *state) { 823 ComplexityGuard guard(state); 824 if (guard.IsTooComplex()) return false; 825 ParseState copy = state->parse_state; 826 if (ParseOneCharToken(state, 'N') && EnterNestedName(state) && 827 Optional(ParseCVQualifiers(state)) && 828 Optional(ParseRefQualifier(state)) && ParsePrefix(state) && 829 LeaveNestedName(state, 
copy.nest_level) && 830 ParseOneCharToken(state, 'E')) { 831 return true; 832 } 833 state->parse_state = copy; 834 return false; 835 } 836 837 // This part is tricky. If we literally translate them to code, we'll 838 // end up infinite loop. Hence we merge them to avoid the case. 839 // 840 // <prefix> ::= <prefix> <unqualified-name> 841 // ::= <template-prefix> <template-args> 842 // ::= <template-param> 843 // ::= <decltype> 844 // ::= <substitution> 845 // ::= # empty 846 // <template-prefix> ::= <prefix> <(template) unqualified-name> 847 // ::= <template-param> 848 // ::= <substitution> 849 // ::= <vendor-extended-type> 850 static bool ParsePrefix(State *state) { 851 ComplexityGuard guard(state); 852 if (guard.IsTooComplex()) return false; 853 bool has_something = false; 854 while (true) { 855 MaybeAppendSeparator(state); 856 if (ParseTemplateParam(state) || ParseDecltype(state) || 857 ParseSubstitution(state, /*accept_std=*/true) || 858 // Although the official grammar does not mention it, nested-names 859 // shaped like Nu14__some_builtinIiE6memberE occur in practice, and it 860 // is not clear what else a compiler is supposed to do when a 861 // vendor-extended type has named members. 
862 ParseVendorExtendedType(state) || 863 ParseUnscopedName(state) || 864 (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) { 865 has_something = true; 866 MaybeIncreaseNestLevel(state); 867 continue; 868 } 869 MaybeCancelLastSeparator(state); 870 if (has_something && ParseTemplateArgs(state)) { 871 return ParsePrefix(state); 872 } else { 873 break; 874 } 875 } 876 return true; 877 } 878 879 // <unqualified-name> ::= <operator-name> [<abi-tags>] 880 // ::= <ctor-dtor-name> [<abi-tags>] 881 // ::= <source-name> [<abi-tags>] 882 // ::= <local-source-name> [<abi-tags>] 883 // ::= <unnamed-type-name> [<abi-tags>] 884 // ::= DC <source-name>+ E # C++17 structured binding 885 // ::= F <source-name> # C++20 constrained friend 886 // ::= F <operator-name> # C++20 constrained friend 887 // 888 // <local-source-name> is a GCC extension; see below. 889 // 890 // For the F notation for constrained friends, see 891 // https://github.com/itanium-cxx-abi/cxx-abi/issues/24#issuecomment-1491130332. 
892 static bool ParseUnqualifiedName(State *state) { 893 ComplexityGuard guard(state); 894 if (guard.IsTooComplex()) return false; 895 if (ParseOperatorName(state, nullptr) || ParseCtorDtorName(state) || 896 ParseSourceName(state) || ParseLocalSourceName(state) || 897 ParseUnnamedTypeName(state)) { 898 return ParseAbiTags(state); 899 } 900 901 // DC <source-name>+ E 902 ParseState copy = state->parse_state; 903 if (ParseTwoCharToken(state, "DC") && OneOrMore(ParseSourceName, state) && 904 ParseOneCharToken(state, 'E')) { 905 return true; 906 } 907 state->parse_state = copy; 908 909 // F <source-name> 910 // F <operator-name> 911 if (ParseOneCharToken(state, 'F') && MaybeAppend(state, "friend ") && 912 (ParseSourceName(state) || ParseOperatorName(state, nullptr))) { 913 return true; 914 } 915 state->parse_state = copy; 916 917 return false; 918 } 919 920 // <abi-tags> ::= <abi-tag> [<abi-tags>] 921 // <abi-tag> ::= B <source-name> 922 static bool ParseAbiTags(State *state) { 923 ComplexityGuard guard(state); 924 if (guard.IsTooComplex()) return false; 925 926 while (ParseOneCharToken(state, 'B')) { 927 ParseState copy = state->parse_state; 928 MaybeAppend(state, "[abi:"); 929 930 if (!ParseSourceName(state)) { 931 state->parse_state = copy; 932 return false; 933 } 934 MaybeAppend(state, "]"); 935 } 936 937 return true; 938 } 939 940 // <source-name> ::= <positive length number> <identifier> 941 static bool ParseSourceName(State *state) { 942 ComplexityGuard guard(state); 943 if (guard.IsTooComplex()) return false; 944 ParseState copy = state->parse_state; 945 int length = -1; 946 if (ParseNumber(state, &length) && 947 ParseIdentifier(state, static_cast<size_t>(length))) { 948 return true; 949 } 950 state->parse_state = copy; 951 return false; 952 } 953 954 // <local-source-name> ::= L <source-name> [<discriminator>] 955 // 956 // References: 957 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775 958 // https://gcc.gnu.org/viewcvs?view=rev&revision=124467 959 
static bool ParseLocalSourceName(State *state) { 960 ComplexityGuard guard(state); 961 if (guard.IsTooComplex()) return false; 962 ParseState copy = state->parse_state; 963 if (ParseOneCharToken(state, 'L') && ParseSourceName(state) && 964 Optional(ParseDiscriminator(state))) { 965 return true; 966 } 967 state->parse_state = copy; 968 return false; 969 } 970 971 // <unnamed-type-name> ::= Ut [<(nonnegative) number>] _ 972 // ::= <closure-type-name> 973 // <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _ 974 // <lambda-sig> ::= <template-param-decl>* <(parameter) type>+ 975 // 976 // For <template-param-decl>* in <lambda-sig> see: 977 // 978 // https://github.com/itanium-cxx-abi/cxx-abi/issues/31 979 static bool ParseUnnamedTypeName(State *state) { 980 ComplexityGuard guard(state); 981 if (guard.IsTooComplex()) return false; 982 ParseState copy = state->parse_state; 983 // Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }. 984 // Optionally parse the encoded value into 'which' and add 2 to get the index. 985 int which = -1; 986 987 // Unnamed type local to function or class. 988 if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) && 989 which <= std::numeric_limits<int>::max() - 2 && // Don't overflow. 990 ParseOneCharToken(state, '_')) { 991 MaybeAppend(state, "{unnamed type#"); 992 MaybeAppendDecimal(state, 2 + which); 993 MaybeAppend(state, "}"); 994 return true; 995 } 996 state->parse_state = copy; 997 998 // Closure type. 999 which = -1; 1000 if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) && 1001 ZeroOrMore(ParseTemplateParamDecl, state) && 1002 OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) && 1003 ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) && 1004 which <= std::numeric_limits<int>::max() - 2 && // Don't overflow. 
1005 ParseOneCharToken(state, '_')) { 1006 MaybeAppend(state, "{lambda()#"); 1007 MaybeAppendDecimal(state, 2 + which); 1008 MaybeAppend(state, "}"); 1009 return true; 1010 } 1011 state->parse_state = copy; 1012 1013 return false; 1014 } 1015 1016 // <number> ::= [n] <non-negative decimal integer> 1017 // If "number_out" is non-null, then *number_out is set to the value of the 1018 // parsed number on success. 1019 static bool ParseNumber(State *state, int *number_out) { 1020 ComplexityGuard guard(state); 1021 if (guard.IsTooComplex()) return false; 1022 bool negative = false; 1023 if (ParseOneCharToken(state, 'n')) { 1024 negative = true; 1025 } 1026 const char *p = RemainingInput(state); 1027 uint64_t number = 0; 1028 for (; *p != '\0'; ++p) { 1029 if (IsDigit(*p)) { 1030 number = number * 10 + static_cast<uint64_t>(*p - '0'); 1031 } else { 1032 break; 1033 } 1034 } 1035 // Apply the sign with uint64_t arithmetic so overflows aren't UB. Gives 1036 // "incorrect" results for out-of-range inputs, but negative values only 1037 // appear for literals, which aren't printed. 1038 if (negative) { 1039 number = ~number + 1; 1040 } 1041 if (p != RemainingInput(state)) { // Conversion succeeded. 1042 state->parse_state.mangled_idx += 1043 static_cast<int>(p - RemainingInput(state)); 1044 UpdateHighWaterMark(state); 1045 if (number_out != nullptr) { 1046 // Note: possibly truncate "number". 1047 *number_out = static_cast<int>(number); 1048 } 1049 return true; 1050 } 1051 return false; 1052 } 1053 1054 // Floating-point literals are encoded using a fixed-length lowercase 1055 // hexadecimal string. 1056 static bool ParseFloatNumber(State *state) { 1057 ComplexityGuard guard(state); 1058 if (guard.IsTooComplex()) return false; 1059 const char *p = RemainingInput(state); 1060 for (; *p != '\0'; ++p) { 1061 if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) { 1062 break; 1063 } 1064 } 1065 if (p != RemainingInput(state)) { // Conversion succeeded. 
1066 state->parse_state.mangled_idx += 1067 static_cast<int>(p - RemainingInput(state)); 1068 UpdateHighWaterMark(state); 1069 return true; 1070 } 1071 return false; 1072 } 1073 1074 // The <seq-id> is a sequence number in base 36, 1075 // using digits and upper case letters 1076 static bool ParseSeqId(State *state) { 1077 ComplexityGuard guard(state); 1078 if (guard.IsTooComplex()) return false; 1079 const char *p = RemainingInput(state); 1080 for (; *p != '\0'; ++p) { 1081 if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) { 1082 break; 1083 } 1084 } 1085 if (p != RemainingInput(state)) { // Conversion succeeded. 1086 state->parse_state.mangled_idx += 1087 static_cast<int>(p - RemainingInput(state)); 1088 UpdateHighWaterMark(state); 1089 return true; 1090 } 1091 return false; 1092 } 1093 1094 // <identifier> ::= <unqualified source code identifier> (of given length) 1095 static bool ParseIdentifier(State *state, size_t length) { 1096 ComplexityGuard guard(state); 1097 if (guard.IsTooComplex()) return false; 1098 if (!AtLeastNumCharsRemaining(RemainingInput(state), length)) { 1099 return false; 1100 } 1101 if (IdentifierIsAnonymousNamespace(state, length)) { 1102 MaybeAppend(state, "(anonymous namespace)"); 1103 } else { 1104 MaybeAppendWithLength(state, RemainingInput(state), length); 1105 } 1106 state->parse_state.mangled_idx += static_cast<int>(length); 1107 UpdateHighWaterMark(state); 1108 return true; 1109 } 1110 1111 // <operator-name> ::= nw, and other two letters cases 1112 // ::= cv <type> # (cast) 1113 // ::= li <source-name> # C++11 user-defined literal 1114 // ::= v <digit> <source-name> # vendor extended operator 1115 static bool ParseOperatorName(State *state, int *arity) { 1116 ComplexityGuard guard(state); 1117 if (guard.IsTooComplex()) return false; 1118 if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) { 1119 return false; 1120 } 1121 // First check with "cv" (cast) case. 
1122 ParseState copy = state->parse_state; 1123 if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") && 1124 EnterNestedName(state) && ParseConversionOperatorType(state) && 1125 LeaveNestedName(state, copy.nest_level)) { 1126 if (arity != nullptr) { 1127 *arity = 1; 1128 } 1129 return true; 1130 } 1131 state->parse_state = copy; 1132 1133 // Then user-defined literals. 1134 if (ParseTwoCharToken(state, "li") && MaybeAppend(state, "operator\"\" ") && 1135 ParseSourceName(state)) { 1136 return true; 1137 } 1138 state->parse_state = copy; 1139 1140 // Then vendor extended operators. 1141 if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) && 1142 ParseSourceName(state)) { 1143 return true; 1144 } 1145 state->parse_state = copy; 1146 1147 // Other operator names should start with a lower alphabet followed 1148 // by a lower/upper alphabet. 1149 if (!(IsLower(RemainingInput(state)[0]) && 1150 IsAlpha(RemainingInput(state)[1]))) { 1151 return false; 1152 } 1153 // We may want to perform a binary search if we really need speed. 1154 const AbbrevPair *p; 1155 for (p = kOperatorList; p->abbrev != nullptr; ++p) { 1156 if (RemainingInput(state)[0] == p->abbrev[0] && 1157 RemainingInput(state)[1] == p->abbrev[1]) { 1158 if (arity != nullptr) { 1159 *arity = p->arity; 1160 } 1161 MaybeAppend(state, "operator"); 1162 if (IsLower(*p->real_name)) { // new, delete, etc. 1163 MaybeAppend(state, " "); 1164 } 1165 MaybeAppend(state, p->real_name); 1166 state->parse_state.mangled_idx += 2; 1167 UpdateHighWaterMark(state); 1168 return true; 1169 } 1170 } 1171 return false; 1172 } 1173 1174 // <operator-name> ::= cv <type> # (cast) 1175 // 1176 // The name of a conversion operator is the one place where cv-qualifiers, *, &, 1177 // and other simple type combinators are expected to appear in our stripped-down 1178 // demangling (elsewhere they appear in function signatures or template 1179 // arguments, which we omit from the output). 
// We make reasonable efforts to
// render simple cases accurately.
//
// Parses the <type> that follows "cv" and appends it in source order: the base
// type first, then the pointer/reference/qualifier suffixes.  Returns false and
// restores the parse state if no base type can be parsed.
static bool ParseConversionOperatorType(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;

  // Scan pointers, const, and other easy mangling prefixes with postfix
  // demanglings.  Remember the range of input for later rescanning.
  //
  // See `ParseType` and the `switch` below for the meaning of each char.
  const char* begin_simple_prefixes = RemainingInput(state);
  while (ParseCharClass(state, "OPRCGrVK")) {}
  const char* end_simple_prefixes = RemainingInput(state);

  // Emit the base type first.
  if (!ParseType(state)) {
    state->parse_state = copy;
    return false;
  }

  // Then rescan the easy type combinators in reverse order to emit their
  // demanglings in the expected output order.
  while (begin_simple_prefixes != end_simple_prefixes) {
    // Every character in the range came from the char class above, so each
    // one hits exactly one case.
    switch (*--end_simple_prefixes) {
      case 'P':
        MaybeAppend(state, "*");
        break;
      case 'R':
        MaybeAppend(state, "&");
        break;
      case 'O':
        MaybeAppend(state, "&&");
        break;
      case 'C':
        MaybeAppend(state, " _Complex");
        break;
      case 'G':
        MaybeAppend(state, " _Imaginary");
        break;
      case 'r':
        MaybeAppend(state, " restrict");
        break;
      case 'V':
        MaybeAppend(state, " volatile");
        break;
      case 'K':
        MaybeAppend(state, " const");
        break;
    }
  }
  return true;
}

// <special-name> ::= TV <type>
//                ::= TT <type>
//                ::= TI <type>
//                ::= TS <type>
//                ::= TW <name>  # thread-local wrapper
//                ::= TH <name>  # thread-local initialization
//                ::= Tc <call-offset> <call-offset> <(base) encoding>
//                ::= GV <(object) name>
//                ::= GR <(object) name> [<seq-id>] _
//                ::= T <call-offset> <(base) encoding>
//                ::= GTt <encoding>  # transaction-safe entry point
//                ::= TA <template-arg>  # nontype template parameter object
// G++ extensions:
//                ::= TC <type> <(offset) number> _ <(base) type>
//                ::= TF <type>
//                ::= TJ <type>
//                ::= GR <name>  # without final _, perhaps an earlier form?
//                ::= GA <encoding>
//                ::= Th <call-offset> <(base) encoding>
//                ::= Tv <call-offset> <(base) encoding>
//
// Note: Most of these are special data, not functions that occur in stack
// traces.  Exceptions are TW and TH, which denote functions supporting the
// thread_local feature.  For these see:
//
// https://maskray.me/blog/2021-02-14-all-about-thread-local-storage
//
// For TA see https://github.com/itanium-cxx-abi/cxx-abi/issues/63.
//
// The alternatives are tried one after another; `copy` is the parse state on
// entry and is reinstated after each alternative that does not match, and
// before every `return false`.
static bool ParseSpecialName(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;

  // TW <name>.  Once the two-character prefix matches, no other alternative
  // is attempted: failure of <name> fails the whole production.
  if (ParseTwoCharToken(state, "TW")) {
    MaybeAppend(state, "thread-local wrapper routine for ");
    if (ParseName(state)) return true;
    state->parse_state = copy;
    return false;
  }

  // TH <name>.  Committed in the same way as TW.
  if (ParseTwoCharToken(state, "TH")) {
    MaybeAppend(state, "thread-local initialization routine for ");
    if (ParseName(state)) return true;
    state->parse_state = copy;
    return false;
  }

  // TV / TT / TI / TS <type>
  if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTIS") &&
      ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // Tc <call-offset> <call-offset> <(base) encoding>
  if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
      ParseCallOffset(state) && ParseEncoding(state)) {
    return true;
  }
  state->parse_state = copy;

  // GV <(object) name>
  if (ParseTwoCharToken(state, "GV") && ParseName(state)) {
    return true;
  }
  state->parse_state = copy;

  // T <call-offset> <(base) encoding>
  if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
      ParseEncoding(state)) {
    return true;
  }
  state->parse_state = copy;

  // G++ extensions
  // TC <type> <(offset) number> _ <(base) type>.  The first <type> may be
  // appended; the base <type> is parsed with appending disabled.
  if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
      ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
      DisableAppend(state) && ParseType(state)) {
    RestoreAppend(state, copy.append);
    return true;
  }
  state->parse_state = copy;

  // TF / TJ <type>
  if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
      ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // <special-name> ::= GR <(object) name> [<seq-id>] _  # modern standard
  //                ::= GR <(object) name>               # also recognized
  if (ParseTwoCharToken(state, "GR")) {
    MaybeAppend(state, "reference temporary for ");
    if (!ParseName(state)) {
      state->parse_state = copy;
      return false;
    }
    // Both pieces are optional, but a <seq-id> without the closing '_' is
    // rejected.
    const bool has_seq_id = ParseSeqId(state);
    const bool has_underscore = ParseOneCharToken(state, '_');
    if (has_seq_id && !has_underscore) {
      state->parse_state = copy;
      return false;
    }
    return true;
  }

  // GA <encoding>
  if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
    return true;
  }
  state->parse_state = copy;

  // GTt <encoding>
  if (ParseThreeCharToken(state, "GTt") &&
      MaybeAppend(state, "transaction clone for ") && ParseEncoding(state)) {
    return true;
  }
  state->parse_state = copy;

  // Th / Tv <call-offset> <(base) encoding>
  if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
      ParseCallOffset(state) && ParseEncoding(state)) {
    return true;
  }
  state->parse_state = copy;

  // TA <template-arg>.  The argument is parsed with appending disabled; only
  // a fixed description is appended.
  if (ParseTwoCharToken(state, "TA")) {
    bool append = state->parse_state.append;
    DisableAppend(state);
    if (ParseTemplateArg(state)) {
      RestoreAppend(state, append);
      MaybeAppend(state, "template parameter object");
      return true;
    }
  }
  // Also undoes the DisableAppend above when the TA argument did not parse.
  state->parse_state = copy;

  return false;
}

// <call-offset> ::= h <nv-offset> _
//               ::= v <v-offset> _
//
// Restores the parse state and returns false if neither form matches.
static bool ParseCallOffset(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;
  if (ParseOneCharToken(state, 'h') && ParseNVOffset(state) &&
      ParseOneCharToken(state, '_')) {
    return true;
  }
  state->parse_state = copy;

  if (ParseOneCharToken(state, 'v') && ParseVOffset(state) &&
      ParseOneCharToken(state, '_')) {
    return true;
  }
  state->parse_state = copy;

  return false;
}

// <nv-offset> ::= <(offset) number>
//
// The parsed value is discarded.
static bool ParseNVOffset(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  return ParseNumber(state, nullptr);
}

// <v-offset>  ::= <(offset) number> _ <(virtual offset) number>
//
// Both values are discarded.  Restores the parse state on failure.
static bool ParseVOffset(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;
  if (ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
      ParseNumber(state, nullptr)) {
    return true;
  }
  state->parse_state = copy;
  return false;
}

// <ctor-dtor-name> ::= C1 | C2 | C3
//                  ::= CI1 <base-class-type> | CI2 <base-class-type>
//                  ::= D0 | D1 | D2
//                  # GCC extensions: "unified" constructor/destructor.  See
//                  # https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847
//                  ::= C4 | D4
//
// For the plain C and D forms, re-appends the prev_name_length bytes of output
// that start at prev_name_idx, preceded by "~" for destructors.
// NOTE(review): presumably that range holds the enclosing class's name; it is
// recorded outside this chunk -- confirm.
static bool ParseCtorDtorName(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;
  if (ParseOneCharToken(state, 'C')) {
    if (ParseCharClass(state, "1234")) {
      const char *const prev_name =
          state->out + state->parse_state.prev_name_idx;
      MaybeAppendWithLength(state, prev_name,
                            state->parse_state.prev_name_length);
      return true;
    } else if (ParseOneCharToken(state, 'I') && ParseCharClass(state, "12") &&
               ParseClassEnumType(state)) {
      // CI1 / CI2 <base-class-type>: nothing is appended here beyond whatever
      // ParseClassEnumType itself appends.
      return true;
    }
  }
  state->parse_state = copy;

  if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "0124")) {
    const char *const prev_name = state->out + state->parse_state.prev_name_idx;
    MaybeAppend(state, "~");
    MaybeAppendWithLength(state, prev_name,
                          state->parse_state.prev_name_length);
    return true;
  }
  state->parse_state = copy;
  return false;
}

// <decltype> ::= Dt <expression> E  # decltype of an id-expression or class
//                                   # member access (C++0x)
//            ::= DT <expression> E  # decltype of an expression (C++0x)
//
// Restores the parse state on failure.
static bool ParseDecltype(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;

  ParseState copy = state->parse_state;
  if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
      ParseExpression(state) && ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  return false;
}

// <type> ::= <CV-qualifiers> <type>
//        ::= P <type>   # pointer-to
//        ::= R <type>   # reference-to
//        ::= O <type>   # rvalue reference-to (C++0x)
//        ::= C <type>   # complex pair (C 2000)
//        ::= G <type>   # imaginary (C 2000)
//        ::= <builtin-type>
//        ::= <function-type>
//        ::= <class-enum-type>  # note: just an alias for <name>
//        ::= <array-type>
//        ::= <pointer-to-member-type>
//        ::= <template-template-param> <template-args>
//        ::= <template-param>
//        ::= <decltype>
//        ::= <substitution>
//        ::= Dp <type>          # pack expansion of (C++0x)
//        ::= Dv <(elements) number> _ <type>  # GNU vector extension
//        ::= Dv <(bytes) expression> _ <type>
//        ::= Dk <type-constraint>  # constrained auto
//
// The order in which the alternatives are attempted below is significant; see
// the comments on the first two for why they commit instead of backtracking.
static bool ParseType(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;

  // We should check CV-qualifiers, and PRGC things first.
  //
  // CV-qualifiers overlap with some operator names, but an operator name is not
  // valid as a type.  To avoid an ambiguity that can lead to exponential time
  // complexity, refuse to backtrack the CV-qualifiers.
  //
  // _Z4aoeuIrMvvE
  //  => _Z 4aoeuI        rM  v     v   E
  //         aoeu<operator%=, void, void>
  //  => _Z 4aoeuI r Mv v              E
  //         aoeu<void void::* restrict>
  //
  // By consuming the CV-qualifiers first, the former parse is disabled.
  if (ParseCVQualifiers(state)) {
    const bool result = ParseType(state);
    if (!result) state->parse_state = copy;
    return result;
  }
  state->parse_state = copy;

  // Similarly, these tag characters can overlap with other <name>s resulting in
  // two different parse prefixes that land on <template-args> in the same
  // place, such as "C3r1xI...".  So, disable the "ctor-name = C3" parse by
  // refusing to backtrack the tag characters.
  if (ParseCharClass(state, "OPRCG")) {
    const bool result = ParseType(state);
    if (!result) state->parse_state = copy;
    return result;
  }
  state->parse_state = copy;

  // Dp <type>
  if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // No restore follows this chain: the state is still `copy` here provided
  // each of these parsers leaves it unchanged on failure.
  if (ParseBuiltinType(state) || ParseFunctionType(state) ||
      ParseClassEnumType(state) || ParseArrayType(state) ||
      ParsePointerToMemberType(state) || ParseDecltype(state) ||
      // "std" on its own isn't a type.
      ParseSubstitution(state, /*accept_std=*/false)) {
    return true;
  }

  if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) {
    return true;
  }
  state->parse_state = copy;

  // Less greedy than <template-template-param> <template-args>.
  if (ParseTemplateParam(state)) {
    return true;
  }

  // GNU vector extension Dv <number> _ <type>
  if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
      ParseOneCharToken(state, '_') && ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // GNU vector extension Dv <expression> _ <type>
  if (ParseTwoCharToken(state, "Dv") && ParseExpression(state) &&
      ParseOneCharToken(state, '_') && ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // Dk <type-constraint>
  if (ParseTwoCharToken(state, "Dk") && ParseTypeConstraint(state)) {
    return true;
  }
  state->parse_state = copy;

  // For this notation see CXXNameMangler::mangleType in Clang's source code.
  // The relevant logic and its comment "not clear how to mangle this!" date
  // from 2011, so it may be with us awhile.
  return ParseLongToken(state, "_SUBSTPACK_");
}

// <qualifiers> ::= <extended-qualifier>* <CV-qualifiers>
// <CV-qualifiers> ::= [r] [V] [K]
//
// We don't allow empty <CV-qualifiers> to avoid infinite loop in
// ParseType().
1569 static bool ParseCVQualifiers(State *state) { 1570 ComplexityGuard guard(state); 1571 if (guard.IsTooComplex()) return false; 1572 int num_cv_qualifiers = 0; 1573 while (ParseExtendedQualifier(state)) ++num_cv_qualifiers; 1574 num_cv_qualifiers += ParseOneCharToken(state, 'r'); 1575 num_cv_qualifiers += ParseOneCharToken(state, 'V'); 1576 num_cv_qualifiers += ParseOneCharToken(state, 'K'); 1577 return num_cv_qualifiers > 0; 1578 } 1579 1580 // <extended-qualifier> ::= U <source-name> [<template-args>] 1581 static bool ParseExtendedQualifier(State *state) { 1582 ComplexityGuard guard(state); 1583 if (guard.IsTooComplex()) return false; 1584 ParseState copy = state->parse_state; 1585 1586 if (!ParseOneCharToken(state, 'U')) return false; 1587 1588 bool append = state->parse_state.append; 1589 DisableAppend(state); 1590 if (!ParseSourceName(state)) { 1591 state->parse_state = copy; 1592 return false; 1593 } 1594 Optional(ParseTemplateArgs(state)); 1595 RestoreAppend(state, append); 1596 return true; 1597 } 1598 1599 // <builtin-type> ::= v, etc. # single-character builtin types 1600 // ::= <vendor-extended-type> 1601 // ::= Dd, etc. # two-character builtin types 1602 // ::= DB (<number> | <expression>) _ # _BitInt(N) 1603 // ::= DU (<number> | <expression>) _ # unsigned _BitInt(N) 1604 // ::= DF <number> _ # _FloatN (N bits) 1605 // ::= DF <number> x # _FloatNx 1606 // ::= DF16b # std::bfloat16_t 1607 // 1608 // Not supported: 1609 // ::= [DS] DA <fixed-point-size> 1610 // ::= [DS] DR <fixed-point-size> 1611 // because real implementations of N1169 fixed-point are scant. 
1612 static bool ParseBuiltinType(State *state) { 1613 ComplexityGuard guard(state); 1614 if (guard.IsTooComplex()) return false; 1615 ParseState copy = state->parse_state; 1616 1617 // DB (<number> | <expression>) _ # _BitInt(N) 1618 // DU (<number> | <expression>) _ # unsigned _BitInt(N) 1619 if (ParseTwoCharToken(state, "DB") || 1620 (ParseTwoCharToken(state, "DU") && MaybeAppend(state, "unsigned "))) { 1621 bool append = state->parse_state.append; 1622 DisableAppend(state); 1623 int number = -1; 1624 if (!ParseNumber(state, &number) && !ParseExpression(state)) { 1625 state->parse_state = copy; 1626 return false; 1627 } 1628 RestoreAppend(state, append); 1629 1630 if (!ParseOneCharToken(state, '_')) { 1631 state->parse_state = copy; 1632 return false; 1633 } 1634 1635 MaybeAppend(state, "_BitInt("); 1636 if (number >= 0) { 1637 MaybeAppendDecimal(state, number); 1638 } else { 1639 MaybeAppend(state, "?"); // the best we can do for dependent sizes 1640 } 1641 MaybeAppend(state, ")"); 1642 return true; 1643 } 1644 1645 // DF <number> _ # _FloatN 1646 // DF <number> x # _FloatNx 1647 // DF16b # std::bfloat16_t 1648 if (ParseTwoCharToken(state, "DF")) { 1649 if (ParseThreeCharToken(state, "16b")) { 1650 MaybeAppend(state, "std::bfloat16_t"); 1651 return true; 1652 } 1653 int number = 0; 1654 if (!ParseNumber(state, &number)) { 1655 state->parse_state = copy; 1656 return false; 1657 } 1658 MaybeAppend(state, "_Float"); 1659 MaybeAppendDecimal(state, number); 1660 if (ParseOneCharToken(state, 'x')) { 1661 MaybeAppend(state, "x"); 1662 return true; 1663 } 1664 if (ParseOneCharToken(state, '_')) return true; 1665 state->parse_state = copy; 1666 return false; 1667 } 1668 1669 for (const AbbrevPair *p = kBuiltinTypeList; p->abbrev != nullptr; ++p) { 1670 // Guaranteed only 1- or 2-character strings in kBuiltinTypeList. 1671 if (p->abbrev[1] == '\0') { 1672 if (ParseOneCharToken(state, p->abbrev[0])) { 1673 MaybeAppend(state, p->real_name); 1674 return true; // ::= v, etc. 
# single-character builtin types 1675 } 1676 } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) { 1677 MaybeAppend(state, p->real_name); 1678 return true; // ::= Dd, etc. # two-character builtin types 1679 } 1680 } 1681 1682 return ParseVendorExtendedType(state); 1683 } 1684 1685 // <vendor-extended-type> ::= u <source-name> [<template-args>] 1686 static bool ParseVendorExtendedType(State *state) { 1687 ComplexityGuard guard(state); 1688 if (guard.IsTooComplex()) return false; 1689 1690 ParseState copy = state->parse_state; 1691 if (ParseOneCharToken(state, 'u') && ParseSourceName(state) && 1692 Optional(ParseTemplateArgs(state))) { 1693 return true; 1694 } 1695 state->parse_state = copy; 1696 return false; 1697 } 1698 1699 // <exception-spec> ::= Do # non-throwing 1700 // exception-specification (e.g., 1701 // noexcept, throw()) 1702 // ::= DO <expression> E # computed (instantiation-dependent) 1703 // noexcept 1704 // ::= Dw <type>+ E # dynamic exception specification 1705 // with instantiation-dependent types 1706 static bool ParseExceptionSpec(State *state) { 1707 ComplexityGuard guard(state); 1708 if (guard.IsTooComplex()) return false; 1709 1710 if (ParseTwoCharToken(state, "Do")) return true; 1711 1712 ParseState copy = state->parse_state; 1713 if (ParseTwoCharToken(state, "DO") && ParseExpression(state) && 1714 ParseOneCharToken(state, 'E')) { 1715 return true; 1716 } 1717 state->parse_state = copy; 1718 if (ParseTwoCharToken(state, "Dw") && OneOrMore(ParseType, state) && 1719 ParseOneCharToken(state, 'E')) { 1720 return true; 1721 } 1722 state->parse_state = copy; 1723 1724 return false; 1725 } 1726 1727 // <function-type> ::= 1728 // [exception-spec] [Dx] F [Y] <bare-function-type> [<ref-qualifier>] E 1729 // 1730 // <ref-qualifier> ::= R | O 1731 static bool ParseFunctionType(State *state) { 1732 ComplexityGuard guard(state); 1733 if (guard.IsTooComplex()) return false; 1734 ParseState copy = state->parse_state; 1735 
Optional(ParseExceptionSpec(state)); 1736 Optional(ParseTwoCharToken(state, "Dx")); 1737 if (!ParseOneCharToken(state, 'F')) { 1738 state->parse_state = copy; 1739 return false; 1740 } 1741 Optional(ParseOneCharToken(state, 'Y')); 1742 if (!ParseBareFunctionType(state)) { 1743 state->parse_state = copy; 1744 return false; 1745 } 1746 Optional(ParseCharClass(state, "RO")); 1747 if (!ParseOneCharToken(state, 'E')) { 1748 state->parse_state = copy; 1749 return false; 1750 } 1751 return true; 1752 } 1753 1754 // <bare-function-type> ::= <overload-attribute>* <(signature) type>+ 1755 // 1756 // The <overload-attribute>* prefix is nonstandard; see the comment on 1757 // ParseOverloadAttribute. 1758 static bool ParseBareFunctionType(State *state) { 1759 ComplexityGuard guard(state); 1760 if (guard.IsTooComplex()) return false; 1761 ParseState copy = state->parse_state; 1762 DisableAppend(state); 1763 if (ZeroOrMore(ParseOverloadAttribute, state) && 1764 OneOrMore(ParseType, state)) { 1765 RestoreAppend(state, copy.append); 1766 MaybeAppend(state, "()"); 1767 return true; 1768 } 1769 state->parse_state = copy; 1770 return false; 1771 } 1772 1773 // <overload-attribute> ::= Ua <name> 1774 // 1775 // The nonstandard <overload-attribute> production is sufficient to accept the 1776 // current implementation of __attribute__((enable_if(condition, "message"))) 1777 // and future attributes of a similar shape. See 1778 // https://clang.llvm.org/docs/AttributeReference.html#enable-if and the 1779 // definition of CXXNameMangler::mangleFunctionEncodingBareType in Clang's 1780 // source code. 
1781 static bool ParseOverloadAttribute(State *state) { 1782 ComplexityGuard guard(state); 1783 if (guard.IsTooComplex()) return false; 1784 ParseState copy = state->parse_state; 1785 if (ParseTwoCharToken(state, "Ua") && ParseName(state)) { 1786 return true; 1787 } 1788 state->parse_state = copy; 1789 return false; 1790 } 1791 1792 // <class-enum-type> ::= <name> 1793 // ::= Ts <name> # struct Name or class Name 1794 // ::= Tu <name> # union Name 1795 // ::= Te <name> # enum Name 1796 // 1797 // See http://shortn/_W3YrltiEd0. 1798 static bool ParseClassEnumType(State *state) { 1799 ComplexityGuard guard(state); 1800 if (guard.IsTooComplex()) return false; 1801 ParseState copy = state->parse_state; 1802 if (Optional(ParseTwoCharToken(state, "Ts") || 1803 ParseTwoCharToken(state, "Tu") || 1804 ParseTwoCharToken(state, "Te")) && 1805 ParseName(state)) { 1806 return true; 1807 } 1808 state->parse_state = copy; 1809 return false; 1810 } 1811 1812 // <array-type> ::= A <(positive dimension) number> _ <(element) type> 1813 // ::= A [<(dimension) expression>] _ <(element) type> 1814 static bool ParseArrayType(State *state) { 1815 ComplexityGuard guard(state); 1816 if (guard.IsTooComplex()) return false; 1817 ParseState copy = state->parse_state; 1818 if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) && 1819 ParseOneCharToken(state, '_') && ParseType(state)) { 1820 return true; 1821 } 1822 state->parse_state = copy; 1823 1824 if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) && 1825 ParseOneCharToken(state, '_') && ParseType(state)) { 1826 return true; 1827 } 1828 state->parse_state = copy; 1829 return false; 1830 } 1831 1832 // <pointer-to-member-type> ::= M <(class) type> <(member) type> 1833 static bool ParsePointerToMemberType(State *state) { 1834 ComplexityGuard guard(state); 1835 if (guard.IsTooComplex()) return false; 1836 ParseState copy = state->parse_state; 1837 if (ParseOneCharToken(state, 'M') && ParseType(state) && 
ParseType(state)) {
    return true;
  }
  state->parse_state = copy;
  return false;
}

// <template-param> ::= T_
//                  ::= T <parameter-2 non-negative number> _
//                  ::= TL <level-1> __
//                  ::= TL <level-1> _ <parameter-2 non-negative number> _
//
// Every accepted form appends a single "?" through MaybeAppend in place of the
// parameter, because template substitutions are not resolved.  When no form
// matches, state->parse_state is put back to the value saved in `copy` and
// false is returned.
static bool ParseTemplateParam(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  if (ParseTwoCharToken(state, "T_")) {
    MaybeAppend(state, "?");  // We don't support template substitutions.
    return true;              // ::= T_
  }

  ParseState copy = state->parse_state;
  if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
      ParseOneCharToken(state, '_')) {
    MaybeAppend(state, "?");  // We don't support template substitutions.
    return true;              // ::= T <parameter-2 non-negative number> _
  }
  state->parse_state = copy;

  // Both TL forms share the prefix "TL <level-1>", so it is parsed once and
  // the two possible tails are tried in turn.
  if (ParseTwoCharToken(state, "TL") && ParseNumber(state, nullptr)) {
    if (ParseTwoCharToken(state, "__")) {
      MaybeAppend(state, "?");  // We don't support template substitutions.
      return true;              // ::= TL <level-1> __
    }

    if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
        ParseOneCharToken(state, '_')) {
      MaybeAppend(state, "?");  // We don't support template substitutions.
      return true;  // ::= TL <level-1> _ <parameter-2 non-negative number> _
    }
  }
  state->parse_state = copy;
  return false;
}

// <template-param-decl>
//   ::= Ty                                  # template type parameter
//   ::= Tk <concept name> [<template-args>] # constrained type parameter
//   ::= Tn <type>                           # template non-type parameter
//   ::= Tt <template-param-decl>* E         # template template parameter
//   ::= Tp <template-param-decl>            # template parameter pack
//
// NOTE: <concept name> is just a <name>: http://shortn/_MqJVyr0fc1
// TODO(b/324066279): Implement optional suffix for `Tt`:
// [Q <requires-clause expr>]
//
// The five forms are tried in the order listed.  After each failed attempt
// state->parse_state is reset from `copy`, so every form starts at the same
// position.  Returns true as soon as one form matches, false otherwise.
static bool ParseTemplateParamDecl(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;

  // Ty
  if (ParseTwoCharToken(state, "Ty")) {
    return true;
  }
  state->parse_state = copy;

  // Tk <concept name> [<template-args>]
  if (ParseTwoCharToken(state, "Tk") && ParseName(state) &&
      Optional(ParseTemplateArgs(state))) {
    return true;
  }
  state->parse_state = copy;

  // Tn <type>
  if (ParseTwoCharToken(state, "Tn") && ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // Tt <template-param-decl>* E
  if (ParseTwoCharToken(state, "Tt") &&
      ZeroOrMore(ParseTemplateParamDecl, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // Tp <template-param-decl>
  if (ParseTwoCharToken(state, "Tp") && ParseTemplateParamDecl(state)) {
    return true;
  }
  state->parse_state = copy;

  return false;
}

// <template-template-param> ::= <template-param>
//                           ::= <substitution>
//
// Accepts whichever of the two alternatives matches first; nothing is saved or
// restored in this frame.
static bool ParseTemplateTemplateParam(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  return (ParseTemplateParam(state) ||
          // "std" on its own isn't a template.
          ParseSubstitution(state, /*accept_std=*/false));
}

// <template-args> ::= I <template-arg>+ [Q <requires-clause expr>] E
//
// DisableAppend is called before the argument list is parsed and, on success,
// the `append` setting saved in `copy` is reinstated and the literal "<>" is
// appended.  NOTE(review): presumably this means the arguments themselves
// never reach the output and only "<>" stands in for them -- confirm against
// DisableAppend/RestoreAppend.  On failure the whole parse_state (including
// `append`) is restored from `copy`.
static bool ParseTemplateArgs(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;
  DisableAppend(state);
  if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
      Optional(ParseQRequiresClauseExpr(state)) &&
      ParseOneCharToken(state, 'E')) {
    RestoreAppend(state, copy.append);
    MaybeAppend(state, "<>");
    return true;
  }
  state->parse_state = copy;
  return false;
}

// <template-arg>  ::= <template-param-decl> <template-arg>
//                 ::= <type>
//                 ::= <expr-primary>
//                 ::= J <template-arg>* E        # argument pack
//                 ::= X <expression> E
//
// The alternatives are attempted in this order: the J argument pack, the
// merged "L <source-name> ..." form derived in the long comment below, <type>,
// <expr-primary>, X <expression> E, and finally
// <template-param-decl> <template-arg>.
static bool ParseTemplateArg(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;
  if (ParseOneCharToken(state, 'J') && ZeroOrMore(ParseTemplateArg, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // There can be significant overlap between the following leading to
  // exponential backtracking:
  //
  //   <expr-primary> ::= L <type> <expr-cast-value> E
  //                 e.g. L 2xxIvE 1                 E
  //   <type>         ==> <local-source-name> <template-args>
  //                 e.g. L 2xx               IvE
  //
  // This means parsing an entire <type> twice, and <type> can contain
  // <template-arg>, so this can generate exponential backtracking.  There is
  // only overlap when the remaining input starts with "L <source-name>", so
  // parse all cases that can start this way jointly to share the common prefix.
  //
  // We have:
  //
  //   <template-arg> ::= <type>
  //                  ::= <expr-primary>
  //
  // First, drop all the productions of <type> that must start with something
  // other than 'L'.  All that's left is <class-enum-type>; inline it.
  //
  //   <type> ::= <nested-name> # starts with 'N'
  //          ::= <unscoped-name>
  //          ::= <unscoped-template-name> <template-args>
  //          ::= <local-name> # starts with 'Z'
  //
  // Drop and inline again:
  //
  //   <type> ::= <unscoped-name>
  //          ::= <unscoped-name> <template-args>
  //          ::= <substitution> <template-args> # starts with 'S'
  //
  // Merge the first two, inline <unscoped-name>, drop last:
  //
  //   <type> ::= <unqualified-name> [<template-args>]
  //          ::= St <unqualified-name> [<template-args>] # starts with 'S'
  //
  // Drop and inline:
  //
  //   <type> ::= <operator-name> [<template-args>] # starts with lowercase
  //          ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D'
  //          ::= <source-name> [<template-args>] # starts with digit
  //          ::= <local-source-name> [<template-args>]
  //          ::= <unnamed-type-name> [<template-args>] # starts with 'U'
  //
  // One more time:
  //
  //   <type> ::= L <source-name> [<template-args>]
  //
  // Likewise with <expr-primary>:
  //
  //   <expr-primary> ::= L <type> <expr-cast-value> E
  //                  ::= LZ <encoding> E # cannot overlap; drop
  //                  ::= L <mangled_name> E # cannot overlap; drop
  //
  // By similar reasoning as shown above, the only <type>s starting with
  // <source-name> are "<source-name> [<template-args>]".  Inline this.
  //
  //   <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E
  //
  // Now inline both of these into <template-arg>:
  //
  //   <template-arg> ::= L <source-name> [<template-args>]
  //                  ::= L <source-name> [<template-args>] <expr-cast-value> E
  //
  // Merge them and we're done:
  //   <template-arg>
  //     ::= L <source-name> [<template-args>] [<expr-cast-value> E]
  if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
    // `copy` is re-pointed at the position just past the name and its optional
    // template-args.  If the optional "<expr-cast-value> E" tail does not
    // match, only that tail is rewound and the shorter reading is accepted.
    copy = state->parse_state;
    if (ParseExprCastValueAndTrailingE(state)) {
      return true;
    }
    state->parse_state = copy;
    return true;
  }

  // Now that the overlapping cases can't reach this code, we can safely call
  // both of these.
  //
  // NOTE(review): no restore is written between the failed attempt above and
  // this one; presumably ParseLocalSourceName leaves the state unchanged when
  // it fails -- confirm.
  if (ParseType(state) || ParseExprPrimary(state)) {
    return true;
  }
  state->parse_state = copy;

  // X <expression> E
  if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // <template-param-decl> <template-arg>
  if (ParseTemplateParamDecl(state) && ParseTemplateArg(state)) {
    return true;
  }
  state->parse_state = copy;

  return false;
}

// <unresolved-type> ::= <template-param> [<template-args>]
//                   ::= <decltype>
//                   ::= <substitution>
//
// Tries the three alternatives left to right and returns true on the first
// match.
static inline bool ParseUnresolvedType(State *state) {
  // No ComplexityGuard because we don't copy the state in this stack frame.
  return (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) ||
         ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/false);
}

// <simple-id> ::= <source-name> [<template-args>]
//
// Returns false when no <source-name> is found; otherwise returns whatever
// Optional() yields for the template-args attempt.
static inline bool ParseSimpleId(State *state) {
  // No ComplexityGuard because we don't copy the state in this stack frame.

  // Note: <simple-id> cannot be followed by a parameter pack; see comment in
  // ParseUnresolvedType.
  // NOTE(review): ParseUnresolvedType, as it appears just above, carries no
  // comment about parameter packs, so this cross-reference looks stale.
  return ParseSourceName(state) && Optional(ParseTemplateArgs(state));
}

// <base-unresolved-name> ::= <source-name> [<template-args>]
//                        ::= on <operator-name> [<template-args>]
//                        ::= dn <destructor-name>
//
// The first production is exactly <simple-id>, so it is delegated to
// ParseSimpleId.  For "dn", the destructor name is accepted as either an
// <unresolved-type> or a <simple-id>.
static bool ParseBaseUnresolvedName(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;

  if (ParseSimpleId(state)) {
    return true;
  }

  // The snapshot is taken only after the <simple-id> attempt has failed.
  ParseState copy = state->parse_state;
  // The operator's arity is not needed here, hence the nullptr.
  if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) &&
      Optional(ParseTemplateArgs(state))) {
    return true;
  }
  state->parse_state = copy;

  if (ParseTwoCharToken(state, "dn") &&
      (ParseUnresolvedType(state) || ParseSimpleId(state))) {
    return true;
  }
  state->parse_state = copy;

  return false;
}

// <unresolved-name> ::= [gs] <base-unresolved-name>
//                   ::= sr <unresolved-type> <base-unresolved-name>
//                   ::= srN <unresolved-type> <unresolved-qualifier-level>+ E
//                         <base-unresolved-name>
//                   ::= [gs] sr <unresolved-qualifier-level>+ E
//                         <base-unresolved-name>
//                   ::= sr St <simple-id> <simple-id>  # nonstandard
//
// The last case is not part of the official grammar but has been observed in
// real-world examples that the GNU demangler (but not the LLVM demangler) is
// able to decode; see demangle_test.cc for one such symbol name.  The shape
// sr St <simple-id> <simple-id> was inferred by closed-box testing of the GNU
// demangler.
2124 static bool ParseUnresolvedName(State *state) { 2125 ComplexityGuard guard(state); 2126 if (guard.IsTooComplex()) return false; 2127 2128 ParseState copy = state->parse_state; 2129 if (Optional(ParseTwoCharToken(state, "gs")) && 2130 ParseBaseUnresolvedName(state)) { 2131 return true; 2132 } 2133 state->parse_state = copy; 2134 2135 if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) && 2136 ParseBaseUnresolvedName(state)) { 2137 return true; 2138 } 2139 state->parse_state = copy; 2140 2141 if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') && 2142 ParseUnresolvedType(state) && 2143 OneOrMore(ParseUnresolvedQualifierLevel, state) && 2144 ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { 2145 return true; 2146 } 2147 state->parse_state = copy; 2148 2149 if (Optional(ParseTwoCharToken(state, "gs")) && 2150 ParseTwoCharToken(state, "sr") && 2151 OneOrMore(ParseUnresolvedQualifierLevel, state) && 2152 ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { 2153 return true; 2154 } 2155 state->parse_state = copy; 2156 2157 if (ParseTwoCharToken(state, "sr") && ParseTwoCharToken(state, "St") && 2158 ParseSimpleId(state) && ParseSimpleId(state)) { 2159 return true; 2160 } 2161 state->parse_state = copy; 2162 2163 return false; 2164 } 2165 2166 // <unresolved-qualifier-level> ::= <simple-id> 2167 // ::= <substitution> <template-args> 2168 // 2169 // The production <substitution> <template-args> is nonstandard but is observed 2170 // in practice. 
An upstream discussion on the best shape of <unresolved-name> 2171 // has not converged: 2172 // 2173 // https://github.com/itanium-cxx-abi/cxx-abi/issues/38 2174 static bool ParseUnresolvedQualifierLevel(State *state) { 2175 ComplexityGuard guard(state); 2176 if (guard.IsTooComplex()) return false; 2177 2178 if (ParseSimpleId(state)) return true; 2179 2180 ParseState copy = state->parse_state; 2181 if (ParseSubstitution(state, /*accept_std=*/false) && 2182 ParseTemplateArgs(state)) { 2183 return true; 2184 } 2185 state->parse_state = copy; 2186 return false; 2187 } 2188 2189 // <union-selector> ::= _ [<number>] 2190 // 2191 // https://github.com/itanium-cxx-abi/cxx-abi/issues/47 2192 static bool ParseUnionSelector(State *state) { 2193 return ParseOneCharToken(state, '_') && Optional(ParseNumber(state, nullptr)); 2194 } 2195 2196 // <function-param> ::= fp <(top-level) CV-qualifiers> _ 2197 // ::= fp <(top-level) CV-qualifiers> <number> _ 2198 // ::= fL <number> p <(top-level) CV-qualifiers> _ 2199 // ::= fL <number> p <(top-level) CV-qualifiers> <number> _ 2200 // ::= fpT # this 2201 static bool ParseFunctionParam(State *state) { 2202 ComplexityGuard guard(state); 2203 if (guard.IsTooComplex()) return false; 2204 2205 ParseState copy = state->parse_state; 2206 2207 // Function-param expression (level 0). 2208 if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) && 2209 Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { 2210 return true; 2211 } 2212 state->parse_state = copy; 2213 2214 // Function-param expression (level 1+). 
2215 if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) && 2216 ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) && 2217 Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { 2218 return true; 2219 } 2220 state->parse_state = copy; 2221 2222 return ParseThreeCharToken(state, "fpT"); 2223 } 2224 2225 // <braced-expression> ::= <expression> 2226 // ::= di <field source-name> <braced-expression> 2227 // ::= dx <index expression> <braced-expression> 2228 // ::= dX <expression> <expression> <braced-expression> 2229 static bool ParseBracedExpression(State *state) { 2230 ComplexityGuard guard(state); 2231 if (guard.IsTooComplex()) return false; 2232 2233 ParseState copy = state->parse_state; 2234 2235 if (ParseTwoCharToken(state, "di") && ParseSourceName(state) && 2236 ParseBracedExpression(state)) { 2237 return true; 2238 } 2239 state->parse_state = copy; 2240 2241 if (ParseTwoCharToken(state, "dx") && ParseExpression(state) && 2242 ParseBracedExpression(state)) { 2243 return true; 2244 } 2245 state->parse_state = copy; 2246 2247 if (ParseTwoCharToken(state, "dX") && 2248 ParseExpression(state) && ParseExpression(state) && 2249 ParseBracedExpression(state)) { 2250 return true; 2251 } 2252 state->parse_state = copy; 2253 2254 return ParseExpression(state); 2255 } 2256 2257 // <expression> ::= <1-ary operator-name> <expression> 2258 // ::= <2-ary operator-name> <expression> <expression> 2259 // ::= <3-ary operator-name> <expression> <expression> <expression> 2260 // ::= pp_ <expression> # ++e; pp <expression> is e++ 2261 // ::= mm_ <expression> # --e; mm <expression> is e-- 2262 // ::= cl <expression>+ E 2263 // ::= cp <simple-id> <expression>* E # Clang-specific. 
//              ::= so <type> <expression> [<number>] <union-selector>* [p] E
//              ::= cv <type> <expression>      # type (expression)
//              ::= cv <type> _ <expression>* E # type (expr-list)
//              ::= tl <type> <braced-expression>* E
//              ::= il <braced-expression>* E
//              ::= [gs] nw <expression>* _ <type> E
//              ::= [gs] nw <expression>* _ <type> <initializer>
//              ::= [gs] na <expression>* _ <type> E
//              ::= [gs] na <expression>* _ <type> <initializer>
//              ::= [gs] dl <expression>
//              ::= [gs] da <expression>
//              ::= dc <type> <expression>
//              ::= sc <type> <expression>
//              ::= cc <type> <expression>
//              ::= rc <type> <expression>
//              ::= ti <type>
//              ::= te <expression>
//              ::= st <type>
//              ::= at <type>
//              ::= az <expression>
//              ::= nx <expression>
//              ::= <template-param>
//              ::= <function-param>
//              ::= sZ <template-param>
//              ::= sZ <function-param>
//              ::= sP <template-arg>* E
//              ::= <expr-primary>
//              ::= dt <expression> <unresolved-name> # expr.name
//              ::= pt <expression> <unresolved-name> # expr->name
//              ::= sp <expression>             # argument pack expansion
//              ::= fl <binary operator-name> <expression>
//              ::= fr <binary operator-name> <expression>
//              ::= fL <binary operator-name> <expression> <expression>
//              ::= fR <binary operator-name> <expression> <expression>
//              ::= tw <expression>
//              ::= tr
//              ::= sr <type> <unqualified-name> <template-args>
//              ::= sr <type> <unqualified-name>
//              ::= u <source-name> <template-arg>* E # vendor extension
//              ::= rq <requirement>+ E
//              ::= rQ <bare-function-type> _ <requirement>+ E
//
// The function is one long sequence of alternatives tried in the order they
// are written.  <template-param> and <expr-primary> are attempted first,
// before any snapshot is taken; every later alternative starts from the
// snapshot in `copy`, which is written back after each failed attempt.  The
// body also accepts "ds <expression> <expression>", which is not in the list
// above.  Anything that reaches the end is handed to ParseUnresolvedName.
static bool ParseExpression(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
    return true;
  }

  ParseState copy = state->parse_state;

  // Object/function call expression.
  if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // Preincrement and predecrement.  Postincrement and postdecrement are handled
  // by the operator-name logic later on.
  if ((ParseThreeCharToken(state, "pp_") ||
       ParseThreeCharToken(state, "mm_")) &&
      ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // Clang-specific "cp <simple-id> <expression>* E"
  //   https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338
  if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) &&
      ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // <expression> ::= so <type> <expression> [<number>] <union-selector>* [p] E
  //
  // https://github.com/itanium-cxx-abi/cxx-abi/issues/47
  if (ParseTwoCharToken(state, "so") && ParseType(state) &&
      ParseExpression(state) && Optional(ParseNumber(state, nullptr)) &&
      ZeroOrMore(ParseUnionSelector, state) &&
      Optional(ParseOneCharToken(state, 'p')) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // <expression> ::= <function-param>
  if (ParseFunctionParam(state)) return true;
  state->parse_state = copy;

  // <expression> ::= tl <type> <braced-expression>* E
  if (ParseTwoCharToken(state, "tl") && ParseType(state) &&
      ZeroOrMore(ParseBracedExpression, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // <expression> ::= il <braced-expression>* E
  if (ParseTwoCharToken(state, "il") &&
      ZeroOrMore(ParseBracedExpression, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // <expression> ::= [gs] nw <expression>* _ <type> E
  //              ::= [gs] nw <expression>* _ <type> <initializer>
  //              ::= [gs] na <expression>* _ <type> E
  //              ::= [gs] na <expression>* _ <type> <initializer>
  //
  // All four forms share everything up to <type>; only the terminator differs
  // (a bare E or an <initializer>), so they are handled in one condition.
  if (Optional(ParseTwoCharToken(state, "gs")) &&
      (ParseTwoCharToken(state, "nw") || ParseTwoCharToken(state, "na")) &&
      ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, '_') &&
      ParseType(state) &&
      (ParseOneCharToken(state, 'E') || ParseInitializer(state))) {
    return true;
  }
  state->parse_state = copy;

  // <expression> ::= [gs] dl <expression>
  //              ::= [gs] da <expression>
  if (Optional(ParseTwoCharToken(state, "gs")) &&
      (ParseTwoCharToken(state, "dl") || ParseTwoCharToken(state, "da")) &&
      ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // dynamic_cast, static_cast, const_cast, reinterpret_cast.
  //
  // <expression> ::= (dc | sc | cc | rc) <type> <expression>
  //
  // The first letter is matched against the set "dscr" and must be followed by
  // a literal 'c'.
  if (ParseCharClass(state, "dscr") && ParseOneCharToken(state, 'c') &&
      ParseType(state) && ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // Parse the conversion expressions jointly to avoid re-parsing the <type> in
  // their common prefix.  Parsed as:
  //   <expression> ::= cv <type> <conversion-args>
  //   <conversion-args> ::= _ <expression>* E
  //                     ::= <expression>
  //
  // Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName
  // also needs to accept "cv <type>" in other contexts.
  if (ParseTwoCharToken(state, "cv")) {
    if (ParseType(state)) {
      // Second snapshot, positioned just after "cv <type>", so that a failed
      // expr-list attempt does not force the <type> to be parsed again.
      ParseState copy2 = state->parse_state;
      if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) &&
          ParseOneCharToken(state, 'E')) {
        return true;
      }
      state->parse_state = copy2;
      if (ParseExpression(state)) {
        return true;
      }
    }
  } else {
    // Parse unary, binary, and ternary operator expressions jointly, taking
    // care not to re-parse subexpressions repeatedly.  Parse like:
    //   <expression> ::= <operator-name> <expression>
    //                    [<one-to-two-expressions>]
    //   <one-to-two-expressions> ::= <expression> [<expression>]
    //
    // The three guarded ParseExpression calls below consume exactly `arity`
    // operands: a guard of the form (arity < k || ...) skips its call when the
    // operator takes fewer than k operands.
    int arity = -1;
    if (ParseOperatorName(state, &arity) &&
        arity > 0 &&  // 0 arity => disabled.
        (arity < 3 || ParseExpression(state)) &&
        (arity < 2 || ParseExpression(state)) &&
        (arity < 1 || ParseExpression(state))) {
      return true;
    }
  }
  // Reached when either branch above fails, including a "cv" that was
  // consumed but not followed by a usable <type> and arguments.
  state->parse_state = copy;

  // typeid(type)
  if (ParseTwoCharToken(state, "ti") && ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // typeid(expression)
  if (ParseTwoCharToken(state, "te") && ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // sizeof type
  if (ParseTwoCharToken(state, "st") && ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // alignof(type)
  if (ParseTwoCharToken(state, "at") && ParseType(state)) {
    return true;
  }
  state->parse_state = copy;

  // alignof(expression), a GNU extension
  if (ParseTwoCharToken(state, "az") && ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // noexcept(expression) appearing as an expression in a dependent signature
  if (ParseTwoCharToken(state, "nx") && ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // sizeof...(pack)
  //
  // <expression> ::= sZ <template-param>
  //              ::= sZ <function-param>
  if (ParseTwoCharToken(state, "sZ") &&
      (ParseFunctionParam(state) || ParseTemplateParam(state))) {
    return true;
  }
  state->parse_state = copy;

  // sizeof...(pack) captured from an alias template
  //
  // <expression> ::= sP <template-arg>* E
  if (ParseTwoCharToken(state, "sP") && ZeroOrMore(ParseTemplateArg, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // Unary folds (... op pack) and (pack op ...).
  //
  // <expression> ::= fl <binary operator-name> <expression>
  //              ::= fr <binary operator-name> <expression>
  if ((ParseTwoCharToken(state, "fl") || ParseTwoCharToken(state, "fr")) &&
      ParseOperatorName(state, nullptr) && ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // Binary folds (init op ... op pack) and (pack op ... op init).
  //
  // <expression> ::= fL <binary operator-name> <expression> <expression>
  //              ::= fR <binary operator-name> <expression> <expression>
  if ((ParseTwoCharToken(state, "fL") || ParseTwoCharToken(state, "fR")) &&
      ParseOperatorName(state, nullptr) && ParseExpression(state) &&
      ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // tw <expression>: throw e
  if (ParseTwoCharToken(state, "tw") && ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // tr: throw (rethrows an exception from the handler that caught it)
  //
  // NOTE(review): unlike its neighbours, this attempt is not followed by a
  // restore from `copy`.  Presumably a non-matching ParseTwoCharToken leaves
  // the state untouched, which would make the restore unnecessary -- confirm.
  if (ParseTwoCharToken(state, "tr")) return true;

  // Object and pointer member access expressions.
  //
  // <expression> ::= (dt | pt) <expression> <unresolved-name>
  if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
      ParseExpression(state) && ParseUnresolvedName(state)) {
    return true;
  }
  state->parse_state = copy;

  // Pointer-to-member access expressions.  This parses the same as a binary
  // operator, but it's implemented separately because "ds" shouldn't be
  // accepted in other contexts that parse an operator name.
  //
  // <expression> ::= ds <expression> <expression>
  if (ParseTwoCharToken(state, "ds") && ParseExpression(state) &&
      ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // Parameter pack expansion
  if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) {
    return true;
  }
  state->parse_state = copy;

  // Vendor extended expressions
  //
  // <expression> ::= u <source-name> <template-arg>* E
  if (ParseOneCharToken(state, 'u') && ParseSourceName(state) &&
      ZeroOrMore(ParseTemplateArg, state) && ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // <expression> ::= rq <requirement>+ E
  //
  // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
  if (ParseTwoCharToken(state, "rq") && OneOrMore(ParseRequirement, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // <expression> ::= rQ <bare-function-type> _ <requirement>+ E
  //
  // https://github.com/itanium-cxx-abi/cxx-abi/issues/24
  if (ParseTwoCharToken(state, "rQ") && ParseBareFunctionType(state) &&
      ParseOneCharToken(state, '_') && OneOrMore(ParseRequirement, state) &&
      ParseOneCharToken(state, 'E')) {
    return true;
  }
  state->parse_state = copy;

  // Last resort: everything not recognized above is tried as an
  // <unresolved-name>, which is where the "sr ..." forms are handled.
  return ParseUnresolvedName(state);
}

// <initializer> ::= pi <expression>* E
//               ::= il <braced-expression>* E
//
// The il ...
E form is not in the ABI spec but is seen in practice for 2580 // braced-init-lists in new-expressions, which are standard syntax from C++11 2581 // on. 2582 static bool ParseInitializer(State *state) { 2583 ComplexityGuard guard(state); 2584 if (guard.IsTooComplex()) return false; 2585 ParseState copy = state->parse_state; 2586 2587 if (ParseTwoCharToken(state, "pi") && ZeroOrMore(ParseExpression, state) && 2588 ParseOneCharToken(state, 'E')) { 2589 return true; 2590 } 2591 state->parse_state = copy; 2592 2593 if (ParseTwoCharToken(state, "il") && 2594 ZeroOrMore(ParseBracedExpression, state) && 2595 ParseOneCharToken(state, 'E')) { 2596 return true; 2597 } 2598 state->parse_state = copy; 2599 return false; 2600 } 2601 2602 // <expr-primary> ::= L <type> <(value) number> E 2603 // ::= L <type> <(value) float> E 2604 // ::= L <mangled-name> E 2605 // // A bug in g++'s C++ ABI version 2 (-fabi-version=2). 2606 // ::= LZ <encoding> E 2607 // 2608 // Warning, subtle: the "bug" LZ production above is ambiguous with the first 2609 // production where <type> starts with <local-name>, which can lead to 2610 // exponential backtracking in two scenarios: 2611 // 2612 // - When whatever follows the E in the <local-name> in the first production is 2613 // not a name, we backtrack the whole <encoding> and re-parse the whole thing. 2614 // 2615 // - When whatever follows the <local-name> in the first production is not a 2616 // number and this <expr-primary> may be followed by a name, we backtrack the 2617 // <name> and re-parse it. 2618 // 2619 // Moreover this ambiguity isn't always resolved -- for example, the following 2620 // has two different parses: 2621 // 2622 // _ZaaILZ4aoeuE1x1EvE 2623 // => operator&&<aoeu, x, E, void> 2624 // => operator&&<(aoeu::x)(1), void> 2625 // 2626 // To resolve this, we just do what GCC's demangler does, and refuse to parse 2627 // casts to <local-name> types. 
2628 static bool ParseExprPrimary(State *state) { 2629 ComplexityGuard guard(state); 2630 if (guard.IsTooComplex()) return false; 2631 ParseState copy = state->parse_state; 2632 2633 // The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E" 2634 // or fail, no backtracking. 2635 if (ParseTwoCharToken(state, "LZ")) { 2636 if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) { 2637 return true; 2638 } 2639 2640 state->parse_state = copy; 2641 return false; 2642 } 2643 2644 if (ParseOneCharToken(state, 'L')) { 2645 // There are two special cases in which a literal may or must contain a type 2646 // without a value. The first is that both LDnE and LDn0E are valid 2647 // encodings of nullptr, used in different situations. Recognize LDnE here, 2648 // leaving LDn0E to be recognized by the general logic afterward. 2649 if (ParseThreeCharToken(state, "DnE")) return true; 2650 2651 // The second special case is a string literal, currently mangled in C++98 2652 // style as LA<length + 1>_KcE. This is inadequate to support C++11 and 2653 // later versions, and the discussion of this problem has not converged. 2654 // 2655 // https://github.com/itanium-cxx-abi/cxx-abi/issues/64 2656 // 2657 // For now the bare-type mangling is what's used in practice, so we 2658 // recognize this form and only this form if an array type appears here. 2659 // Someday we'll probably have to accept a new form of value mangling in 2660 // LA...E constructs. (Note also that C++20 allows a wide range of 2661 // class-type objects as template arguments, so someday their values will be 2662 // mangled and we'll have to recognize them here too.) 2663 if (RemainingInput(state)[0] == 'A' /* an array type follows */) { 2664 if (ParseType(state) && ParseOneCharToken(state, 'E')) return true; 2665 state->parse_state = copy; 2666 return false; 2667 } 2668 2669 // The merged cast production. 
2670 if (ParseType(state) && ParseExprCastValueAndTrailingE(state)) { 2671 return true; 2672 } 2673 } 2674 state->parse_state = copy; 2675 2676 if (ParseOneCharToken(state, 'L') && ParseMangledName(state) && 2677 ParseOneCharToken(state, 'E')) { 2678 return true; 2679 } 2680 state->parse_state = copy; 2681 2682 return false; 2683 } 2684 2685 // <number> or <float>, followed by 'E', as described above ParseExprPrimary. 2686 static bool ParseExprCastValueAndTrailingE(State *state) { 2687 ComplexityGuard guard(state); 2688 if (guard.IsTooComplex()) return false; 2689 // We have to be able to backtrack after accepting a number because we could 2690 // have e.g. "7fffE", which will accept "7" as a number but then fail to find 2691 // the 'E'. 2692 ParseState copy = state->parse_state; 2693 if (ParseNumber(state, nullptr) && ParseOneCharToken(state, 'E')) { 2694 return true; 2695 } 2696 state->parse_state = copy; 2697 2698 if (ParseFloatNumber(state)) { 2699 // <float> for ordinary floating-point types 2700 if (ParseOneCharToken(state, 'E')) return true; 2701 2702 // <float> _ <float> for complex floating-point types 2703 if (ParseOneCharToken(state, '_') && ParseFloatNumber(state) && 2704 ParseOneCharToken(state, 'E')) { 2705 return true; 2706 } 2707 } 2708 state->parse_state = copy; 2709 2710 return false; 2711 } 2712 2713 // Parses `Q <requires-clause expr>`. 2714 // If parsing fails, applies backtracking to `state`. 2715 // 2716 // This function covers two symbols instead of one for convenience, 2717 // because in LLVM's Itanium ABI mangling grammar, <requires-clause expr> 2718 // always appears after Q. 2719 // 2720 // Does not emit the parsed `requires` clause to simplify the implementation. 2721 // In other words, these two functions' mangled names will demangle identically: 2722 // 2723 // template <typename T> 2724 // int foo(T) requires IsIntegral<T>; 2725 // 2726 // vs. 
//
//   template <typename T>
//   int foo(T);
static bool ParseQRequiresClauseExpr(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;
  // Called before the clause is parsed; the `append` setting saved in `copy`
  // is reinstated on success.
  DisableAppend(state);

  // <requires-clause expr> is just an <expression>: http://shortn/_9E1Ul0rIM8
  if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) {
    RestoreAppend(state, copy.append);
    return true;
  }

  // also restores append
  state->parse_state = copy;
  return false;
}

// <requirement> ::= X <expression> [N] [R <type-constraint>]
// <requirement> ::= T <type>
// <requirement> ::= Q <constraint-expression>
//
// <constraint-expression> ::= <expression>
//
// https://github.com/itanium-cxx-abi/cxx-abi/issues/24
//
// The three forms are tried in the order listed, each from the position saved
// in `copy`.
static bool ParseRequirement(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;

  ParseState copy = state->parse_state;

  if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
      Optional(ParseOneCharToken(state, 'N')) &&
      // If an R is consumed but no valid <type-constraint> follows it, this
      // operand is false, the whole condition fails, and the restore from
      // `copy` below undoes everything, R included.  If there is no R at all,
      // the operand is true and the requirement ends here.
      (!ParseOneCharToken(state, 'R') || ParseTypeConstraint(state))) {
    return true;
  }
  state->parse_state = copy;

  if (ParseOneCharToken(state, 'T') && ParseType(state)) return true;
  state->parse_state = copy;

  if (ParseOneCharToken(state, 'Q') && ParseExpression(state)) return true;
  state->parse_state = copy;

  return false;
}

// <type-constraint> ::= <name>
//
// Thin wrapper: the result is exactly that of ParseName.
static bool ParseTypeConstraint(State *state) {
  return ParseName(state);
}

// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
//              ::= Z <(function) encoding> E s [<discriminator>]
//              ::= Z <(function) encoding> E d [<(parameter) number>] _ <name>
//
// Parsing the common prefix of these productions together avoids an
// exponential blowup of backtracking.  Parse like:
//   <local-name> := Z <encoding> E <local-name-suffix>
//   <local-name-suffix> ::= s [<discriminator>]
//                       ::= d [<(parameter) number>] _ <name>
//                       ::= <name> [<discriminator>]

// Parses a <local-name-suffix>.  The code tries the "d" form first, then the
// <name> form, then the "s" form.  Two of the failure paths also write a NUL
// into the output buffer at the restored out_cur_idx; see the comments at
// those sites.
static bool ParseLocalNameSuffix(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;

  // <local-name-suffix> ::= d [<(parameter) number>] _ <name>
  //
  // The 'd' is only treated as this production when a digit or '_' follows
  // it.
  if (ParseOneCharToken(state, 'd') &&
      (IsDigit(RemainingInput(state)[0]) || RemainingInput(state)[0] == '_')) {
    // `number` stays -1 when no <number> is present.
    int number = -1;
    Optional(ParseNumber(state, &number));
    if (number < -1 || number > 2147483645) {
      // Work around overflow cases.  We do not expect these outside of a fuzzer
      // or other source of adversarial input.  If we do detect overflow here,
      // we'll print {default arg#1}.
      number = -1;
    }
    // The printed index is the parsed value plus 2, so an absent number (-1)
    // is shown as 1.  The upper bound checked above keeps this addition from
    // overflowing a 32-bit int.
    number += 2;

    // The ::{default arg#1}:: infix must be rendered before the lambda itself,
    // so print this before parsing the rest of the <local-name-suffix>.
    MaybeAppend(state, "::{default arg#");
    MaybeAppendDecimal(state, number);
    MaybeAppend(state, "}::");
    if (ParseOneCharToken(state, '_') && ParseName(state)) return true;

    // On late parse failure, roll back not only the input but also the output,
    // whose trailing NUL was overwritten.
    state->parse_state = copy;
    if (state->parse_state.append &&
        state->parse_state.out_cur_idx < state->out_end_idx) {
      state->out[state->parse_state.out_cur_idx] = '\0';
    }
    return false;
  }
  state->parse_state = copy;

  // <local-name-suffix> ::= <name> [<discriminator>]
  //
  // "::" is appended before the name is attempted, which is why the failure
  // path below re-terminates the output in the same way as the "d" case.
  if (MaybeAppend(state, "::") && ParseName(state) &&
      Optional(ParseDiscriminator(state))) {
    return true;
  }
  state->parse_state = copy;
  if (state->parse_state.append &&
      state->parse_state.out_cur_idx < state->out_end_idx) {
    state->out[state->parse_state.out_cur_idx] = '\0';
  }

  // <local-name-suffix> ::= s [<discriminator>]
  return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
}

// Parses a whole <local-name> as Z <encoding> E <local-name-suffix>, restoring
// state->parse_state from `copy` if any part fails.
static bool ParseLocalName(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;
  if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
      ParseOneCharToken(state, 'E') && ParseLocalNameSuffix(state)) {
    return true;
  }
  state->parse_state = copy;
  return false;
}

// <discriminator> := _ <digit>
//                 := __ <number (>= 10)> _
//
// NOTE(review): the ">= 10" bound in the grammar above is not checked by this
// code; any value ParseNumber accepts is taken.
static bool ParseDiscriminator(State *state) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  ParseState copy = state->parse_state;

  // Both forms start with _ so parse that first.
  if (!ParseOneCharToken(state, '_')) return false;

  // <digit>
  if (ParseDigit(state, nullptr)) return true;

  // _ <number> _
  if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr) &&
      ParseOneCharToken(state, '_')) {
    return true;
  }
  // Neither form matched after the leading '_'; give the '_' back as well.
  state->parse_state = copy;
  return false;
}

// <substitution> ::= S_
//                ::= S <seq-id> _
//                ::= St, etc.
//
// "St" is special in that it's not valid as a standalone name, and it *is*
// allowed to precede a name without being wrapped in "N...E".  This means that
// if we accept it on its own, we can accept "St1a" and try to parse
// template-args, then fail and backtrack, accept "St" on its own, then "1a" as
// an unqualified name and re-parse the same template-args.  To block this
// exponential backtracking, we disable it with 'accept_std=false' in
// problematic contexts.
static bool ParseSubstitution(State *state, bool accept_std) {
  ComplexityGuard guard(state);
  if (guard.IsTooComplex()) return false;
  if (ParseTwoCharToken(state, "S_")) {
    MaybeAppend(state, "?");  // We don't support substitutions.
    return true;
  }

  ParseState copy = state->parse_state;
  if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
      ParseOneCharToken(state, '_')) {
    MaybeAppend(state, "?");  // We don't support substitutions.
    return true;
  }
  state->parse_state = copy;

  // Expand abbreviations like "St" => "std".
2907 if (ParseOneCharToken(state, 'S')) { 2908 const AbbrevPair *p; 2909 for (p = kSubstitutionList; p->abbrev != nullptr; ++p) { 2910 if (RemainingInput(state)[0] == p->abbrev[1] && 2911 (accept_std || p->abbrev[1] != 't')) { 2912 MaybeAppend(state, "std"); 2913 if (p->real_name[0] != '\0') { 2914 MaybeAppend(state, "::"); 2915 MaybeAppend(state, p->real_name); 2916 } 2917 ++state->parse_state.mangled_idx; 2918 UpdateHighWaterMark(state); 2919 return true; 2920 } 2921 } 2922 } 2923 state->parse_state = copy; 2924 return false; 2925 } 2926 2927 // Parse <mangled-name>, optionally followed by either a function-clone suffix 2928 // or version suffix. Returns true only if all of "mangled_cur" was consumed. 2929 static bool ParseTopLevelMangledName(State *state) { 2930 ComplexityGuard guard(state); 2931 if (guard.IsTooComplex()) return false; 2932 if (ParseMangledName(state)) { 2933 if (RemainingInput(state)[0] != '\0') { 2934 // Drop trailing function clone suffix, if any. 2935 if (IsFunctionCloneSuffix(RemainingInput(state))) { 2936 return true; 2937 } 2938 // Append trailing version suffix if any. 2939 // ex. _Z3foo@@GLIBCXX_3.4 2940 if (RemainingInput(state)[0] == '@') { 2941 MaybeAppend(state, RemainingInput(state)); 2942 return true; 2943 } 2944 ReportHighWaterMark(state); 2945 return false; // Unconsumed suffix. 2946 } 2947 return true; 2948 } 2949 2950 ReportHighWaterMark(state); 2951 return false; 2952 } 2953 2954 static bool Overflowed(const State *state) { 2955 return state->parse_state.out_cur_idx >= state->out_end_idx; 2956 } 2957 2958 // The demangler entry point. 2959 bool Demangle(const char* mangled, char* out, size_t out_size) { 2960 // mozilla - hazard-linux64-haz/debug failure when demangle_rust.cc is 2961 // included in the build. For now we'll avoid this code. 
2962 #if 0 2963 if (mangled[0] == '_' && mangled[1] == 'R') { 2964 return DemangleRustSymbolEncoding(mangled, out, out_size); 2965 } 2966 #endif 2967 2968 State state; 2969 InitState(&state, mangled, out, out_size); 2970 return ParseTopLevelMangledName(&state) && !Overflowed(&state) && 2971 state.parse_state.out_cur_idx > 0; 2972 } 2973 2974 std::string DemangleString(const char* mangled) { 2975 std::string out; 2976 int status = 0; 2977 char* demangled = nullptr; 2978 #if ABSL_INTERNAL_HAS_CXA_DEMANGLE 2979 demangled = abi::__cxa_demangle(mangled, nullptr, nullptr, &status); 2980 #endif 2981 if (status == 0 && demangled != nullptr) { 2982 out.append(demangled); 2983 free(demangled); 2984 } else { 2985 out.append(mangled); 2986 } 2987 return out; 2988 } 2989 2990 } // namespace debugging_internal 2991 ABSL_NAMESPACE_END 2992 } // namespace absl