tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

demangle_rust.cc (35353B)


      1 // Copyright 2024 The Abseil Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "absl/debugging/internal/demangle_rust.h"
     16 
     17 #include <cstddef>
     18 #include <cstdint>
     19 #include <cstring>
     20 #include <limits>
     21 
     22 #include "absl/base/attributes.h"
     23 #include "absl/base/config.h"
     24 #include "absl/debugging/internal/decode_rust_punycode.h"
     25 
     26 namespace absl {
     27 ABSL_NAMESPACE_BEGIN
     28 namespace debugging_internal {
     29 
     30 namespace {
     31 
     32 // Same step limit as the C++ demangler in demangle.cc uses.
     33 constexpr int kMaxReturns = 1 << 17;
     34 
     35 bool IsDigit(char c) { return '0' <= c && c <= '9'; }
     36 bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
     37 bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; }
     38 bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); }
     39 bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; }
     40 bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); }
     41 
     42 const char* BasicTypeName(char c) {
     43  switch (c) {
     44    case 'a': return "i8";
     45    case 'b': return "bool";
     46    case 'c': return "char";
     47    case 'd': return "f64";
     48    case 'e': return "str";
     49    case 'f': return "f32";
     50    case 'h': return "u8";
     51    case 'i': return "isize";
     52    case 'j': return "usize";
     53    case 'l': return "i32";
     54    case 'm': return "u32";
     55    case 'n': return "i128";
     56    case 'o': return "u128";
     57    case 'p': return "_";
     58    case 's': return "i16";
     59    case 't': return "u16";
     60    case 'u': return "()";
     61    case 'v': return "...";
     62    case 'x': return "i64";
     63    case 'y': return "u64";
     64    case 'z': return "!";
     65  }
     66  return nullptr;
     67 }
     68 
     69 // Parser for Rust symbol mangling v0, whose grammar is defined here:
     70 //
     71 // https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
     72 class RustSymbolParser {
     73 public:
     74  // Prepares to demangle the given encoding, a Rust symbol name starting with
     75  // _R, into the output buffer [out, out_end).  The caller is expected to
     76  // continue by calling the new object's Parse function.
     77  RustSymbolParser(const char* encoding, char* out, char* const out_end)
     78      : encoding_(encoding), out_(out), out_end_(out_end) {
     79    if (out_ != out_end_) *out_ = '\0';
     80  }
     81 
     82  // Parses the constructor's encoding argument, writing output into the range
     83  // [out, out_end).  Returns true on success and false for input whose
     84  // structure was not recognized or exceeded implementation limits, such as by
     85  // nesting structures too deep.  In either case *this should not be used
     86  // again.
     87  [[nodiscard]] bool Parse() && {
     88    // Recursively parses the grammar production named by callee, then resumes
     89    // execution at the next statement.
     90    //
     91    // Recursive-descent parsing is a beautifully readable translation of a
     92    // grammar, but it risks stack overflow if implemented by naive recursion on
     93    // the C++ call stack.  So we simulate recursion by goto and switch instead,
     94    // keeping a bounded stack of "return addresses" in the recursion_stack_
     95    // member.
     96    //
     97    // The callee argument is a statement label.  We goto that label after
     98    // saving the "return address" on recursion_stack_.  The next continue
     99    // statement in the for loop below "returns" from this "call".
    100    //
    101    // The caller argument names the return point.  Each value of caller must
    102    // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the
    103    // definition of enum ReturnAddress.  The switch implements the control
    104    // transfer from the end of a "called" subroutine back to the statement
    105    // after the "call".
    106    //
    107    // Note that not all the grammar productions have to be packed into the
    108    // switch, but only those which appear in a cycle in the grammar.  Anything
    109    // acyclic can be written as ordinary functions and function calls, e.g.,
    110    // ParseIdentifier.
    111 #define ABSL_DEMANGLER_RECURSE(callee, caller) \
    112    do { \
    113      if (recursion_depth_ == kStackSize) return false; \
    114      /* The next continue will switch on this saved value ... */ \
    115      recursion_stack_[recursion_depth_++] = caller; \
    116      goto callee; \
    117      /* ... and will land here, resuming the suspended code. */ \
    118      case caller: {} \
    119    } while (0)
    120 
    121    // Parse the encoding, counting completed recursive calls to guard against
    122    // excessively complex input and infinite-loop bugs.
    123    int iter = 0;
    124    goto whole_encoding;
    125    for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) {
    126      // This switch resumes the code path most recently suspended by
    127      // ABSL_DEMANGLER_RECURSE.
    128      switch (recursion_stack_[--recursion_depth_]) {
    129        //
    130        // symbol-name ->
    131        // _R decimal-number? path instantiating-crate? vendor-specific-suffix?
    132        whole_encoding:
    133          if (!Eat('_') || !Eat('R')) return false;
    134          // decimal-number? is always empty today, so proceed to path, which
    135          // can't start with a decimal digit.
    136          ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
    137          if (IsAlpha(Peek())) {
    138            ++silence_depth_;  // Print nothing more from here on.
    139            ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
    140          }
    141          switch (Take()) {
    142            case '.': case '$': case '\0': return true;
    143          }
    144          return false;  // unexpected trailing content
    145 
    146        // path -> crate-root | inherent-impl | trait-impl | trait-definition |
    147        //         nested-path | generic-args | backref
    148        //
    149        // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch
    150        // (which would hide the generated case label).  Thus we jump out of the
    151        // inner switch with gotos before performing any fake recursion.
    152        path:
    153          switch (Take()) {
    154            case 'C': goto crate_root;
    155            case 'M': goto inherent_impl;
    156            case 'X': goto trait_impl;
    157            case 'Y': goto trait_definition;
    158            case 'N': goto nested_path;
    159            case 'I': goto generic_args;
    160            case 'B': goto path_backref;
    161            default: return false;
    162          }
    163 
    164        // crate-root -> C identifier (C consumed above)
    165        crate_root:
    166          if (!ParseIdentifier()) return false;
    167          continue;
    168 
    169        // inherent-impl -> M impl-path type (M already consumed)
    170        inherent_impl:
    171          if (!Emit("<")) return false;
    172          ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType);
    173          ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding);
    174          if (!Emit(">")) return false;
    175          continue;
    176 
    177        // trait-impl -> X impl-path type path (X already consumed)
    178        trait_impl:
    179          if (!Emit("<")) return false;
    180          ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType);
    181          ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix);
    182          if (!Emit(" as ")) return false;
    183          ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding);
    184          if (!Emit(">")) return false;
    185          continue;
    186 
    187        // impl-path -> disambiguator? path (but never print it!)
    188        impl_path:
    189          ++silence_depth_;
    190          {
    191            int ignored_disambiguator;
    192            if (!ParseDisambiguator(ignored_disambiguator)) return false;
    193          }
    194          ABSL_DEMANGLER_RECURSE(path, kImplPathEnding);
    195          --silence_depth_;
    196          continue;
    197 
    198        // trait-definition -> Y type path (Y already consumed)
    199        trait_definition:
    200          if (!Emit("<")) return false;
    201          ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix);
    202          if (!Emit(" as ")) return false;
    203          ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding);
    204          if (!Emit(">")) return false;
    205          continue;
    206 
    207        // nested-path -> N namespace path identifier (N already consumed)
    208        // namespace -> lower | upper
    209        nested_path:
    210          // Uppercase namespaces must be saved on a stack so we can print
    211          // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
    212          if (IsUpper(Peek())) {
    213            if (!PushNamespace(Take())) return false;
    214            ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
    215            if (!Emit("::")) return false;
    216            if (!ParseIdentifier(PopNamespace())) return false;
    217            continue;
    218          }
    219 
    220          // Lowercase namespaces, however, are never represented in the output;
    221          // they all emit just ::name.
    222          if (IsLower(Take())) {
    223            ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
    224            if (!Emit("::")) return false;
    225            if (!ParseIdentifier()) return false;
    226            continue;
    227          }
    228 
    229          // Neither upper or lower
    230          return false;
    231 
    232        // type -> basic-type | array-type | slice-type | tuple-type |
    233        //         ref-type | mut-ref-type | const-ptr-type | mut-ptr-type |
    234        //         fn-type | dyn-trait-type | path | backref
    235        //
    236        // We use ifs instead of switch (Take()) because the default case jumps
    237        // to path, which will need to see the first character not yet Taken
    238        // from the input.  Because we do not use a nested switch here,
    239        // ABSL_DEMANGLER_RECURSE works fine in the 'S' case.
    240        type:
    241          if (IsLower(Peek())) {
    242            const char* type_name = BasicTypeName(Take());
    243            if (type_name == nullptr || !Emit(type_name)) return false;
    244            continue;
    245          }
    246          if (Eat('A')) {
    247            // array-type = A type const
    248            if (!Emit("[")) return false;
    249            ABSL_DEMANGLER_RECURSE(type, kArraySize);
    250            if (!Emit("; ")) return false;
    251            ABSL_DEMANGLER_RECURSE(constant, kFinishArray);
    252            if (!Emit("]")) return false;
    253            continue;
    254          }
    255          if (Eat('S')) {
    256            if (!Emit("[")) return false;
    257            ABSL_DEMANGLER_RECURSE(type, kSliceEnding);
    258            if (!Emit("]")) return false;
    259            continue;
    260          }
    261          if (Eat('T')) goto tuple_type;
    262          if (Eat('R')) {
    263            if (!Emit("&")) return false;
    264            if (!ParseOptionalLifetime()) return false;
    265            goto type;
    266          }
    267          if (Eat('Q')) {
    268            if (!Emit("&mut ")) return false;
    269            if (!ParseOptionalLifetime()) return false;
    270            goto type;
    271          }
    272          if (Eat('P')) {
    273            if (!Emit("*const ")) return false;
    274            goto type;
    275          }
    276          if (Eat('O')) {
    277            if (!Emit("*mut ")) return false;
    278            goto type;
    279          }
    280          if (Eat('F')) goto fn_type;
    281          if (Eat('D')) goto dyn_trait_type;
    282          if (Eat('B')) goto type_backref;
    283          goto path;
    284 
    285        // tuple-type -> T type* E (T already consumed)
    286        tuple_type:
    287          if (!Emit("(")) return false;
    288 
    289          // The toolchain should call the unit type u instead of TE, but the
    290          // grammar and other demanglers also recognize TE, so we do too.
    291          if (Eat('E')) {
    292            if (!Emit(")")) return false;
    293            continue;
    294          }
    295 
    296          // A tuple with one element is rendered (type,) instead of (type).
    297          ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement);
    298          if (Eat('E')) {
    299            if (!Emit(",)")) return false;
    300            continue;
    301          }
    302 
    303          // A tuple with two elements is of course (x, y).
    304          if (!Emit(", ")) return false;
    305          ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement);
    306          if (Eat('E')) {
    307            if (!Emit(")")) return false;
    308            continue;
    309          }
    310 
    311          // And (x, y, z) for three elements.
    312          if (!Emit(", ")) return false;
    313          ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement);
    314          if (Eat('E')) {
    315            if (!Emit(")")) return false;
    316            continue;
    317          }
    318 
    319          // For longer tuples we write (x, y, z, ...), printing none of the
    320          // content of the fourth and later types.  Thus we avoid exhausting
    321          // output buffers and human readers' patience when some library has a
    322          // long tuple as an implementation detail, without having to
    323          // completely obfuscate all tuples.
    324          if (!Emit(", ...)")) return false;
    325          ++silence_depth_;
    326          while (!Eat('E')) {
    327            ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement);
    328          }
    329          --silence_depth_;
    330          continue;
    331 
    332        // fn-type -> F fn-sig (F already consumed)
    333        // fn-sig -> binder? U? (K abi)? type* E type
    334        // abi -> C | undisambiguated-identifier
    335        //
    336        // We follow the C++ demangler in suppressing details of function
    337        // signatures.  Every function type is rendered "fn...".
    338        fn_type:
    339          if (!Emit("fn...")) return false;
    340          ++silence_depth_;
    341          if (!ParseOptionalBinder()) return false;
    342          (void)Eat('U');
    343          if (Eat('K')) {
    344            if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false;
    345          }
    346          while (!Eat('E')) {
    347            ABSL_DEMANGLER_RECURSE(type, kContinueParameterList);
    348          }
    349          ABSL_DEMANGLER_RECURSE(type, kFinishFn);
    350          --silence_depth_;
    351          continue;
    352 
    353        // dyn-trait-type -> D dyn-bounds lifetime (D already consumed)
    354        // dyn-bounds -> binder? dyn-trait* E
    355        //
    356        // The grammar strangely allows an empty trait list, even though the
    357        // compiler should never output one.  We follow existing demanglers in
    358        // rendering DEL_ as "dyn ".
    359        //
    360        // Because auto traits lengthen a type name considerably without
    361        // providing much value to a search for related source code, it would be
    362        // desirable to abbreviate
    363        //     dyn main::Trait + std::marker::Copy + std::marker::Send
    364        // to
    365        //     dyn main::Trait + ...,
    366        // eliding the auto traits.  But it is difficult to do so correctly, in
    367        // part because there is no guarantee that the mangling will list the
    368        // main trait first.  So we just print all the traits in their order of
    369        // appearance in the mangled name.
    370        dyn_trait_type:
    371          if (!Emit("dyn ")) return false;
    372          if (!ParseOptionalBinder()) return false;
    373          if (!Eat('E')) {
    374            ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits);
    375            while (!Eat('E')) {
    376              if (!Emit(" + ")) return false;
    377              ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits);
    378            }
    379          }
    380          if (!ParseRequiredLifetime()) return false;
    381          continue;
    382 
    383        // dyn-trait -> path dyn-trait-assoc-binding*
    384        // dyn-trait-assoc-binding -> p undisambiguated-identifier type
    385        //
    386        // We render nonempty binding lists as <>, omitting their contents as
    387        // for generic-args.
    388        dyn_trait:
    389          ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait);
    390          if (Peek() == 'p') {
    391            if (!Emit("<>")) return false;
    392            ++silence_depth_;
    393            while (Eat('p')) {
    394              if (!ParseUndisambiguatedIdentifier()) return false;
    395              ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding);
    396            }
    397            --silence_depth_;
    398          }
    399          continue;
    400 
    401        // const -> type const-data | p | backref
    402        //
    403        // const is a C++ keyword, so we use the label `constant` instead.
    404        constant:
    405          if (Eat('B')) goto const_backref;
    406          if (Eat('p')) {
    407            if (!Emit("_")) return false;
    408            continue;
    409          }
    410 
    411          // Scan the type without printing it.
    412          //
    413          // The Rust language restricts the type of a const generic argument
    414          // much more than the mangling grammar does.  We do not enforce this.
    415          //
    416          // We also do not bother printing false, true, 'A', and '\u{abcd}' for
    417          // the types bool and char.  Because we do not print generic-args
    418          // contents, we expect to print constants only in array sizes, and
    419          // those should not be bool or char.
    420          ++silence_depth_;
    421          ABSL_DEMANGLER_RECURSE(type, kConstData);
    422          --silence_depth_;
    423 
    424          // const-data -> n? hex-digit* _
    425          //
    426          // Although the grammar doesn't say this, existing demanglers expect
    427          // that zero is 0, not an empty digit sequence, and no nonzero value
    428          // may have leading zero digits.  Also n0_ is accepted and printed as
    429          // -0, though a toolchain will probably never write that encoding.
    430          if (Eat('n') && !EmitChar('-')) return false;
    431          if (!Emit("0x")) return false;
    432          if (Eat('0')) {
    433            if (!EmitChar('0')) return false;
    434            if (!Eat('_')) return false;
    435            continue;
    436          }
    437          while (IsLowerHexDigit(Peek())) {
    438            if (!EmitChar(Take())) return false;
    439          }
    440          if (!Eat('_')) return false;
    441          continue;
    442 
    443        // generic-args -> I path generic-arg* E (I already consumed)
    444        //
    445        // We follow the C++ demangler in omitting all the arguments from the
    446        // output, printing only the list opening and closing tokens.
    447        generic_args:
    448          ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList);
    449          if (!Emit("::<>")) return false;
    450          ++silence_depth_;
    451          while (!Eat('E')) {
    452            ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList);
    453          }
    454          --silence_depth_;
    455          continue;
    456 
    457        // generic-arg -> lifetime | type | K const
    458        generic_arg:
    459          if (Peek() == 'L') {
    460            if (!ParseOptionalLifetime()) return false;
    461            continue;
    462          }
    463          if (Eat('K')) goto constant;
    464          goto type;
    465 
    466        // backref -> B base-62-number (B already consumed)
    467        //
    468        // The BeginBackref call parses and range-checks the base-62-number.  We
    469        // always do that much.
    470        //
    471        // The recursive call parses and prints what the backref points at.  We
    472        // save CPU and stack by skipping this work if the output would be
    473        // suppressed anyway.
    474        path_backref:
    475          if (!BeginBackref()) return false;
    476          if (silence_depth_ == 0) {
    477            ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
    478          }
    479          EndBackref();
    480          continue;
    481 
    482        // This represents the same backref production as in path_backref but
    483        // parses the target as a type instead of a path.
    484        type_backref:
    485          if (!BeginBackref()) return false;
    486          if (silence_depth_ == 0) {
    487            ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
    488          }
    489          EndBackref();
    490          continue;
    491 
    492        const_backref:
    493          if (!BeginBackref()) return false;
    494          if (silence_depth_ == 0) {
    495            ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding);
    496          }
    497          EndBackref();
    498          continue;
    499      }
    500    }
    501 
    502    return false;  // hit iteration limit or a bug in our stack handling
    503  }
    504 
    505 private:
    506  // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls.
    507  enum ReturnAddress : uint8_t {
    508    kInstantiatingCrate,
    509    kVendorSpecificSuffix,
    510    kIdentifierInUppercaseNamespace,
    511    kIdentifierInLowercaseNamespace,
    512    kInherentImplType,
    513    kInherentImplEnding,
    514    kTraitImplType,
    515    kTraitImplInfix,
    516    kTraitImplEnding,
    517    kImplPathEnding,
    518    kTraitDefinitionInfix,
    519    kTraitDefinitionEnding,
    520    kArraySize,
    521    kFinishArray,
    522    kSliceEnding,
    523    kAfterFirstTupleElement,
    524    kAfterSecondTupleElement,
    525    kAfterThirdTupleElement,
    526    kAfterSubsequentTupleElement,
    527    kContinueParameterList,
    528    kFinishFn,
    529    kBeginAutoTraits,
    530    kContinueAutoTraits,
    531    kContinueDynTrait,
    532    kContinueAssocBinding,
    533    kConstData,
    534    kBeginGenericArgList,
    535    kContinueGenericArgList,
    536    kPathBackrefEnding,
    537    kTypeBackrefEnding,
    538    kConstantBackrefEnding,
    539  };
    540 
    541  // Element counts for the stack arrays.  Larger stack sizes accommodate more
    542  // deeply nested names at the cost of a larger footprint on the C++ call
    543  // stack.
    544  enum {
    545    // Maximum recursive calls outstanding at one time.
    546    kStackSize = 256,
    547 
    548    // Maximum N<uppercase> nested-paths open at once.  We do not expect
    549    // closures inside closures inside closures as much as functions inside
    550    // modules inside other modules, so we can use a smaller array here.
    551    kNamespaceStackSize = 64,
    552 
    553    // Maximum number of nested backrefs.  We can keep this stack pretty small
    554    // because we do not follow backrefs inside generic-args or other contexts
    555    // that suppress printing, so deep stacking is unlikely in practice.
    556    kPositionStackSize = 16,
    557  };
    558 
    559  // Returns the next input character without consuming it.
    560  char Peek() const { return encoding_[pos_]; }
    561 
    562  // Consumes and returns the next input character.
    563  char Take() { return encoding_[pos_++]; }
    564 
    565  // If the next input character is the given character, consumes it and returns
    566  // true; otherwise returns false without consuming a character.
    567  [[nodiscard]] bool Eat(char want) {
    568    if (encoding_[pos_] != want) return false;
    569    ++pos_;
    570    return true;
    571  }
    572 
    573  // Provided there is enough remaining output space, appends c to the output,
    574  // writing a fresh NUL terminator afterward, and returns true.  Returns false
    575  // if the output buffer had less than two bytes free.
    576  [[nodiscard]] bool EmitChar(char c) {
    577    if (silence_depth_ > 0) return true;
    578    if (out_end_ - out_ < 2) return false;
    579    *out_++ = c;
    580    *out_ = '\0';
    581    return true;
    582  }
    583 
    584  // Provided there is enough remaining output space, appends the C string token
    585  // to the output, followed by a NUL character, and returns true.  Returns
    586  // false if not everything fit into the output buffer.
    587  [[nodiscard]] bool Emit(const char* token) {
    588    if (silence_depth_ > 0) return true;
    589    const size_t token_length = std::strlen(token);
    590    const size_t bytes_to_copy = token_length + 1;  // token and final NUL
    591    if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false;
    592    std::memcpy(out_, token, bytes_to_copy);
    593    out_ += token_length;
    594    return true;
    595  }
    596 
    597  // Provided there is enough remaining output space, appends the decimal form
    598  // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the
    599  // output, followed by a NUL character, and returns true.  Returns false if
    600  // not everything fit into the output buffer.
    601  [[nodiscard]] bool EmitDisambiguator(int disambiguator) {
    602    if (disambiguator < 0) return EmitChar('?');  // parsed but too large
    603    if (disambiguator == 0) return EmitChar('0');
    604    // Convert disambiguator to decimal text.  Three digits per byte is enough
    605    // because 999 > 256.  The bound will remain correct even if future
    606    // maintenance changes the type of the disambiguator variable.
    607    char digits[3 * sizeof(disambiguator)] = {};
    608    size_t leading_digit_index = sizeof(digits) - 1;
    609    for (; disambiguator > 0; disambiguator /= 10) {
    610      digits[--leading_digit_index] =
    611          static_cast<char>('0' + disambiguator % 10);
    612    }
    613    return Emit(digits + leading_digit_index);
    614  }
    615 
    616  // Consumes an optional disambiguator (s123_) from the input.
    617  //
    618  // On success returns true and fills value with the encoded value if it was
    619  // not too big, otherwise with -1.  If the optional disambiguator was omitted,
    620  // value is 0.  On parse failure returns false and sets value to -1.
    621  [[nodiscard]] bool ParseDisambiguator(int& value) {
    622    value = -1;
    623 
    624    // disambiguator = s base-62-number
    625    //
    626    // Disambiguators are optional.  An omitted disambiguator is zero.
    627    if (!Eat('s')) {
    628      value = 0;
    629      return true;
    630    }
    631    int base_62_value = 0;
    632    if (!ParseBase62Number(base_62_value)) return false;
    633    value = base_62_value < 0 ? -1 : base_62_value + 1;
    634    return true;
    635  }
    636 
    637  // Consumes a base-62 number like _ or 123_ from the input.
    638  //
    639  // On success returns true and fills value with the encoded value if it was
    640  // not too big, otherwise with -1.  On parse failure returns false and sets
    641  // value to -1.
    642  [[nodiscard]] bool ParseBase62Number(int& value) {
    643    value = -1;
    644 
    645    // base-62-number = (digit | lower | upper)* _
    646    //
    647    // An empty base-62 digit sequence means 0.
    648    if (Eat('_')) {
    649      value = 0;
    650      return true;
    651    }
    652 
    653    // A nonempty digit sequence denotes its base-62 value plus 1.
    654    int encoded_number = 0;
    655    bool overflowed = false;
    656    while (IsAlpha(Peek()) || IsDigit(Peek())) {
    657      const char c = Take();
    658      if (encoded_number >= std::numeric_limits<int>::max()/62) {
    659        // If we are close to overflowing an int, keep parsing but stop updating
    660        // encoded_number and remember to return -1 at the end.  The point is to
    661        // avoid undefined behavior while parsing crate-root disambiguators,
    662        // which are large in practice but not shown in demangling, while
    663        // successfully computing closure and shim disambiguators, which are
    664        // typically small and are printed out.
    665        overflowed = true;
    666      } else {
    667        int digit;
    668        if (IsDigit(c)) {
    669          digit = c - '0';
    670        } else if (IsLower(c)) {
    671          digit = c - 'a' + 10;
    672        } else {
    673          digit = c - 'A' + 36;
    674        }
    675        encoded_number = 62 * encoded_number + digit;
    676      }
    677    }
    678 
    679    if (!Eat('_')) return false;
    680    if (!overflowed) value = encoded_number + 1;
    681    return true;
    682  }
    683 
    684  // Consumes an identifier from the input, returning true on success.
    685  //
    686  // A nonzero uppercase_namespace specifies the character after the N in a
    687  // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to
    688  // write out the name with the conventional decoration for that namespace.
    689  [[nodiscard]] bool ParseIdentifier(char uppercase_namespace = '\0') {
    690    // identifier -> disambiguator? undisambiguated-identifier
    691    int disambiguator = 0;
    692    if (!ParseDisambiguator(disambiguator)) return false;
    693 
    694    return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator);
    695  }
    696 
    697  // Consumes from the input an identifier with no preceding disambiguator,
    698  // returning true on success.
    699  //
    700  // When ParseIdentifier calls this, it passes the N<namespace> character and
    701  // disambiguator value so that "{closure#42}" and similar forms can be
    702  // rendered correctly.
    703  //
    704  // At other appearances of undisambiguated-identifier in the grammar, this
    705  // treatment is not applicable, and the call site omits both arguments.
    706  [[nodiscard]] bool ParseUndisambiguatedIdentifier(
    707      char uppercase_namespace = '\0', int disambiguator = 0) {
    708    // undisambiguated-identifier -> u? decimal-number _? bytes
    709    const bool is_punycoded = Eat('u');
    710    if (!IsDigit(Peek())) return false;
    711    int num_bytes = 0;
    712    if (!ParseDecimalNumber(num_bytes)) return false;
    713    (void)Eat('_');  // optional separator, needed if a digit follows
    714    if (is_punycoded) {
    715      DecodeRustPunycodeOptions options;
    716      options.punycode_begin = &encoding_[pos_];
    717      options.punycode_end = &encoding_[pos_] + num_bytes;
    718      options.out_begin = out_;
    719      options.out_end = out_end_;
    720      out_ = DecodeRustPunycode(options);
    721      if (out_ == nullptr) return false;
    722      pos_ += static_cast<size_t>(num_bytes);
    723    }
    724 
    725    // Emit the beginnings of braced forms like {shim:vtable#0}.
    726    if (uppercase_namespace != '\0') {
    727      switch (uppercase_namespace) {
    728        case 'C':
    729          if (!Emit("{closure")) return false;
    730          break;
    731        case 'S':
    732          if (!Emit("{shim")) return false;
    733          break;
    734        default:
    735          if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
    736          break;
    737      }
    738      if (num_bytes > 0 && !Emit(":")) return false;
    739    }
    740 
    741    // Emit the name itself.
    742    if (!is_punycoded) {
    743      for (int i = 0; i < num_bytes; ++i) {
    744        const char c = Take();
    745        if (!IsIdentifierChar(c) &&
    746            // The spec gives toolchains the choice of Punycode or raw UTF-8 for
    747            // identifiers containing code points above 0x7f, so accept bytes
    748            // with the high bit set.
    749            (c & 0x80) == 0) {
    750          return false;
    751        }
    752        if (!EmitChar(c)) return false;
    753      }
    754    }
    755 
    756    // Emit the endings of braced forms, e.g., "#42}".
    757    if (uppercase_namespace != '\0') {
    758      if (!EmitChar('#')) return false;
    759      if (!EmitDisambiguator(disambiguator)) return false;
    760      if (!EmitChar('}')) return false;
    761    }
    762 
    763    return true;
    764  }
    765 
    766  // Consumes a decimal number like 0 or 123 from the input.  On success returns
    767  // true and fills value with the encoded value.  If the encoded value is too
    768  // large or otherwise unparsable, returns false and sets value to -1.
    769  [[nodiscard]] bool ParseDecimalNumber(int& value) {
    770    value = -1;
    771    if (!IsDigit(Peek())) return false;
    772    int encoded_number = Take() - '0';
    773    if (encoded_number == 0) {
    774      // Decimal numbers are never encoded with extra leading zeroes.
    775      value = 0;
    776      return true;
    777    }
    778    while (IsDigit(Peek()) &&
    779           // avoid overflow
    780           encoded_number < std::numeric_limits<int>::max()/10) {
    781      encoded_number = 10 * encoded_number + (Take() - '0');
    782    }
    783    if (IsDigit(Peek())) return false;  // too big
    784    value = encoded_number;
    785    return true;
    786  }
    787 
    788  // Consumes a binder of higher-ranked lifetimes if one is present.  On success
    789  // returns true and discards the encoded lifetime count.  On parse failure
    790  // returns false.
    791  [[nodiscard]] bool ParseOptionalBinder() {
    792    // binder -> G base-62-number
    793    if (!Eat('G')) return true;
    794    int ignored_binding_count;
    795    return ParseBase62Number(ignored_binding_count);
    796  }
    797 
    798  // Consumes a lifetime if one is present.
    799  //
    800  // On success returns true and discards the lifetime index.  We do not print
    801  // or even range-check lifetimes because they are a finer detail than other
    802  // things we omit from output, such as the entire contents of generic-args.
    803  //
    804  // On parse failure returns false.
    805  [[nodiscard]] bool ParseOptionalLifetime() {
    806    // lifetime -> L base-62-number
    807    if (!Eat('L')) return true;
    808    int ignored_de_bruijn_index;
    809    return ParseBase62Number(ignored_de_bruijn_index);
    810  }
    811 
    812  // Consumes a lifetime just like ParseOptionalLifetime, but returns false if
    813  // there is no lifetime here.
    814  [[nodiscard]] bool ParseRequiredLifetime() {
    815    if (Peek() != 'L') return false;
    816    return ParseOptionalLifetime();
    817  }
    818 
    819  // Pushes ns onto the namespace stack and returns true if the stack is not
    820  // full, else returns false.
    821  [[nodiscard]] bool PushNamespace(char ns) {
    822    if (namespace_depth_ == kNamespaceStackSize) return false;
    823    namespace_stack_[namespace_depth_++] = ns;
    824    return true;
    825  }
    826 
    827  // Pops the last pushed namespace.  Requires that the namespace stack is not
    828  // empty (namespace_depth_ > 0).
    829  char PopNamespace() { return namespace_stack_[--namespace_depth_]; }
    830 
    831  // Pushes position onto the position stack and returns true if the stack is
    832  // not full, else returns false.
    833  [[nodiscard]] bool PushPosition(int position) {
    834    if (position_depth_ == kPositionStackSize) return false;
    835    position_stack_[position_depth_++] = position;
    836    return true;
    837  }
    838 
    839  // Pops the last pushed input position.  Requires that the position stack is
    840  // not empty (position_depth_ > 0).
    841  int PopPosition() { return position_stack_[--position_depth_]; }
    842 
    843  // Consumes a base-62-number denoting a backref target, pushes the current
    844  // input position on the data stack, and sets the input position to the
    845  // beginning of the backref target.  Returns true on success.  Returns false
    846  // if parsing failed, the stack is exhausted, or the backref target position
    847  // is out of range.
    848  [[nodiscard]] bool BeginBackref() {
    849    // backref = B base-62-number (B already consumed)
    850    //
    851    // Reject backrefs that don't parse, overflow int, or don't point backward.
    852    // If the offset looks fine, adjust it to account for the _R prefix.
    853    int offset = 0;
    854    const int offset_of_this_backref =
    855        pos_ - 2 /* _R */ - 1 /* B already consumed */;
    856    if (!ParseBase62Number(offset) || offset < 0 ||
    857        offset >= offset_of_this_backref) {
    858      return false;
    859    }
    860    offset += 2;
    861 
    862    // Save the old position to restore later.
    863    if (!PushPosition(pos_)) return false;
    864 
    865    // Move the input position to the backref target.
    866    //
    867    // Note that we do not check whether the new position points to the
    868    // beginning of a construct matching the context in which the backref
    869    // appeared.  We just jump to it and see whether nested parsing succeeds.
    870    // We therefore accept various wrong manglings, e.g., a type backref
    871    // pointing to an 'l' character inside an identifier, which happens to mean
    872    // i32 when parsed as a type mangling.  This saves the complexity and RAM
    873    // footprint of remembering which offsets began which kinds of
    874    // substructures.  Existing demanglers take similar shortcuts.
    875    pos_ = offset;
    876    return true;
    877  }
    878 
    879  // Cleans up after a backref production by restoring the previous input
    880  // position from the data stack.
    881  void EndBackref() { pos_ = PopPosition(); }
    882 
    883  // The leftmost recursion_depth_ elements of recursion_stack_ contain the
    884  // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed.
    885  ReturnAddress recursion_stack_[kStackSize] = {};
    886  int recursion_depth_ = 0;
    887 
    888  // The leftmost namespace_depth_ elements of namespace_stack_ contain the
    889  // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a
    890  // closure.
    891  char namespace_stack_[kNamespaceStackSize] = {};
    892  int namespace_depth_ = 0;
    893 
    894  // The leftmost position_depth_ elements of position_stack_ contain the input
    895  // positions to return to after fully printing the targets of backrefs.
    896  int position_stack_[kPositionStackSize] = {};
    897  int position_depth_ = 0;
    898 
    899  // Anything parsed while silence_depth_ > 0 contributes nothing to the
    900  // demangled output.  For constructs omitted from the demangling, such as
    901  // impl-path and the contents of generic-args, we will increment
    902  // silence_depth_ on the way in and decrement silence_depth_ on the way out.
    903  int silence_depth_ = 0;
    904 
    905  // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
    906  // the next input character to be scanned.
    907  int pos_ = 0;
    908  const char* encoding_ = nullptr;
    909 
    910  // Output: *out_ is where the next output character should be written, and
    911  // out_end_ points past the last byte of available space.
    912  char* out_ = nullptr;
    913  char* out_end_ = nullptr;
    914 };
    915 
    916 }  // namespace
    917 
    918 bool DemangleRustSymbolEncoding(const char* mangled, char* out,
    919                                size_t out_size) {
    920  return RustSymbolParser(mangled, out, out + out_size).Parse();
    921 }
    922 
    923 }  // namespace debugging_internal
    924 ABSL_NAMESPACE_END
    925 }  // namespace absl