tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

MozsearchIndexer.cpp (112414B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "clang/AST/AST.h"
      7 #include "clang/AST/ASTConsumer.h"
      8 #include "clang/AST/ASTContext.h"
      9 #include "clang/AST/Expr.h"
     10 #include "clang/AST/ExprCXX.h"
     11 #include "clang/AST/Mangle.h"
     12 #include "clang/AST/RecordLayout.h"
     13 #include "clang/AST/RecursiveASTVisitor.h"
     14 #include "clang/Basic/FileManager.h"
     15 #include "clang/Basic/SourceManager.h"
     16 #include "clang/Basic/Version.h"
     17 #include "clang/Format/Format.h"
     18 #include "clang/Frontend/CompilerInstance.h"
     19 #include "clang/Frontend/FrontendPluginRegistry.h"
     20 #include "clang/Lex/Lexer.h"
     21 #include "clang/Lex/PPCallbacks.h"
     22 #include "clang/Lex/Preprocessor.h"
     23 #include "clang/Lex/TokenConcatenation.h"
     24 #include "llvm/ADT/SmallString.h"
     25 #include "llvm/Support/JSON.h"
     26 #include "llvm/Support/raw_ostream.h"
     27 
     28 #include <algorithm>
     29 #include <fstream>
     30 #include <iostream>
     31 #include <map>
     32 #include <memory>
     33 #include <sstream>
     34 #include <stack>
     35 #include <string>
     36 #include <unordered_set>
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 
     41 #include "BindingOperations.h"
     42 #include "FileOperations.h"
     43 #include "StringOperations.h"
     44 #include "from-clangd/HeuristicResolver.h"
     45 
#if CLANG_VERSION_MAJOR < 8
// Starting with Clang 8.0 some basic functions have been renamed. Map the new
// spellings onto the old ones so the rest of this file can use
// getBeginLoc/getEndLoc regardless of the Clang version.
#define getBeginLoc getLocStart
#define getEndLoc getLocEnd
#endif
// We want std::make_unique, but that's only available in c++14.  In versions
// prior to that, we need to fall back to llvm's make_unique.  It's also the
// case that we expect clang 10 to build with c++14 and clang 9 and earlier to
// build with c++11, at least as suggested by the llvm-config --cxxflags on
// non-windows platforms.  firefox-main seems to build with -std=c++17 on
// windows so we need to make this decision based on __cplusplus instead of
// the CLANG_VERSION_MAJOR.
// Either way, unqualified `make_unique` is usable below.
#if __cplusplus < 201402L
using llvm::make_unique;
#else
using std::make_unique;
#endif
     63 
using namespace clang;

// Prefix that relativizePath() substitutes for the objdir portion of a
// generated file's path. It already ends with the path separator.
const std::string GENERATED("__GENERATED__" PATHSEP_STRING);

// Absolute path to directory containing source code.
// NOTE(review): relativizePath() erases one extra character after this prefix,
// so it is presumably stored without a trailing separator -- confirm where it
// is set.
std::string Srcdir;

// Absolute path to objdir (including generated code).
// NOTE(review): relativizePath() replaces this prefix with GENERATED (which
// ends in a separator), so it is presumably stored with a trailing separator
// -- confirm where it is set.
std::string Objdir;

// Absolute path where analysis JSON output will be stored.
std::string Outdir;
     76 
// Classification of a file path, as returned by relativizePath().
enum class FileType {
 // The file was neither in the source tree nor in the objdir. It might be a
 // system include, for example.
 Unknown,
 // A file from the source tree.
 Source,
 // A file from the objdir.
 Generated,
};
     86 
     87 // Takes an absolute path to a file, and returns the type of file it is. If
     88 // it's a Source or Generated file, the provided inout path argument is modified
     89 // in-place so that it is relative to the source dir or objdir, respectively.
     90 // Otherwise we strip the first include path that matches, if any.
     91 FileType relativizePath(std::string &path, const HeaderSearchOptions &HeaderSearchOpts) {
     92  if (path.compare(0, Objdir.length(), Objdir) == 0) {
     93    path.replace(0, Objdir.length(), GENERATED);
     94    return FileType::Generated;
     95  }
     96  // Empty filenames can get turned into Srcdir when they are resolved as
     97  // absolute paths, so we should exclude files that are exactly equal to
     98  // Srcdir or anything outside Srcdir.
     99  if (path.length() > Srcdir.length() &&
    100      path.compare(0, Srcdir.length(), Srcdir) == 0) {
    101    // Remove the trailing `/' as well.
    102    path.erase(0, Srcdir.length() + 1);
    103    return FileType::Source;
    104  }
    105 
    106  for (const auto &Entry : HeaderSearchOpts.UserEntries) {
    107    if (path.compare(0, Entry.Path.length(), Entry.Path) == 0) {
    108      path.erase(0, Entry.Path.size() + 1);
    109      break;
    110    }
    111  }
    112 
    113  return FileType::Unknown;
    114 }
    115 
    116 #if !defined(_WIN32) && !defined(_WIN64)
    117 #include <sys/time.h>
    118 
    119 static double time() {
    120  struct timeval Tv;
    121  gettimeofday(&Tv, nullptr);
    122  return double(Tv.tv_sec) + double(Tv.tv_usec) / 1000000.;
    123 }
    124 #endif
    125 
    126 // Return true if |input| is a valid C++ identifier. We don't want to generate
    127 // analysis information for operators, string literals, etc. by accident since
    128 // it trips up consumers of the data.
    129 static bool isValidIdentifier(std::string Input) {
    130  for (char C : Input) {
    131    if (!(isalpha(C) || isdigit(C) || C == '_')) {
    132      return false;
    133    }
    134  }
    135  return true;
    136 }
    137 
    138 template <size_t N>
    139 static bool stringStartsWith(const std::string &Input,
    140                             const char (&Prefix)[N]) {
    141  return Input.length() > N - 1 && memcmp(Input.c_str(), Prefix, N - 1) == 0;
    142 }
    143 
    144 static bool isASCII(const std::string &Input) {
    145  for (char C : Input) {
    146    if (C & 0x80) {
    147      return false;
    148    }
    149  }
    150  return true;
    151 }
    152 
    153 struct RAIITracer {
    154  RAIITracer(const char *log) : mLog(log) { printf("<%s>\n", mLog); }
    155 
    156  ~RAIITracer() { printf("</%s>\n", mLog); }
    157 
    158  const char *mLog;
    159 };
    160 
    161 #define TRACEFUNC RAIITracer tracer(__FUNCTION__);
    162 
    163 // Sets variable to value on creation then resets variable to its original
    164 // value on destruction
    165 template <typename T> class ValueRollback {
    166 public:
    167  template <typename U = T>
    168  ValueRollback(T &variable, U &&value)
    169      : mVariable{&variable},
    170        mSavedValue{std::exchange(variable, std::forward<U>(value))} {}
    171 
    172  ValueRollback(ValueRollback &&other) noexcept
    173      : mVariable{std::exchange(other.mVariable, nullptr)},
    174        mSavedValue{std::move(other.mSavedValue)} {}
    175 
    176  ValueRollback(const ValueRollback &) = delete;
    177  ValueRollback &operator=(ValueRollback &&) = delete;
    178  ValueRollback &operator=(const ValueRollback &) = delete;
    179 
    180  ~ValueRollback() {
    181    if (mVariable)
    182      *mVariable = std::move(mSavedValue);
    183  }
    184 
    185 private:
    186  T *mVariable;
    187  T mSavedValue;
    188 };
    189 
    190 class IndexConsumer;
    191 
    192 bool isPure(FunctionDecl *D) {
    193 #if CLANG_VERSION_MAJOR >= 18
    194  return D->isPureVirtual();
    195 #else
    196  return D->isPure();
    197 #endif
    198 }
    199 
    200 // For each C++ file seen by the analysis (.cpp or .h), we track a
    201 // FileInfo. This object tracks whether the file is "interesting" (i.e., whether
    202 // it's in the source dir or the objdir). We also store the analysis output
    203 // here.
    204 struct FileInfo {
    205  FileInfo(std::string &Rname, const HeaderSearchOptions &HeaderSearchOptions) : Realname(Rname) {
    206    switch (relativizePath(Realname, HeaderSearchOptions)) {
    207    case FileType::Generated:
    208      Interesting = true;
    209      Generated = true;
    210      break;
    211    case FileType::Source:
    212      Interesting = true;
    213      Generated = false;
    214      break;
    215    case FileType::Unknown:
    216      Interesting = false;
    217      Generated = false;
    218      break;
    219    }
    220  }
    221  std::string Realname;
    222  std::vector<std::string> Output;
    223  bool Interesting;
    224  bool Generated;
    225 };
    226 
// Working state for a macro expansion that is in progress. IndexConsumer keeps
// at most one of these (in a std::optional) and uses it to build the expanded
// string.
// NOTE(review): the per-field notes marked "presumably" are inferred from
// names and types only -- confirm against the code that fills them in.
struct MacroExpansionState {
 // Presumably the token that names the macro being expanded.
 Token MacroNameToken;
 // Presumably the definition of that macro; null until set.
 const MacroInfo *MacroInfo = nullptr;
 // other macro symbols this expansion depends on
 std::vector<std::string> Dependencies;
 // Expanded text accumulated so far.
 std::string Expansion;
 // Presumably maps a token's source location to its offset in Expansion.
 std::map<SourceLocation, unsigned> TokenLocations;
 // Presumably the source range covered by the expansion.
 SourceRange Range;
 // Presumably the two most recently seen tokens (older one first).
 Token PrevPrevTok;
 Token PrevTok;
};
    238 
// Record of a macro expansion; IndexConsumer stores these in MacroMaps, keyed
// by SourceLocation, to keep track of the positions of tokens inside each
// expanded macro.
struct ExpandedMacro {
 // Symbol of the expanded macro.
 std::string Symbol;
 std::string Key; // "{Symbol}(,{Dependencies})..."
 // Text of the expansion.
 std::string Expansion;
 // Presumably maps a token's source location to its offset in Expansion --
 // TODO confirm against the producer.
 std::map<SourceLocation, unsigned> TokenLocations;
};
    245 
    246 class IndexConsumer;
    247 
    248 class PreprocessorHook : public PPCallbacks {
    249  IndexConsumer *Indexer;
    250 
    251 public:
    252  PreprocessorHook(IndexConsumer *C) : Indexer(C) {}
    253 
    254  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
    255                           SrcMgr::CharacteristicKind FileType,
    256                           FileID PrevFID) override;
    257 
    258  virtual void InclusionDirective(SourceLocation HashLoc,
    259                                  const Token &IncludeTok, StringRef FileName,
    260                                  bool IsAngled, CharSourceRange FileNameRange,
    261 #if CLANG_VERSION_MAJOR >= 16
    262                                  OptionalFileEntryRef File,
    263 #elif CLANG_VERSION_MAJOR >= 15
    264                                  Optional<FileEntryRef> File,
    265 #else
    266                                  const FileEntry *File,
    267 #endif
    268                                  StringRef SearchPath, StringRef RelativePath,
    269 #if CLANG_VERSION_MAJOR >= 19
    270                                  const Module *SuggestedModule,
    271                                  bool ModuleImported,
    272 #else
    273                                  const Module *Imported,
    274 #endif
    275                                  SrcMgr::CharacteristicKind FileType) override;
    276 
    277  virtual void MacroDefined(const Token &Tok,
    278                            const MacroDirective *Md) override;
    279 
    280  virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md,
    281                            SourceRange Range, const MacroArgs *Ma) override;
    282  virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md,
    283                              const MacroDirective *Undef) override;
    284  virtual void Defined(const Token &Tok, const MacroDefinition &Md,
    285                       SourceRange Range) override;
    286  virtual void Ifdef(SourceLocation Loc, const Token &Tok,
    287                     const MacroDefinition &Md) override;
    288  virtual void Ifndef(SourceLocation Loc, const Token &Tok,
    289                      const MacroDefinition &Md) override;
    290 };
    291 
    292 class IndexConsumer : public ASTConsumer,
    293                      public RecursiveASTVisitor<IndexConsumer>,
    294                      public DiagnosticConsumer {
    295 private:
    296  CompilerInstance &CI;
    297  SourceManager &SM;
    298  LangOptions &LO;
    299  std::map<FileID, std::unique_ptr<FileInfo>> FileMap;
    300  MangleContext *CurMangleContext;
    301  ASTContext *AstContext;
    302  std::unique_ptr<clangd::HeuristicResolver> Resolver;
    303 
    304  // Used during a macro expansion to build the expanded string
    305  TokenConcatenation ConcatInfo;
    306  std::optional<MacroExpansionState> MacroExpansionState;
    307  // Keeps track of the positions of tokens inside each expanded macro
    308  std::map<SourceLocation, ExpandedMacro> MacroMaps;
    309 
    310  typedef RecursiveASTVisitor<IndexConsumer> Super;
    311 
    312  // Tracks the set of declarations that the current expression/statement is
    313  // nested inside of.
    314  struct AutoSetContext {
    315    AutoSetContext(IndexConsumer *Self, NamedDecl *Context,
    316                   bool VisitImplicit = false)
    317        : Self(Self), Prev(Self->CurDeclContext), Decl(Context) {
    318      this->VisitImplicit =
    319          VisitImplicit || (Prev ? Prev->VisitImplicit : false);
    320      Self->CurDeclContext = this;
    321    }
    322 
    323    ~AutoSetContext() { Self->CurDeclContext = Prev; }
    324 
    325    IndexConsumer *Self;
    326    AutoSetContext *Prev;
    327    NamedDecl *Decl;
    328    bool VisitImplicit;
    329  };
    330  AutoSetContext *CurDeclContext;
    331 
    332  FileInfo *getFileInfo(SourceLocation Loc) {
    333    FileID Id = SM.getFileID(Loc);
    334 
    335    std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
    336    It = FileMap.find(Id);
    337    if (It == FileMap.end()) {
    338      // We haven't seen this file before. We need to make the FileInfo
    339      // structure information ourselves
    340      std::string Filename = std::string(SM.getFilename(Loc));
    341      std::string Absolute;
    342      // If Loc is a macro id rather than a file id, it Filename might be
    343      // empty. Also for some types of file locations that are clang-internal
    344      // like "<scratch>" it can return an empty Filename. In these cases we
    345      // want to leave Absolute as empty.
    346      if (!Filename.empty()) {
    347        Absolute = getAbsolutePath(Filename);
    348        if (Absolute.empty()) {
    349          Absolute = Filename;
    350        }
    351      }
    352      std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute, CI.getHeaderSearchOpts());
    353      It = FileMap.insert(std::make_pair(Id, std::move(Info))).first;
    354    }
    355    return It->second.get();
    356  }
    357 
    358  // Helpers for processing declarations
    359  // Should we ignore this location?
    360  bool isInterestingLocation(SourceLocation Loc) {
    361    if (SM.isMacroBodyExpansion(Loc)) {
    362      Loc = SM.getFileLoc(Loc);
    363    }
    364 
    365    normalizeLocation(&Loc);
    366    if (Loc.isInvalid()) {
    367      return false;
    368    }
    369 
    370    return getFileInfo(Loc)->Interesting;
    371  }
    372 
    373  // Convert location to "line:column" or "line:column-column" given length.
    374  // In resulting string rep, line is 1-based and zero-padded to 5 digits, while
    375  // column is 0-based and unpadded.
    376  std::string locationToString(SourceLocation Loc, size_t Length = 0) {
    377    std::pair<FileID, unsigned> Pair = SM.getDecomposedExpansionLoc(Loc);
    378 
    379    bool IsInvalid;
    380    unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid);
    381    if (IsInvalid) {
    382      return "";
    383    }
    384    unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid);
    385    if (IsInvalid) {
    386      return "";
    387    }
    388 
    389    if (Length) {
    390      return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length);
    391    } else {
    392      return stringFormat("%05d:%d", Line, Column - 1);
    393    }
    394  }
    395 
    396  // Convert SourceRange to "line-line" or "line".
    397  // In the resulting string rep, line is 1-based.
    398  std::string lineRangeToString(SourceRange Range, bool omitEnd = false) {
    399    std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin());
    400    std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd());
    401 
    402    bool IsInvalid;
    403    unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
    404    if (IsInvalid) {
    405      return "";
    406    }
    407    unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
    408    if (IsInvalid) {
    409      return "";
    410    }
    411 
    412    if (omitEnd && Line1 == Line2) {
    413      return stringFormat("%d", Line1);
    414    }
    415 
    416    return stringFormat("%d-%d", Line1, Line2);
    417  }
    418 
    419  // Convert SourceRange to "PATH#line-line" or "PATH#line".
    420  // If Range's file is same as fromFileID, PATH is omitted.
    421  std::string pathAndLineRangeToString(FileID fromFileID, SourceRange Range) {
    422    FileInfo *toFile = getFileInfo(Range.getBegin());
    423    FileInfo *fromFile = FileMap.find(fromFileID)->second.get();
    424 
    425    auto lineRange = lineRangeToString(Range, true);
    426 
    427    if (lineRange.empty()) {
    428      return "";
    429    }
    430 
    431    if (toFile == fromFile) {
    432      return "#" + lineRange;
    433    }
    434 
    435    if (toFile->Realname.empty()) {
    436      return "#" + lineRange;
    437    }
    438 
    439    std::string result = toFile->Realname;
    440    result += "#";
    441    result += lineRange;
    442    return result;
    443  }
    444 
    445  bool needsNestingRangeForVarDecl(SourceRange& Range) {
    446    std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin());
    447    std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd());
    448 
    449    bool IsInvalid;
    450    unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
    451    if (IsInvalid) {
    452      return false;
    453    }
    454    unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
    455    if (IsInvalid) {
    456      return false;
    457    }
    458 
    459    static constexpr unsigned MinVarDeclNestingRangeLines = 10;
    460 
    461    return Line2 > Line1 + MinVarDeclNestingRangeLines;
    462  }
    463 
    464  // Convert SourceRange to "line:column-line:column".
    465  // In the resulting string rep, line is 1-based, column is 0-based.
    466  std::string fullRangeToString(SourceRange Range) {
    467    std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin());
    468    std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd());
    469 
    470    bool IsInvalid;
    471    unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid);
    472    if (IsInvalid) {
    473      return "";
    474    }
    475    unsigned Column1 =
    476        SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid);
    477    if (IsInvalid) {
    478      return "";
    479    }
    480    unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid);
    481    if (IsInvalid) {
    482      return "";
    483    }
    484    unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid);
    485    if (IsInvalid) {
    486      return "";
    487    }
    488 
    489    return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1);
    490  }
    491 
    492  // Returns the qualified name of `d` without considering template parameters.
    493  std::string getQualifiedName(const NamedDecl *D) {
    494    const DeclContext *Ctx = D->getDeclContext();
    495    if (Ctx->isFunctionOrMethod()) {
    496      return D->getQualifiedNameAsString();
    497    }
    498 
    499    std::vector<const DeclContext *> Contexts;
    500 
    501    // Collect contexts.
    502    while (Ctx && isa<NamedDecl>(Ctx)) {
    503      Contexts.push_back(Ctx);
    504      Ctx = Ctx->getParent();
    505    }
    506 
    507    std::string Result;
    508 
    509    std::reverse(Contexts.begin(), Contexts.end());
    510 
    511    for (const DeclContext *DC : Contexts) {
    512      if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
    513        Result += Spec->getNameAsString();
    514 
    515        if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) {
    516          std::string Backing;
    517          llvm::raw_string_ostream Stream(Backing);
    518          const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
    519          printTemplateArgumentList(Stream, TemplateArgs.asArray(),
    520                                    PrintingPolicy(CI.getLangOpts()));
    521          Result += Stream.str();
    522        }
    523      } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) {
    524        if (Nd->isAnonymousNamespace() || Nd->isInline()) {
    525          continue;
    526        }
    527        Result += Nd->getNameAsString();
    528      } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) {
    529        if (!Rd->getIdentifier()) {
    530          Result += "(anonymous)";
    531        } else {
    532          Result += Rd->getNameAsString();
    533        }
    534      } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) {
    535        Result += Fd->getNameAsString();
    536      } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) {
    537        // C++ [dcl.enum]p10: Each enum-name and each unscoped
    538        // enumerator is declared in the scope that immediately contains
    539        // the enum-specifier. Each scoped enumerator is declared in the
    540        // scope of the enumeration.
    541        if (Ed->isScoped() || Ed->getIdentifier())
    542          Result += Ed->getNameAsString();
    543        else
    544          continue;
    545      } else {
    546        Result += cast<NamedDecl>(DC)->getNameAsString();
    547      }
    548      Result += "::";
    549    }
    550 
    551    if (D->getDeclName())
    552      Result += D->getNameAsString();
    553    else
    554      Result += "(anonymous)";
    555 
    556    return Result;
    557  }
    558 
    559  std::string mangleLocation(SourceLocation Loc,
    560                             std::string Backup = std::string()) {
    561    FileInfo *F = getFileInfo(Loc);
    562    std::string Filename = F->Realname;
    563    if (Filename.length() == 0 && Backup.length() != 0) {
    564      return Backup;
    565    }
    566    if (F->Generated) {
    567      // Since generated files may be different on different platforms,
    568      // we need to include a platform-specific thing in the hash. Otherwise
    569      // we can end up with hash collisions where different symbols from
    570      // different platforms map to the same thing.
    571      char *Platform = getenv("MOZSEARCH_PLATFORM");
    572      Filename =
    573          std::string(Platform ? Platform : "") + std::string("@") + Filename;
    574    }
    575    return hash(Filename + std::string("@") + locationToString(Loc));
    576  }
    577 
    578  bool isAcceptableSymbolChar(char c) {
    579    return isalpha(c) || isdigit(c) || c == '_' || c == '/';
    580  }
    581 
    582  std::string mangleFile(std::string Filename, FileType Type) {
    583    // "Mangle" the file path, such that:
    584    // 1. The majority of paths will still be mostly human-readable.
    585    // 2. The sanitization algorithm doesn't produce collisions where two
    586    //    different unsanitized paths can result in the same sanitized paths.
    587    // 3. The produced symbol doesn't cause problems with downstream consumers.
    588    // In order to accomplish this, we keep alphanumeric chars, underscores,
    589    // and slashes, and replace everything else with an "@xx" hex encoding.
    590    // The majority of path characters are letters and slashes which don't get
    591    // encoded, so that satisfies (1). Since "@" characters in the unsanitized
    592    // path get encoded, there should be no "@" characters in the sanitized path
    593    // that got preserved from the unsanitized input, so that should satisfy
    594    // (2). And (3) was done by trial-and-error. Note in particular the dot (.)
    595    // character needs to be encoded, or the symbol-search feature of mozsearch
    596    // doesn't work correctly, as all dot characters in the symbol query get
    597    // replaced by #.
    598    for (size_t i = 0; i < Filename.length(); i++) {
    599      char c = Filename[i];
    600      if (isAcceptableSymbolChar(c)) {
    601        continue;
    602      }
    603      char hex[4];
    604      sprintf(hex, "@%02X", ((int)c) & 0xFF);
    605      Filename.replace(i, 1, hex);
    606      i += 2;
    607    }
    608 
    609    if (Type == FileType::Generated) {
    610      // Since generated files may be different on different platforms,
    611      // we need to include a platform-specific thing in the hash. Otherwise
    612      // we can end up with hash collisions where different symbols from
    613      // different platforms map to the same thing.
    614      char *Platform = getenv("MOZSEARCH_PLATFORM");
    615      Filename =
    616          std::string(Platform ? Platform : "") + std::string("@") + Filename;
    617    }
    618    return Filename;
    619  }
    620 
 // Encodes |Url| with the same scheme mangleFile() applies to source-tree
 // file paths (i.e. without the platform prefix used for generated files).
 std::string mangleURL(std::string Url) {
   return mangleFile(Url, FileType::Source);
 }
    624 
    625  std::string mangleQualifiedName(std::string Name) {
    626    std::replace(Name.begin(), Name.end(), ' ', '_');
    627    return Name;
    628  }
    629 
 // Returns the symbol string used to identify |Decl| in the analysis output.
 // The form depends on the kind of declaration:
 //  - main():                      "MF_" + location hash
 //  - extern "C" functions:        the plain function name
 //  - functions/variables declared directly in a translation unit,
 //    namespace, linkage spec or tag: the name produced by |Ctx|
 //    (complete-object variant for constructors/destructors)
 //  - other functions/variables:   "V_" + location hash + "_" + hash(name)
 //  - tags / ObjC interfaces:      "T_"  + qualified name (location hash if
 //                                 anonymous)
 //  - typedef names:               "TA_" + qualified name (location hash if
 //                                 anonymous)
 //  - namespaces and aliases:      "NS_" + qualified name (location hash if
 //                                 anonymous)
 //  - ObjC ivars and fields:       "F_<" + owner's symbol + ">_" + name
 //  - enum constants:              "E_<" + enclosing decl's symbol + ">_" +
 //                                 name
 // Anything else hits assert(false); when asserts are compiled out the
 // function returns an empty string.
 std::string getMangledName(clang::MangleContext *Ctx,
                            const clang::NamedDecl *Decl) {
   // Main functions will tend to collide because they inherently have similar
   // signatures, so let's provide a custom location-based signature.
   if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isMain()) {
     return std::string("MF_") + mangleLocation(Decl->getLocation());
   }

   // extern "C" functions are identified by their unmangled name.
   if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) {
     return cast<FunctionDecl>(Decl)->getNameAsString();
   }

   if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) {
     const DeclContext *DC = Decl->getDeclContext();
     // Only declarations in these kinds of contexts are handed to the
     // mangler; everything else gets the location-based "V_" symbol below.
     if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) ||
         isa<LinkageSpecDecl>(DC) ||
         // isa<ExternCContextDecl>(DC) ||
         isa<TagDecl>(DC)) {
       llvm::SmallVector<char, 512> Output;
       llvm::raw_svector_ostream Out(Output);
#if CLANG_VERSION_MAJOR >= 11
       // This code changed upstream in version 11:
       // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47
       GlobalDecl GD;
       if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
         GD = GlobalDecl(D, Ctor_Complete);
       } else if (const CXXDestructorDecl *D =
                      dyn_cast<CXXDestructorDecl>(Decl)) {
         GD = GlobalDecl(D, Dtor_Complete);
       } else {
         GD = GlobalDecl(Decl);
       }
       Ctx->mangleName(GD, Out);
#else
       if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) {
         Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out);
       } else if (const CXXDestructorDecl *D =
                      dyn_cast<CXXDestructorDecl>(Decl)) {
         Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out);
       } else {
         Ctx->mangleName(Decl, Out);
       }
#endif
       return Out.str().str();
     } else {
       // Location hash plus a hash of the name.
       return std::string("V_") + mangleLocation(Decl->getLocation()) +
              std::string("_") + hash(std::string(Decl->getName()));
     }
   } else if (isa<TagDecl>(Decl) || isa<ObjCInterfaceDecl>(Decl)) {
     if (!Decl->getIdentifier()) {
       // Anonymous.
       return std::string("T_") + mangleLocation(Decl->getLocation());
     }

     return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl));
   } else if (isa<TypedefNameDecl>(Decl)) {
     if (!Decl->getIdentifier()) {
       // Anonymous.
       return std::string("TA_") + mangleLocation(Decl->getLocation());
     }

     return std::string("TA_") + mangleQualifiedName(getQualifiedName(Decl));
   } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) {
     if (!Decl->getIdentifier()) {
       // Anonymous.
       return std::string("NS_") + mangleLocation(Decl->getLocation());
     }

     return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl));
   } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) {
     // This branch precedes the FieldDecl one, so ivars are qualified by
     // their containing interface.
     // NOTE(review): Iface is passed on without a null check -- confirm
     // getContainingInterface() cannot return null here.
     const ObjCInterfaceDecl *Iface = D2->getContainingInterface();
     return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" +
            D2->getNameAsString();
   } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) {
     const RecordDecl *Record = D2->getParent();
     return std::string("F_<") + getMangledName(Ctx, Record) + ">_" +
            D2->getNameAsString();
   } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) {
     const DeclContext *DC = Decl->getDeclContext();
     // If the enclosing context is not a NamedDecl we fall through to the
     // assert below.
     if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) {
       return std::string("E_<") + getMangledName(Ctx, Named) + ">_" +
              D2->getNameAsString();
     }
   }

   // Unhandled declaration kind.
   assert(false);
   return std::string("");
 }
    718 
    719  void debugLocation(SourceLocation Loc) {
    720    std::string S = locationToString(Loc);
    721    StringRef Filename = SM.getFilename(Loc);
    722    printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str());
    723  }
    724 
    725  void debugRange(SourceRange Range) {
    726    printf("Range\n");
    727    debugLocation(Range.getBegin());
    728    debugLocation(Range.getEnd());
    729  }
    730 
    731 public:
 // Binds the consumer to |CI|: keeps references to its source manager and
 // language options, starts with no mangle context, AST context or
 // declaration context, and hooks into CI's preprocessor in two ways:
 //  - a PreprocessorHook pointing back at this object is added as a
 //    PPCallbacks instance;
 //  - a token watcher is installed that forwards every token to
 //    onTokenLexed().
 // The token-watcher lambda captures `this`.
 // NOTE(review): that makes this object required to outlive the
 // preprocessor's use of the watcher -- confirm against the plugin's
 // lifetime management.
 IndexConsumer(CompilerInstance &CI)
     : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()),
       CurMangleContext(nullptr), AstContext(nullptr),
       ConcatInfo(CI.getPreprocessor()), CurDeclContext(nullptr),
       TemplateStack(nullptr) {
   CI.getPreprocessor().addPPCallbacks(make_unique<PreprocessorHook>(this));
   CI.getPreprocessor().setTokenWatcher(
       [this](const auto &token) { onTokenLexed(token); });
 }
    741 
 // Returns a newly heap-allocated IndexConsumer bound to the same
 // CompilerInstance. |Diags| is not used. Note that constructing the new
 // consumer registers another PreprocessorHook and replaces the token watcher
 // on CI's preprocessor (see the constructor).
 // NOTE(review): the result is a raw owning pointer; presumably the caller
 // takes ownership -- confirm.
 virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
   return new IndexConsumer(CI);
 }
    745 
    746 #if !defined(_WIN32) && !defined(_WIN64)
    747  struct AutoTime {
    748    AutoTime(double *Counter) : Counter(Counter), Start(time()) {}
    749    ~AutoTime() {
    750      if (Start) {
    751        *Counter += time() - Start;
    752      }
    753    }
    754    void stop() {
    755      *Counter += time() - Start;
    756      Start = 0;
    757    }
    758    double *Counter;
    759    double Start;
    760  };
    761 #endif
    762 
    763  // All we need is to follow the final declaration.
    764  virtual void HandleTranslationUnit(ASTContext &Ctx) {
    765    CurMangleContext =
    766        clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics());
    767 
    768    AstContext = &Ctx;
    769    Resolver = std::make_unique<clangd::HeuristicResolver>(Ctx);
    770    TraverseDecl(Ctx.getTranslationUnitDecl());
    771 
    772    // Emit the JSON data for all files now.
    773    std::map<FileID, std::unique_ptr<FileInfo>>::iterator It;
    774    for (It = FileMap.begin(); It != FileMap.end(); It++) {
    775      if (!It->second->Interesting) {
    776        continue;
    777      }
    778 
    779      FileInfo &Info = *It->second;
    780 
    781      std::string Filename = Outdir + Info.Realname;
    782      std::string SrcFilename =
    783          Info.Generated ? Objdir + Info.Realname.substr(GENERATED.length())
    784                         : Srcdir + PATHSEP_STRING + Info.Realname;
    785 
    786      ensurePath(Filename);
    787 
    788      // We lock the output file in case some other clang process is trying to
    789      // write to it at the same time.
    790      AutoLockFile Lock(SrcFilename, Filename);
    791 
    792      if (!Lock.success()) {
    793        fprintf(stderr, "Unable to lock file %s\n", Filename.c_str());
    794        exit(1);
    795      }
    796 
    797      // Merge our results with the existing lines from the output file.
    798      // This ensures that header files that are included multiple times
    799      // in different ways are analyzed completely.
    800      std::ifstream Fin(Filename.c_str(), std::ios::in | std::ios::binary);
    801      FILE *OutFp = Lock.openTmp();
    802      if (!OutFp) {
    803        fprintf(stderr, "Unable to open tmp out file for %s\n",
    804                Filename.c_str());
    805        exit(1);
    806      }
    807 
    808      // Sort our new results and get an iterator to them
    809      std::sort(Info.Output.begin(), Info.Output.end());
    810      std::vector<std::string>::const_iterator NewLinesIter =
    811          Info.Output.begin();
    812      std::string LastNewWritten;
    813 
    814      // Loop over the existing (sorted) lines in the analysis output file.
    815      // (The good() check also handles the case where Fin did not exist when we
    816      // went to open it.)
    817      while (Fin.good()) {
    818        std::string OldLine;
    819        std::getline(Fin, OldLine);
    820        // Skip blank lines.
    821        if (OldLine.length() == 0) {
    822          continue;
    823        }
    824        // We need to put the newlines back that getline() eats.
    825        OldLine.push_back('\n');
    826 
    827        // Write any results from Info.Output that are lexicographically
    828        // smaller than OldLine (read from the existing file), but make sure
    829        // to skip duplicates. Keep advancing NewLinesIter until we reach an
    830        // entry that is lexicographically greater than OldLine.
    831        for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
    832          if (*NewLinesIter > OldLine) {
    833            break;
    834          }
    835          if (*NewLinesIter == OldLine) {
    836            continue;
    837          }
    838          if (*NewLinesIter == LastNewWritten) {
    839            // dedupe the new entries being written
    840            continue;
    841          }
    842          if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) !=
    843              1) {
    844            fprintf(stderr,
    845                    "Unable to write %zu bytes[1] to tmp output file for %s\n",
    846                    NewLinesIter->length(), Filename.c_str());
    847            exit(1);
    848          }
    849          LastNewWritten = *NewLinesIter;
    850        }
    851 
    852        // Write the entry read from the existing file.
    853        if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) {
    854          fprintf(stderr,
    855                  "Unable to write %zu bytes[2] to tmp output file for %s\n",
    856                  OldLine.length(), Filename.c_str());
    857          exit(1);
    858        }
    859      }
    860 
    861      // We finished reading from Fin
    862      Fin.close();
    863 
    864      // Finish iterating our new results, discarding duplicates
    865      for (; NewLinesIter != Info.Output.end(); NewLinesIter++) {
    866        if (*NewLinesIter == LastNewWritten) {
    867          continue;
    868        }
    869        if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) !=
    870            1) {
    871          fprintf(stderr,
    872                  "Unable to write %zu bytes[3] to tmp output file for %s\n",
    873                  NewLinesIter->length(), Filename.c_str());
    874          exit(1);
    875        }
    876        LastNewWritten = *NewLinesIter;
    877      }
    878 
    879      // Done writing all the things, close it and replace the old output file
    880      // with the new one.
    881      fclose(OutFp);
    882      if (!Lock.moveTmp()) {
    883        fprintf(stderr,
    884                "Unable to move tmp output file into place for %s (err %d)\n",
    885                Filename.c_str(), errno);
    886        exit(1);
    887      }
    888    }
    889  }
    890 
    891  // Unfortunately, we have to override all these methods in order to track the
    892  // context we're inside.
    893 
    894  bool TraverseEnumDecl(EnumDecl *D) {
    895    AutoSetContext Asc(this, D);
    896    return Super::TraverseEnumDecl(D);
    897  }
    898  bool TraverseRecordDecl(RecordDecl *D) {
    899    AutoSetContext Asc(this, D);
    900    return Super::TraverseRecordDecl(D);
    901  }
    902  bool TraverseCXXRecordDecl(CXXRecordDecl *D) {
    903    AutoSetContext Asc(this, D);
    904    return Super::TraverseCXXRecordDecl(D);
    905  }
    906  bool TraverseFunctionDecl(FunctionDecl *D) {
    907    AutoSetContext Asc(this, D);
    908    const FunctionDecl *Def;
    909    // (See the larger AutoTemplateContext comment for more information.) If a
    910    // method on a templated class is declared out-of-line, we need to analyze
    911    // the definition inside the scope of the template or else we won't properly
    912    // handle member access on the templated type.
    913    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
    914      const auto _ = ValueRollback(CurDeclContext, nullptr);
    915      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
    916    }
    917    return Super::TraverseFunctionDecl(D);
    918  }
    919  bool TraverseCXXMethodDecl(CXXMethodDecl *D) {
    920    AutoSetContext Asc(this, D);
    921    const FunctionDecl *Def;
    922    // See TraverseFunctionDecl.
    923    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
    924      const auto _ = ValueRollback(CurDeclContext, nullptr);
    925      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
    926    }
    927    return Super::TraverseCXXMethodDecl(D);
    928  }
    929  bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) {
    930    AutoSetContext Asc(this, D, /*VisitImplicit=*/true);
    931    const FunctionDecl *Def;
    932    // See TraverseFunctionDecl.
    933    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
    934      const auto _ = ValueRollback(CurDeclContext, nullptr);
    935      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
    936    }
    937    return Super::TraverseCXXConstructorDecl(D);
    938  }
    939  bool TraverseCXXConversionDecl(CXXConversionDecl *D) {
    940    AutoSetContext Asc(this, D);
    941    const FunctionDecl *Def;
    942    // See TraverseFunctionDecl.
    943    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
    944      const auto _ = ValueRollback(CurDeclContext, nullptr);
    945      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
    946    }
    947    return Super::TraverseCXXConversionDecl(D);
    948  }
    949  bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) {
    950    AutoSetContext Asc(this, D);
    951    const FunctionDecl *Def;
    952    // See TraverseFunctionDecl.
    953    if (TemplateStack && D->isDefined(Def) && Def && D != Def) {
    954      const auto _ = ValueRollback(CurDeclContext, nullptr);
    955      TraverseFunctionDecl(const_cast<FunctionDecl *>(Def));
    956    }
    957    return Super::TraverseCXXDestructorDecl(D);
    958  }
    959 
    960  bool TraverseLambdaExpr(LambdaExpr *E) {
    961    AutoSetContext Asc(this, nullptr, true);
    962    return Super::TraverseLambdaExpr(E);
    963  }
    964 
    965  // Used to keep track of the context in which a token appears.
    966  struct Context {
    967    // Ultimately this becomes the "context" JSON property.
    968    std::string Name;
    969 
    970    // Ultimately this becomes the "contextsym" JSON property.
    971    std::string Symbol;
    972 
    973    Context() {}
    974    Context(std::string Name, std::string Symbol)
    975        : Name(Name), Symbol(Symbol) {}
    976  };
    977 
    978  Context translateContext(NamedDecl *D) {
    979    const FunctionDecl *F = dyn_cast<FunctionDecl>(D);
    980    if (F && F->isTemplateInstantiation()) {
    981      D = F->getTemplateInstantiationPattern();
    982    }
    983 
    984    return Context(D->getQualifiedNameAsString(),
    985                   getMangledName(CurMangleContext, D));
    986  }
    987 
    988  Context getContext(SourceLocation Loc) {
    989    if (SM.isMacroBodyExpansion(Loc)) {
    990      // If we're inside a macro definition, we don't return any context. It
    991      // will probably not be what the user expects if we do.
    992      return Context();
    993    }
    994 
    995    AutoSetContext *Ctxt = CurDeclContext;
    996    while (Ctxt) {
    997      if (Ctxt->Decl) {
    998        return translateContext(Ctxt->Decl);
    999      }
   1000      Ctxt = Ctxt->Prev;
   1001    }
   1002    return Context();
   1003  }
   1004 
   1005  // Similar to GetContext(SourceLocation), but it skips the declaration passed
   1006  // in. This is useful if we want the context of a declaration that's already
   1007  // on the stack.
   1008  Context getContext(Decl *D) {
   1009    if (SM.isMacroBodyExpansion(D->getLocation())) {
   1010      // If we're inside a macro definition, we don't return any context. It
   1011      // will probably not be what the user expects if we do.
   1012      return Context();
   1013    }
   1014 
   1015    AutoSetContext *Ctxt = CurDeclContext;
   1016    while (Ctxt) {
   1017      if (Ctxt->Decl && Ctxt->Decl != D) {
   1018        return translateContext(Ctxt->Decl);
   1019      }
   1020      Ctxt = Ctxt->Prev;
   1021    }
   1022    return Context();
   1023  }
   1024 
   1025  // Searches for the closest CurDeclContext parent that is a function template
   1026  // instantiation
   1027  const FunctionDecl *getCurrentFunctionTemplateInstantiation() {
   1028    const auto *Ctxt = CurDeclContext;
   1029    while (Ctxt) {
   1030      if (Ctxt->Decl && isa<FunctionDecl>(Ctxt->Decl)) {
   1031        const auto *F = Ctxt->Decl->getAsFunction();
   1032        if (F->isTemplateInstantiation())
   1033          return F;
   1034      }
   1035      Ctxt = Ctxt->Prev;
   1036    }
   1037    return nullptr;
   1038  }
   1039 
   1040  // Analyzing template code is tricky. Suppose we have this code:
   1041  //
   1042  //   template<class T>
   1043  //   bool Foo(T* ptr) { return T::StaticMethod(ptr); }
   1044  //
   1045  // If we analyze the body of Foo without knowing the type T, then we will not
   1046  // be able to generate any information for StaticMethod. However, analyzing
   1047  // Foo for every possible instantiation is inefficient and it also generates
   1048  // too much data in some cases. For example, the following code would generate
   1049  // one definition of Baz for every instantiation, which is undesirable:
   1050  //
   1051  //   template<class T>
   1052  //   class Bar { struct Baz { ... }; };
   1053  //
   1054  // To solve this problem, we analyze templates only once. We do so in a
   1055  // GatherDependent mode where we look for "dependent scoped member
   1056  // expressions" (i.e., things like StaticMethod). We keep track of the
   1057  // locations of these expressions. If we find one or more of them, we analyze
   1058  // the template for each instantiation, in an AnalyzeDependent mode. This mode
   1059  // ignores all source locations except for the ones where we found dependent
   1060  // scoped member expressions before. For these locations, we generate a
   1061  // separate JSON result for each instantiation.
   1062  //
  // We inherit our parent's mode if it exists.  This is because if our
   1064  // parent is in analyze mode, it means we've already lived a full life in
   1065  // gather mode and we must not restart in gather mode or we'll cause the
   1066  // indexer to visit EVERY identifier, which is way too much data.
   1067  struct AutoTemplateContext {
   1068    AutoTemplateContext(IndexConsumer *Self)
   1069        : Self(Self), CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode
   1070                                                  : Mode::GatherDependent),
   1071          Parent(Self->TemplateStack) {
   1072      Self->TemplateStack = this;
   1073    }
   1074 
   1075    ~AutoTemplateContext() { Self->TemplateStack = Parent; }
   1076 
   1077    // We traverse templates in two modes:
   1078    enum class Mode {
   1079      // Gather mode does not traverse into specializations. It looks for
   1080      // locations where it would help to have more info from template
   1081      // specializations.
   1082      GatherDependent,
   1083 
   1084      // Analyze mode traverses into template specializations and records
   1085      // information about token locations saved in gather mode.
   1086      AnalyzeDependent,
   1087    };
   1088 
   1089    // We found a dependent scoped member expression! Keep track of it for
   1090    // later.
   1091    void visitDependent(SourceLocation Loc) {
   1092      if (CurMode == Mode::AnalyzeDependent) {
   1093        return;
   1094      }
   1095 
   1096      DependentLocations.insert(Loc.getRawEncoding());
   1097      if (Parent) {
   1098        Parent->visitDependent(Loc);
   1099      }
   1100    }
   1101 
   1102    bool inGatherMode() { return CurMode == Mode::GatherDependent; }
   1103 
   1104    // Do we need to perform the extra AnalyzeDependent passes (one per
   1105    // instantiation)?
   1106    bool needsAnalysis() const {
   1107      if (!DependentLocations.empty()) {
   1108        return true;
   1109      }
   1110      if (Parent) {
   1111        return Parent->needsAnalysis();
   1112      }
   1113      return false;
   1114    }
   1115 
   1116    void switchMode() { CurMode = Mode::AnalyzeDependent; }
   1117 
   1118    // Do we want to analyze each template instantiation separately?
   1119    bool shouldVisitTemplateInstantiations() const {
   1120      if (CurMode == Mode::AnalyzeDependent) {
   1121        return true;
   1122      }
   1123      if (Parent) {
   1124        return Parent->shouldVisitTemplateInstantiations();
   1125      }
   1126      return false;
   1127    }
   1128 
   1129    // For a given expression/statement, should we emit JSON data for it?
   1130    bool shouldVisit(SourceLocation Loc) {
   1131      if (CurMode == Mode::GatherDependent) {
   1132        return true;
   1133      }
   1134      if (DependentLocations.find(Loc.getRawEncoding()) !=
   1135          DependentLocations.end()) {
   1136        return true;
   1137      }
   1138      if (Parent) {
   1139        return Parent->shouldVisit(Loc);
   1140      }
   1141      return false;
   1142    }
   1143 
   1144  private:
   1145    IndexConsumer *Self;
   1146    Mode CurMode;
   1147    std::unordered_set<unsigned> DependentLocations;
   1148    AutoTemplateContext *Parent;
   1149  };
   1150 
   1151  AutoTemplateContext *TemplateStack;
   1152 
   1153  std::unordered_multimap<const FunctionDecl *, const Stmt *>
   1154      ForwardingTemplates;
   1155  std::unordered_set<unsigned> ForwardedTemplateLocations;
   1156 
   1157  bool shouldVisitTemplateInstantiations() const {
   1158    if (TemplateStack) {
   1159      return TemplateStack->shouldVisitTemplateInstantiations();
   1160    }
   1161    return false;
   1162  }
   1163 
   1164  bool shouldVisitImplicitCode() const {
   1165    return CurDeclContext && CurDeclContext->VisitImplicit;
   1166  }
   1167 
  // We don't want to traverse all specializations every time we find a forward
   1169  // declaration, so only traverse specializations related to an actual
   1170  // definition.
   1171  //
   1172  // ```
   1173  // // This is the canonical declaration for Maybe but isn't really useful.
   1174  // template <typename T>
   1175  // struct Maybe;
   1176  //
   1177  // // This is another ClassTemplateDecl, but not the canonical one, where we
   1178  // // actually have the definition. This is the one we want to traverse.
   1179  // template <typename T>
   1180  // struct Maybe {
   1181  //   // This is both the canonical declaration and the definition for
   1182  //   // inline_method and we want to traverse it.
   1183  //   template <typename... Args>
   1184  //   T *inline_method(Args&&... args) {
   1185  //     // definition
   1186  //   }
   1187  //
   1188  //   // This is the canonical declaration, TraverseFunctionTemplateDecl
   1189  //   // traverses its out of line definition too.
   1190  //   template <typename... Args>
   1191  //   T *out_of_line_method(Args&&... args);
  // };
   1193  //
   1194  // // This is the definition for Maybe<T>::out_of_line_method<Args...>
   1195  // // It is traversed when calling TraverseFunctionTemplateDecl on the
   1196  // // canonical declaration.
   1197  // template <typename T>
   1198  // template <typename... Args>
  // T *Maybe<T>::out_of_line_method(Args&&... args) {
   1200  //   // definition
   1201  // }
   1202  // ```
   1203  //
   1204  // So:
   1205  // - for class templates we check isThisDeclarationADefinition
   1206  // - for function templates we check isCanonicalDecl
   1207  bool TraverseClassTemplateDecl(ClassTemplateDecl *D) {
   1208    AutoTemplateContext Atc(this);
   1209    Super::TraverseClassTemplateDecl(D);
   1210 
   1211    // Gather dependent locations from partial specializations too
   1212    SmallVector<ClassTemplatePartialSpecializationDecl *> PS;
   1213    D->getPartialSpecializations(PS);
   1214    for (auto *Spec : PS) {
   1215      for (auto *Rd : Spec->redecls()) {
   1216        TraverseDecl(Rd);
   1217      }
   1218    }
   1219 
   1220    if (!Atc.needsAnalysis()) {
   1221      return true;
   1222    }
   1223 
   1224    Atc.switchMode();
   1225 
   1226    if (!D->isThisDeclarationADefinition())
   1227      return true;
   1228 
   1229    for (auto *Spec : D->specializations()) {
   1230      for (auto *Rd : Spec->redecls()) {
   1231        // We don't want to visit injected-class-names in this traversal.
   1232        if (cast<CXXRecordDecl>(Rd)->isInjectedClassName())
   1233          continue;
   1234 
   1235        TraverseDecl(Rd);
   1236      }
   1237    }
   1238 
   1239    return true;
   1240  }
   1241 
   1242  // See also comment above TraverseClassTemplateDecl
   1243  bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) {
   1244    AutoTemplateContext Atc(this);
   1245    if (Atc.inGatherMode()) {
   1246      Super::TraverseFunctionTemplateDecl(D);
   1247    }
   1248 
   1249    if (!Atc.needsAnalysis()) {
   1250      return true;
   1251    }
   1252 
   1253    Atc.switchMode();
   1254 
   1255    if (!D->isCanonicalDecl())
   1256      return true;
   1257 
   1258    for (auto *Spec : D->specializations()) {
   1259      for (auto *Rd : Spec->redecls()) {
   1260        TraverseDecl(Rd);
   1261      }
   1262    }
   1263 
   1264    return true;
   1265  }
   1266 
   1267  bool shouldVisit(SourceLocation Loc) {
   1268    if (TemplateStack) {
   1269      return TemplateStack->shouldVisit(Loc);
   1270    }
   1271    return true;
   1272  }
   1273 
   1274  // Returns true if the class has template in its entire class hierarchy.
   1275  bool hasTemplateInHierarchy(const CXXRecordDecl* cxxDecl) {
   1276    if (cxxDecl->isDependentType()) {
   1277      // This class is templatized.
   1278      return true;
   1279    }
   1280 
   1281 
   1282    if (dyn_cast<const ClassTemplateSpecializationDecl>(cxxDecl)) {
   1283      // This class is template specialization.
   1284      return true;
   1285    }
   1286 
   1287    for (const CXXBaseSpecifier &Base : cxxDecl->bases()) {
   1288      const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
   1289      if (!BaseDecl) {
   1290        // The base class is not-yet-substituted.
   1291        return true;
   1292      }
   1293 
   1294      const Type* ty = Base.getType().getTypePtr();
   1295      if (dyn_cast<const SubstTemplateTypeParmType>(ty)) {
   1296        // The base class is a substituted template parameter.
   1297        return true;
   1298      }
   1299 
   1300      if (hasTemplateInHierarchy(BaseDecl)) {
   1301        return true;
   1302      }
   1303    }
   1304 
   1305    return false;
   1306  }
   1307 
   1308  enum {
   1309    // Flag to omit the identifier from being cross-referenced across files.
   1310    // This is usually desired for local variables.
   1311    NoCrossref = 1 << 0,
   1312    // Flag to indicate the token with analysis data is not an identifier.
   1313    // Indicates
   1314    // we want to skip the check that tries to ensure a sane identifier token.
   1315    NotIdentifierToken = 1 << 1,
   1316    // This indicates that the end of the provided SourceRange is valid and
   1317    // should be respected. If this flag is not set, the visitIdentifier
   1318    // function should use only the start of the SourceRange and auto-detect
   1319    // the end based on whatever token is found at the start.
   1320    LocRangeEndValid = 1 << 2,
   1321    // Indicates this record was generated through heuristic template
   1322    // resolution.
   1323    Heuristic = 1 << 3,
   1324  };
   1325 
   1326  enum class LayoutHandling {
   1327    // Emit the layout information (size, offset, etc) and the other fields.
   1328    // This should be used when the struct is not templatized.
   1329    UseLayout,
   1330 
   1331    // Only emit the layout information.
   1332    // This should be used for emitting the data for base classes.
   1333    LayoutOnly,
   1334  };
   1335 
   1336  void emitStructuredRecordInfo(llvm::json::OStream &J, SourceLocation Loc,
   1337                                const RecordDecl *decl,
   1338                                LayoutHandling layoutHandling = LayoutHandling::UseLayout) {
   1339    if (layoutHandling != LayoutHandling::LayoutOnly) {
   1340      J.attribute("kind",
   1341                  TypeWithKeyword::getTagTypeKindName(decl->getTagKind()));
   1342    }
   1343 
   1344    const ASTContext &C = *AstContext;
   1345    const ASTRecordLayout &Layout = C.getASTRecordLayout(decl);
   1346 
   1347    J.attribute("sizeBytes", Layout.getSize().getQuantity());
   1348    J.attribute("alignmentBytes", Layout.getAlignment().getQuantity());
   1349 
   1350    emitBindingAttributes(J, *decl);
   1351 
   1352    auto cxxDecl = dyn_cast<CXXRecordDecl>(decl);
   1353 
   1354    if (cxxDecl) {
   1355      if (Layout.hasOwnVFPtr()) {
   1356        // Encode the size of virtual function table pointer
   1357        // instead of just true/false, for 2 reasons:
   1358        //  * having the size here is easier for the consumer
   1359        //  * the size string 4/8 is shorter than true/false in the analysis
   1360        //    file
   1361        const QualType ptrType = C.getUIntPtrType();
   1362        J.attribute("ownVFPtrBytes",
   1363                    C.getTypeSizeInChars(ptrType).getQuantity());
   1364      }
   1365 
   1366      bool emitLayout = false;
   1367      if (layoutHandling == LayoutHandling::LayoutOnly) {
   1368        emitLayout = true;
   1369      } else {
   1370        emitLayout = hasTemplateInHierarchy(cxxDecl);
   1371      }
   1372 
   1373      J.attributeBegin("supers");
   1374      J.arrayBegin();
   1375      for (const CXXBaseSpecifier &Base : cxxDecl->bases()) {
   1376        const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
   1377 
   1378        if (!BaseDecl) {
   1379          // If the base class is dependent of template parameters and
   1380          // not yet fixed, skip it.
   1381          // Those information will be emitted in the subclass that has
   1382          // fixed template parameters.
   1383          continue;
   1384        }
   1385 
   1386        J.objectBegin();
   1387 
   1388        J.attribute("sym", getMangledName(CurMangleContext, BaseDecl));
   1389 
   1390        if (Base.isVirtual()) {
   1391          CharUnits superOffsetBytes = Layout.getVBaseClassOffset(BaseDecl);
   1392          J.attribute("offsetBytes", superOffsetBytes.getQuantity());
   1393        } else {
   1394          CharUnits superOffsetBytes = Layout.getBaseClassOffset(BaseDecl);
   1395          J.attribute("offsetBytes", superOffsetBytes.getQuantity());
   1396        }
   1397 
   1398        J.attributeBegin("props");
   1399        J.arrayBegin();
   1400        if (Base.isVirtual()) {
   1401          J.value("virtual");
   1402        }
   1403        J.arrayEnd();
   1404        J.attributeEnd();
   1405 
   1406        if (emitLayout) {
   1407          // In order to reduce the file size, emit the entire super class
   1408          // layout only if there's any template class in the hierarchy
   1409          // Otherwise the field layout can be constructed with each
   1410          // superclass's data.
   1411 
   1412          J.attributeBegin("layout");
   1413          J.objectBegin();
   1414 
   1415          // The structured info for template leaf classes is not emitted,
   1416          // which means we don't have "pretty" format of the class.
   1417          // Thus we emit it here.
   1418          //
   1419          // Once that part is solved, the pretty field here can be removed.
   1420          //
   1421          // See the emitStructuredInfo callsite in VisitNamedDecl.
   1422          J.attribute("pretty", getQualifiedName(BaseDecl));
   1423 
   1424          emitStructuredRecordInfo(J, Loc, BaseDecl,
   1425                                   LayoutHandling::LayoutOnly);
   1426          J.objectEnd();
   1427          J.attributeEnd();
   1428        }
   1429 
   1430        J.objectEnd();
   1431      }
   1432      J.arrayEnd();
   1433      J.attributeEnd();
   1434 
   1435      if (layoutHandling != LayoutHandling::LayoutOnly) {
   1436        J.attributeBegin("methods");
   1437        J.arrayBegin();
   1438        for (const CXXMethodDecl *MethodDecl : cxxDecl->methods()) {
   1439          J.objectBegin();
   1440 
   1441          J.attribute("pretty", getQualifiedName(MethodDecl));
   1442          J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
   1443 
   1444          // TODO: Better figure out what to do for non-isUserProvided methods
   1445          // which means there's potentially semantic data that doesn't correspond
   1446          // to a source location in the source.  Should we be emitting
   1447          // structured info for those when we're processing the class here?
   1448 
   1449          J.attributeBegin("props");
   1450          J.arrayBegin();
   1451          if (MethodDecl->isStatic()) {
   1452            J.value("static");
   1453          }
   1454          if (MethodDecl->isInstance()) {
   1455            J.value("instance");
   1456          }
   1457          if (MethodDecl->isVirtual()) {
   1458            J.value("virtual");
   1459          }
   1460          if (MethodDecl->isUserProvided()) {
   1461            J.value("user");
   1462          }
   1463          if (MethodDecl->isDefaulted()) {
   1464            J.value("defaulted");
   1465          }
   1466          if (MethodDecl->isDeleted()) {
   1467            J.value("deleted");
   1468          }
   1469          if (MethodDecl->isConstexpr()) {
   1470            J.value("constexpr");
   1471          }
   1472          J.arrayEnd();
   1473          J.attributeEnd();
   1474 
   1475          J.objectEnd();
   1476        }
   1477        J.arrayEnd();
   1478        J.attributeEnd();
   1479      }
   1480    }
   1481 
   1482    FileID structFileID = SM.getFileID(Loc);
   1483 
   1484    J.attributeBegin("fields");
   1485    J.arrayBegin();
   1486    uint64_t iField = 0;
   1487    for (RecordDecl::field_iterator It = decl->field_begin(),
   1488                                    End = decl->field_end();
   1489         It != End; ++It, ++iField) {
   1490      const FieldDecl &Field = **It;
   1491      auto sourceRange =
   1492          SM.getExpansionRange(Field.getSourceRange()).getAsRange();
   1493      uint64_t localOffsetBits = Layout.getFieldOffset(iField);
   1494      CharUnits localOffsetBytes = C.toCharUnitsFromBits(localOffsetBits);
   1495 
   1496      J.objectBegin();
   1497      J.attribute("lineRange",
   1498                  pathAndLineRangeToString(structFileID, sourceRange));
   1499      J.attribute("pretty", getQualifiedName(&Field));
   1500      J.attribute("sym", getMangledName(CurMangleContext, &Field));
   1501 
   1502      QualType FieldType = Field.getType();
   1503      QualType CanonicalFieldType = FieldType.getCanonicalType();
   1504      LangOptions langOptions;
   1505      PrintingPolicy Policy(langOptions);
   1506      Policy.PrintCanonicalTypes = true;
   1507      J.attribute("type", CanonicalFieldType.getAsString(Policy));
   1508 
   1509      const TagDecl *tagDecl = CanonicalFieldType->getAsTagDecl();
   1510      if (!tagDecl) {
   1511        // Try again piercing any pointers/references involved.  Note that our
   1512        // typesym semantics are dubious-ish and right now crossref just does
   1513        // some parsing of "type" itself until we improve this rep.
   1514        CanonicalFieldType = CanonicalFieldType->getPointeeType();
   1515        if (!CanonicalFieldType.isNull()) {
   1516          tagDecl = CanonicalFieldType->getAsTagDecl();
   1517        }
   1518      }
   1519      if (tagDecl) {
   1520        J.attribute("typesym", getMangledName(CurMangleContext, tagDecl));
   1521      }
   1522 
   1523      J.attribute("offsetBytes", localOffsetBytes.getQuantity());
   1524      if (Field.isBitField()) {
   1525        J.attributeBegin("bitPositions");
   1526        J.objectBegin();
   1527 
   1528        J.attribute("begin",
   1529                    unsigned(localOffsetBits - C.toBits(localOffsetBytes)));
   1530 #if CLANG_VERSION_MAJOR < 20
   1531        J.attribute("width", Field.getBitWidthValue(C));
   1532 #else
   1533        J.attribute("width", Field.getBitWidthValue());
   1534 #endif
   1535 
   1536        J.objectEnd();
   1537        J.attributeEnd();
   1538      } else {
   1539        // Try and get the field as a record itself so we can know its size, but
   1540        // we don't actually want to recurse into it.
   1541        if (auto FieldRec = Field.getType()->getAs<RecordType>()) {
   1542          auto const &FieldLayout = C.getASTRecordLayout(FieldRec->getDecl());
   1543          J.attribute("sizeBytes", FieldLayout.getSize().getQuantity());
   1544        } else {
   1545          // We were unable to get it as a record, which suggests it's a normal
   1546          // type, in which case let's just ask for the type size.  (Maybe this
   1547          // would also work for the above case too?)
   1548          uint64_t typeSizeBits = C.getTypeSize(Field.getType());
   1549          CharUnits typeSizeBytes = C.toCharUnitsFromBits(typeSizeBits);
   1550          J.attribute("sizeBytes", typeSizeBytes.getQuantity());
   1551        }
   1552      }
   1553      J.objectEnd();
   1554    }
   1555    J.arrayEnd();
   1556    J.attributeEnd();
   1557  }
   1558 
   1559  void emitStructuredEnumInfo(llvm::json::OStream &J, const EnumDecl *ED) {
   1560    J.attribute("kind", "enum");
   1561  }
   1562 
   1563  void emitStructuredEnumConstantInfo(llvm::json::OStream &J,
   1564                                      const EnumConstantDecl *ECD) {
   1565    J.attribute("kind", "enumConstant");
   1566  }
   1567 
   1568  void emitStructuredFunctionInfo(llvm::json::OStream &J,
   1569                                  const FunctionDecl *decl) {
   1570    emitBindingAttributes(J, *decl);
   1571 
   1572    J.attributeBegin("args");
   1573    J.arrayBegin();
   1574 
   1575    for (auto param : decl->parameters()) {
   1576      J.objectBegin();
   1577 
   1578      J.attribute("name", param->getName());
   1579      QualType ArgType = param->getOriginalType();
   1580      J.attribute("type", ArgType.getAsString());
   1581 
   1582      QualType CanonicalArgType = ArgType.getCanonicalType();
   1583      const TagDecl *canonDecl = CanonicalArgType->getAsTagDecl();
   1584      if (!canonDecl) {
   1585        // Try again piercing any pointers/references involved.  Note that our
   1586        // typesym semantics are dubious-ish and right now crossref just does
   1587        // some parsing of "type" itself until we improve this rep.
   1588        CanonicalArgType = CanonicalArgType->getPointeeType();
   1589        if (!CanonicalArgType.isNull()) {
   1590          canonDecl = CanonicalArgType->getAsTagDecl();
   1591        }
   1592      }
   1593      if (canonDecl) {
   1594        J.attribute("typesym", getMangledName(CurMangleContext, canonDecl));
   1595      }
   1596 
   1597      J.objectEnd();
   1598    }
   1599 
   1600    J.arrayEnd();
   1601    J.attributeEnd();
   1602 
   1603    auto cxxDecl = dyn_cast<CXXMethodDecl>(decl);
   1604 
   1605    if (cxxDecl) {
   1606      J.attribute("kind", "method");
   1607      if (auto parentDecl = cxxDecl->getParent()) {
   1608        J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
   1609      }
   1610 
   1611      J.attributeBegin("overrides");
   1612      J.arrayBegin();
   1613      for (const CXXMethodDecl *MethodDecl : cxxDecl->overridden_methods()) {
   1614        J.objectBegin();
   1615 
   1616        // TODO: Make sure we're doing template traversals appropriately...
   1617        // findOverriddenMethods (now removed) liked to do:
   1618        //   if (Decl->isTemplateInstantiation()) {
   1619        //     Decl =
   1620        //     dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern());
   1621        //   }
   1622        // I think our pre-emptive dereferencing/avoidance of templates may
   1623        // protect us from this, but it needs more investigation.
   1624 
   1625        J.attribute("sym", getMangledName(CurMangleContext, MethodDecl));
   1626 
   1627        J.objectEnd();
   1628      }
   1629      J.arrayEnd();
   1630      J.attributeEnd();
   1631 
   1632    } else {
   1633      J.attribute("kind", "function");
   1634    }
   1635 
   1636    // ## Props
   1637    J.attributeBegin("props");
   1638    J.arrayBegin();
   1639    // some of these are only possible on a CXXMethodDecl, but we want them all
   1640    // in the same array, so condition these first ones.
   1641    if (cxxDecl) {
   1642      if (cxxDecl->isStatic()) {
   1643        J.value("static");
   1644      }
   1645      if (cxxDecl->isInstance()) {
   1646        J.value("instance");
   1647      }
   1648      if (cxxDecl->isVirtual()) {
   1649        J.value("virtual");
   1650      }
   1651      if (cxxDecl->isUserProvided()) {
   1652        J.value("user");
   1653      }
   1654    }
   1655    if (decl->isDefaulted()) {
   1656      J.value("defaulted");
   1657    }
   1658    if (decl->isDeleted()) {
   1659      J.value("deleted");
   1660    }
   1661    if (decl->isConstexpr()) {
   1662      J.value("constexpr");
   1663    }
   1664    J.arrayEnd();
   1665    J.attributeEnd();
   1666  }
   1667 
   1668  /**
   1669   * Emit structured info for a field.  Right now the intent is for this to just
   1670   * be a pointer to its parent's structured info with this method entirely
   1671   * avoiding getting the ASTRecordLayout.
   1672   *
   1673   * TODO: Give more thought on where to locate the canonical info on fields and
   1674   * how to normalize their exposure over the web.  We could relink the info
   1675   * both at cross-reference time and web-server lookup time.  This is also
   1676   * called out in `analysis.md`.
   1677   */
   1678  void emitStructuredFieldInfo(llvm::json::OStream &J, const FieldDecl *decl) {
   1679    J.attribute("kind", "field");
   1680 
   1681    // XXX the call to decl::getParent will assert below for ObjCIvarDecl
   1682    // instances because their DecContext is not a RecordDecl.  So just bail
   1683    // for now.
   1684    // TODO: better support ObjC.
   1685    if (!dyn_cast<ObjCIvarDecl>(decl)) {
   1686      if (auto parentDecl = decl->getParent()) {
   1687        J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
   1688      }
   1689    }
   1690  }
   1691 
   1692  /**
   1693   * Emit structured info for a variable if it is a static class member.
   1694   */
   1695  void emitStructuredVarInfo(llvm::json::OStream &J, const VarDecl *decl) {
   1696    const auto *parentDecl =
   1697        dyn_cast_or_null<RecordDecl>(decl->getDeclContext());
   1698 
   1699    if (parentDecl) {
   1700      J.attribute("kind", "field");
   1701    } else if (llvm::isa<ParmVarDecl>(decl)) {
   1702      J.attribute("kind", "parameter");
   1703    } else if (decl->isLocalVarDecl()) {
   1704      J.attribute("kind", "localVar");
   1705    } else {
   1706      // namespace scope variable
   1707      J.attribute("kind", "variable");
   1708    }
   1709 
   1710    if (parentDecl) {
   1711      J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl));
   1712    }
   1713 
   1714    emitBindingAttributes(J, *decl);
   1715  }
   1716 
   1717  void emitStructuredInfo(SourceLocation Loc, const NamedDecl *decl,
   1718                          LayoutHandling layoutHandling = LayoutHandling::UseLayout) {
   1719    std::string json_str;
   1720    llvm::raw_string_ostream ros(json_str);
   1721    llvm::json::OStream J(ros);
   1722    // Start the top-level object.
   1723    J.objectBegin();
   1724 
   1725    unsigned StartOffset = SM.getFileOffset(Loc);
   1726    unsigned EndOffset =
   1727        StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts());
   1728    J.attribute("loc", locationToString(Loc, EndOffset - StartOffset));
   1729    J.attribute("structured", 1);
   1730    J.attribute("pretty", getQualifiedName(decl));
   1731    J.attribute("sym", getMangledName(CurMangleContext, decl));
   1732 
   1733    if (const RecordDecl *RD = dyn_cast<RecordDecl>(decl)) {
   1734      emitStructuredRecordInfo(J, Loc, RD, layoutHandling);
   1735    } else if (const EnumDecl *ED = dyn_cast<EnumDecl>(decl)) {
   1736      emitStructuredEnumInfo(J, ED);
   1737    } else if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(decl)) {
   1738      emitStructuredEnumConstantInfo(J, ECD);
   1739    } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(decl)) {
   1740      emitStructuredFunctionInfo(J, FD);
   1741    } else if (const FieldDecl *FD = dyn_cast<FieldDecl>(decl)) {
   1742      emitStructuredFieldInfo(J, FD);
   1743    } else if (const VarDecl *VD = dyn_cast<VarDecl>(decl)) {
   1744      emitStructuredVarInfo(J, VD);
   1745    }
   1746 
   1747    // End the top-level object.
   1748    J.objectEnd();
   1749 
   1750    FileInfo *F = getFileInfo(Loc);
   1751    // we want a newline.
   1752    ros << '\n';
   1753    F->Output.push_back(std::move(ros.str()));
   1754  }
   1755 
   1756  // XXX Type annotating.
   1757  // QualType is the type class.  It has helpers like TagDecl via getAsTagDecl.
   1758  // ValueDecl exposes a getType() method.
   1759  //
   1760  // Arguably it makes sense to only expose types that Searchfox has definitions
   1761  // for as first-class.  Probably the way to go is like context/contextsym.
   1762  // We expose a "type" which is just a human-readable string which has no
   1763  // semantic purposes and is just a display string, plus then a "typesym" which
   1764  // we expose if we were able to map the type.
   1765  //
   1766  // Other meta-info: field offsets.  Ancestor types.
   1767 
   1768  // This is the only function that emits analysis JSON data. It should be
   1769  // called for each identifier that corresponds to a symbol.
   1770  void visitIdentifier(const char *Kind, const char *SyntaxKind,
   1771                       llvm::StringRef QualName, SourceRange LocRange,
   1772                       std::string Symbol, QualType MaybeType = QualType(),
   1773                       Context TokenContext = Context(), int Flags = 0,
   1774                       SourceRange PeekRange = SourceRange(),
   1775                       SourceRange NestingRange = SourceRange(),
   1776                       std::vector<SourceRange> *ArgRanges = nullptr) {
   1777    SourceLocation Loc = LocRange.getBegin();
   1778 
   1779    // Also visit the spelling site.
   1780    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   1781    if (SpellingLoc != Loc) {
   1782      visitIdentifier(Kind, SyntaxKind, QualName, SpellingLoc, Symbol,
   1783                      MaybeType, TokenContext, Flags, PeekRange, NestingRange,
   1784                      ArgRanges);
   1785    }
   1786 
   1787    SourceLocation ExpansionLoc = SM.getExpansionLoc(Loc);
   1788    normalizeLocation(&ExpansionLoc);
   1789 
   1790    if (!shouldVisit(ExpansionLoc)) {
   1791      return;
   1792    }
   1793 
   1794    if (ExpansionLoc != Loc)
   1795      Flags = Flags & ~LocRangeEndValid;
   1796 
   1797    // Find the file positions corresponding to the token.
   1798    unsigned StartOffset = SM.getFileOffset(ExpansionLoc);
   1799    unsigned EndOffset =
   1800        (Flags & LocRangeEndValid)
   1801            ? SM.getFileOffset(LocRange.getEnd())
   1802            : StartOffset +
   1803                  Lexer::MeasureTokenLength(ExpansionLoc, SM, CI.getLangOpts());
   1804 
   1805    std::string LocStr =
   1806        locationToString(ExpansionLoc, EndOffset - StartOffset);
   1807    std::string RangeStr =
   1808        locationToString(ExpansionLoc, EndOffset - StartOffset);
   1809    std::string PeekRangeStr;
   1810 
   1811    if (!(Flags & NotIdentifierToken)) {
   1812      // Get the token's characters so we can make sure it's a valid token.
   1813      const char *StartChars = SM.getCharacterData(ExpansionLoc);
   1814      std::string Text(StartChars, EndOffset - StartOffset);
   1815      if (!isValidIdentifier(Text)) {
   1816        return;
   1817      }
   1818    }
   1819 
   1820    FileInfo *F = getFileInfo(ExpansionLoc);
   1821 
   1822    if (!(Flags & NoCrossref)) {
   1823      std::string json_str;
   1824      llvm::raw_string_ostream ros(json_str);
   1825      llvm::json::OStream J(ros);
   1826      // Start the top-level object.
   1827      J.objectBegin();
   1828 
   1829      J.attribute("loc", LocStr);
   1830      J.attribute("target", 1);
   1831      J.attribute("kind", Kind);
   1832      J.attribute("pretty", QualName.data());
   1833      J.attribute("sym", Symbol);
   1834      if (!TokenContext.Name.empty()) {
   1835        J.attribute("context", TokenContext.Name);
   1836      }
   1837      if (!TokenContext.Symbol.empty()) {
   1838        J.attribute("contextsym", TokenContext.Symbol);
   1839      }
   1840      if (PeekRange.isValid()) {
   1841        PeekRangeStr = lineRangeToString(PeekRange);
   1842        if (!PeekRangeStr.empty()) {
   1843          J.attribute("peekRange", PeekRangeStr);
   1844        }
   1845      }
   1846 
   1847      if (ArgRanges) {
   1848        J.attributeBegin("argRanges");
   1849        J.arrayBegin();
   1850 
   1851        for (auto range : *ArgRanges) {
   1852          std::string ArgRangeStr = fullRangeToString(range);
   1853          if (!ArgRangeStr.empty()) {
   1854            J.value(ArgRangeStr);
   1855          }
   1856        }
   1857 
   1858        J.arrayEnd();
   1859        J.attributeEnd();
   1860      }
   1861 
   1862      // End the top-level object.
   1863      J.objectEnd();
   1864      // we want a newline.
   1865      ros << '\n';
   1866      F->Output.push_back(std::move(ros.str()));
   1867    }
   1868 
   1869    // Generate a single "source":1 for all the symbols. If we search from here,
   1870    // we want to union the results for every symbol in `symbols`.
   1871    std::string json_str;
   1872    llvm::raw_string_ostream ros(json_str);
   1873    llvm::json::OStream J(ros);
   1874    // Start the top-level object.
   1875    J.objectBegin();
   1876 
   1877    J.attribute("loc", RangeStr);
   1878    J.attribute("source", 1);
   1879 
   1880    if (NestingRange.isValid()) {
   1881      std::string NestingRangeStr = fullRangeToString(NestingRange);
   1882      if (!NestingRangeStr.empty()) {
   1883        J.attribute("nestingRange", NestingRangeStr);
   1884      }
   1885    }
   1886 
   1887    std::string Syntax;
   1888    if (Flags & NoCrossref) {
   1889      J.attribute("syntax", "");
   1890    } else {
   1891      Syntax = Kind;
   1892      Syntax.push_back(',');
   1893      Syntax.append(SyntaxKind);
   1894      J.attribute("syntax", Syntax);
   1895    }
   1896 
   1897    if (!MaybeType.isNull()) {
   1898      J.attribute("type", MaybeType.getAsString());
   1899      QualType canonical = MaybeType.getCanonicalType();
   1900      const TagDecl *decl = canonical->getAsTagDecl();
   1901      if (!decl) {
   1902        // Try again piercing any pointers/references involved.  Note that our
   1903        // typesym semantics are dubious-ish and right now crossref just does
   1904        // some parsing of "type" itself until we improve this rep.
   1905        canonical = canonical->getPointeeType();
   1906        if (!canonical.isNull()) {
   1907          decl = canonical->getAsTagDecl();
   1908        }
   1909      }
   1910      if (decl) {
   1911        std::string Mangled = getMangledName(CurMangleContext, decl);
   1912        J.attribute("typesym", Mangled);
   1913      }
   1914    }
   1915 
   1916    std::string Pretty(SyntaxKind);
   1917    Pretty.push_back(' ');
   1918    Pretty.append(QualName.data());
   1919    J.attribute("pretty", Pretty);
   1920 
   1921    J.attribute("sym", Symbol);
   1922 
   1923    if (Flags & NoCrossref) {
   1924      J.attribute("no_crossref", 1);
   1925    }
   1926 
   1927    if (Flags & Heuristic) {
   1928      J.attributeBegin("confidence");
   1929      J.arrayBegin();
   1930      J.value("cppTemplateHeuristic");
   1931      J.arrayEnd();
   1932      J.attributeEnd();
   1933    }
   1934 
   1935    if (ArgRanges) {
   1936      J.attributeBegin("argRanges");
   1937      J.arrayBegin();
   1938 
   1939      for (auto range : *ArgRanges) {
   1940        std::string ArgRangeStr = fullRangeToString(range);
   1941        if (!ArgRangeStr.empty()) {
   1942          J.value(ArgRangeStr);
   1943        }
   1944      }
   1945 
   1946      J.arrayEnd();
   1947      J.attributeEnd();
   1948    }
   1949 
   1950    const auto macro = MacroMaps.find(ExpansionLoc);
   1951    if (macro != MacroMaps.end()) {
   1952      const auto &macroInfo = macro->second;
   1953      if (macroInfo.Symbol == Symbol) {
   1954        J.attributeBegin("expandsTo");
   1955        J.objectBegin();
   1956        J.attributeBegin(macroInfo.Key);
   1957        J.objectBegin();
   1958        J.attribute("", macroInfo.Expansion); // "" is the platform key,
   1959                                              // populated by the merge step
   1960        J.objectEnd();
   1961        J.attributeEnd();
   1962        J.objectEnd();
   1963        J.attributeEnd();
   1964      } else {
   1965        const auto it = macroInfo.TokenLocations.find(Loc);
   1966        if (it != macroInfo.TokenLocations.end()) {
   1967          J.attributeBegin("inExpansionAt");
   1968          J.objectBegin();
   1969          J.attributeBegin(macroInfo.Key);
   1970          J.objectBegin();
   1971          J.attributeBegin(
   1972              ""); // "" is the platform key, populated by the merge step
   1973          J.arrayBegin();
   1974          J.value(it->second);
   1975          J.arrayEnd();
   1976          J.attributeEnd();
   1977          J.objectEnd();
   1978          J.attributeEnd();
   1979          J.objectEnd();
   1980          J.attributeEnd();
   1981        }
   1982      }
   1983    }
   1984 
   1985    // End the top-level object.
   1986    J.objectEnd();
   1987 
   1988    // we want a newline.
   1989    ros << '\n';
   1990    F->Output.push_back(std::move(ros.str()));
   1991  }
   1992 
   1993  void normalizeLocation(SourceLocation *Loc) {
   1994    *Loc = SM.getSpellingLoc(*Loc);
   1995  }
   1996 
   1997  // For cases where the left-brace is not directly accessible from the AST,
   1998  // helper to use the lexer to find the brace.  Make sure you're picking the
   1999  // start location appropriately!
   2000  SourceLocation findLeftBraceFromLoc(SourceLocation Loc) {
   2001    return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false);
   2002  }
   2003 
   2004  // If the provided statement is compound, return its range.
   2005  SourceRange getCompoundStmtRange(Stmt *D) {
   2006    if (!D) {
   2007      return SourceRange();
   2008    }
   2009 
   2010    CompoundStmt *D2 = dyn_cast<CompoundStmt>(D);
   2011    if (D2) {
   2012      return D2->getSourceRange();
   2013    }
   2014 
   2015    return SourceRange();
   2016  }
   2017 
   2018  SourceRange getFunctionPeekRange(FunctionDecl *D) {
   2019    // We always start at the start of the function decl, which may include the
   2020    // return type on a separate line.
   2021    SourceLocation Start = D->getBeginLoc();
   2022 
   2023    // By default, we end at the line containing the function's name.
   2024    SourceLocation End = D->getLocation();
   2025 
   2026    std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedExpansionLoc(End);
   2027 
   2028    // But if there are parameters, we want to include those as well.
   2029    for (ParmVarDecl *Param : D->parameters()) {
   2030      std::pair<FileID, unsigned> ParamLoc =
   2031          SM.getDecomposedExpansionLoc(Param->getLocation());
   2032 
   2033      // It's possible there are macros involved or something. We don't include
   2034      // the parameters in that case.
   2035      if (ParamLoc.first == FuncLoc.first) {
   2036        // Assume parameters are in order, so we always take the last one.
   2037        End = Param->getEndLoc();
   2038      }
   2039    }
   2040 
   2041    return SourceRange(Start, End);
   2042  }
   2043 
   2044  SourceRange getTagPeekRange(TagDecl *D) {
   2045    SourceLocation Start = D->getBeginLoc();
   2046 
   2047    // By default, we end at the line containing the name.
   2048    SourceLocation End = D->getLocation();
   2049 
   2050    std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedExpansionLoc(End);
   2051 
   2052    if (CXXRecordDecl *D2 = dyn_cast<CXXRecordDecl>(D)) {
   2053      // But if there are parameters, we want to include those as well.
   2054      for (CXXBaseSpecifier &Base : D2->bases()) {
   2055        std::pair<FileID, unsigned> Loc = SM.getDecomposedExpansionLoc(Base.getEndLoc());
   2056 
   2057        // It's possible there are macros involved or something. We don't
   2058        // include the parameters in that case.
   2059        if (Loc.first == FuncLoc.first) {
   2060          // Assume parameters are in order, so we always take the last one.
   2061          End = Base.getEndLoc();
   2062        }
   2063      }
   2064    }
   2065 
   2066    return SourceRange(Start, End);
   2067  }
   2068 
   2069  SourceRange getCommentRange(NamedDecl *D) {
   2070    const RawComment *RC = AstContext->getRawCommentForDeclNoCache(D);
   2071    if (!RC) {
   2072      return SourceRange();
   2073    }
   2074 
   2075    return RC->getSourceRange();
   2076  }
   2077 
   2078  // Sanity checks that all ranges are in the same file, returning the first if
   2079  // they're in different files.  Unions the ranges based on which is first.
   2080  SourceRange combineRanges(SourceRange Range1, SourceRange Range2) {
   2081    if (Range1.isInvalid()) {
   2082      return Range2;
   2083    }
   2084    if (Range2.isInvalid()) {
   2085      return Range1;
   2086    }
   2087 
   2088    std::pair<FileID, unsigned> Begin1 = SM.getDecomposedExpansionLoc(Range1.getBegin());
   2089    std::pair<FileID, unsigned> End1 = SM.getDecomposedExpansionLoc(Range1.getEnd());
   2090    std::pair<FileID, unsigned> Begin2 = SM.getDecomposedExpansionLoc(Range2.getBegin());
   2091    std::pair<FileID, unsigned> End2 = SM.getDecomposedExpansionLoc(Range2.getEnd());
   2092 
   2093    if (End1.first != Begin2.first) {
   2094      // Something weird is probably happening with the preprocessor. Just
   2095      // return the first range.
   2096      return Range1;
   2097    }
   2098 
   2099    // See which range comes first.
   2100    if (Begin1.second <= End2.second) {
   2101      return SourceRange(Range1.getBegin(), Range2.getEnd());
   2102    } else {
   2103      return SourceRange(Range2.getBegin(), Range1.getEnd());
   2104    }
   2105  }
   2106 
   2107  // Given a location and a range, returns the range if:
   2108  // - The location and the range live in the same file.
   2109  // - The range is well ordered (end is not before begin).
   2110  // Returns an empty range otherwise.
   2111  SourceRange validateRange(SourceLocation Loc, SourceRange Range) {
   2112    std::pair<FileID, unsigned> Decomposed = SM.getDecomposedExpansionLoc(Loc);
   2113    std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin());
   2114    std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd());
   2115 
   2116    if (Begin.first != Decomposed.first || End.first != Decomposed.first) {
   2117      return SourceRange();
   2118    }
   2119 
   2120    if (Begin.second >= End.second) {
   2121      return SourceRange();
   2122    }
   2123 
   2124    return Range;
   2125  }
   2126 
   2127  bool VisitNamedDecl(NamedDecl *D) {
   2128    SourceLocation Loc = D->getLocation();
   2129    if (!isInterestingLocation(Loc)) {
   2130      return true;
   2131    }
   2132 
   2133    SourceLocation ExpansionLoc = Loc;
   2134    if (SM.isMacroBodyExpansion(Loc)) {
   2135      ExpansionLoc = SM.getFileLoc(Loc);
   2136    }
   2137    normalizeLocation(&ExpansionLoc);
   2138 
   2139    if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) {
   2140      // Unnamed parameter in function proto.
   2141      return true;
   2142    }
   2143 
   2144    int Flags = 0;
   2145    const char *Kind = "def";
   2146    const char *PrettyKind = "?";
   2147    bool wasTemplate = false;
   2148    SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc());
   2149    // The nesting range identifies the left brace and right brace, which
   2150    // heavily depends on the AST node type.
   2151    SourceRange NestingRange;
   2152    QualType qtype = QualType();
   2153    if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
   2154      if (D2->isTemplateInstantiation()) {
   2155        wasTemplate = true;
   2156        D = D2->getTemplateInstantiationPattern();
   2157      }
   2158      // We treat pure virtual declarations as definitions.
   2159      Kind =
   2160          (D2->isThisDeclarationADefinition() || isPure(D2)) ? "def" : "decl";
   2161      PrettyKind = "function";
   2162      PeekRange = getFunctionPeekRange(D2);
   2163 
   2164      // Only emit the nesting range if:
   2165      // - This is a definition AND
   2166      // - This isn't a template instantiation.  Function templates'
   2167      //   instantiations can end up as a definition with a Loc at their point
   2168      //   of declaration but with the CompoundStmt of the template's
   2169      //   point of definition.  This really messes up the nesting range logic.
   2170      //   At the time of writing this, the test repo's `big_header.h`'s
   2171      //   `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as
   2172      //   instantiated by `big_cpp.cpp` triggers this phenomenon.
   2173      //
   2174      // Note: As covered elsewhere, template processing is tricky and it's
   2175      // conceivable that we may change traversal patterns in the future,
   2176      // mooting this guard.
   2177      if (D2->isThisDeclarationADefinition() &&
   2178          !D2->isTemplateInstantiation()) {
   2179        // The CompoundStmt range is the brace range.
   2180        NestingRange = getCompoundStmtRange(D2->getBody());
   2181      }
   2182    } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) {
   2183      Kind = D2->isThisDeclarationADefinition() ? "def" : "forward";
   2184      PrettyKind = "type";
   2185 
   2186      if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) {
   2187        PeekRange = getTagPeekRange(D2);
   2188        NestingRange = D2->getBraceRange();
   2189      } else {
   2190        PeekRange = SourceRange();
   2191      }
   2192    } else if (TypedefNameDecl *D2 = dyn_cast<TypedefNameDecl>(D)) {
   2193      Kind = "alias";
   2194      PrettyKind = "type";
   2195      PeekRange = SourceRange(ExpansionLoc, ExpansionLoc);
   2196      qtype = D2->getUnderlyingType();
   2197    } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
   2198      if (D2->isLocalVarDeclOrParm()) {
   2199        Flags = NoCrossref;
   2200      }
   2201 
   2202      Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly
   2203                 ? "decl"
   2204                 : "def";
   2205      PrettyKind = "variable";
   2206 
   2207      if (needsNestingRangeForVarDecl(PeekRange)) {
   2208        NestingRange = PeekRange;
   2209      }
   2210    } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) {
   2211      Kind = "def";
   2212      PrettyKind = "namespace";
   2213      PeekRange = SourceRange(ExpansionLoc, ExpansionLoc);
   2214      NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D);
   2215      if (D2) {
   2216        // There's no exposure of the left brace so we have to find it.
   2217        NestingRange = SourceRange(
   2218            findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc()
   2219                                                            : ExpansionLoc),
   2220            D2->getRBraceLoc());
   2221      }
   2222    } else if (isa<FieldDecl>(D)) {
   2223      Kind = "def";
   2224      PrettyKind = "field";
   2225    } else if (isa<EnumConstantDecl>(D)) {
   2226      Kind = "def";
   2227      PrettyKind = "enum constant";
   2228    } else {
   2229      return true;
   2230    }
   2231 
   2232    if (ValueDecl *D2 = dyn_cast<ValueDecl>(D)) {
   2233      qtype = D2->getType();
   2234    }
   2235 
   2236    SourceRange CommentRange = getCommentRange(D);
   2237    PeekRange = combineRanges(PeekRange, CommentRange);
   2238    PeekRange = validateRange(Loc, PeekRange);
   2239    NestingRange = validateRange(Loc, NestingRange);
   2240 
   2241    std::string Symbol = getMangledName(CurMangleContext, D);
   2242 
   2243    // In the case of destructors, Loc might point to the ~ character. In that
   2244    // case we want to skip to the name of the class. However, Loc might also
   2245    // point to other places that generate destructors, such as a lambda
   2246    // (apparently clang 8 creates a destructor declaration for at least some
   2247    // lambdas). In that case we'll just drop the declaration.
   2248    if (isa<CXXDestructorDecl>(D)) {
   2249      PrettyKind = "destructor";
   2250      const char *P = SM.getCharacterData(Loc);
   2251      if (*P == '~') {
   2252        // Advance Loc to the class name
   2253        P++;
   2254 
   2255        unsigned Skipped = 1;
   2256        while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') {
   2257          P++;
   2258          Skipped++;
   2259        }
   2260 
   2261        Loc = Loc.getLocWithOffset(Skipped);
   2262      } else {
   2263        return true;
   2264      }
   2265    }
   2266 
   2267    visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc),
   2268                    Symbol, qtype, getContext(D), Flags, PeekRange,
   2269                    NestingRange);
   2270 
   2271    // In-progress structured info emission.
   2272    if (RecordDecl *D2 = dyn_cast<RecordDecl>(D)) {
   2273      if (D2->isThisDeclarationADefinition() &&
   2274          // We don't emit structured info for template leaf classes
   2275          // in order to reduce the memory consumption comes from
   2276          // too many instantiation gathered to container classes in
   2277          // crossref-extra and jumpref-extra.
   2278          //
   2279          // Once that part is solved, those template leaf classes
   2280          // can be emitted by skipping getASTRecordLayout call and
   2281          // the Layout handling in emitStructuredRecordInfo.
   2282          //
   2283          // See https://github.com/mozsearch/mozsearch/pull/906
   2284          !D2->isDependentType() && !TemplateStack) {
   2285        if (auto *D3 = dyn_cast<CXXRecordDecl>(D2)) {
   2286          findBindingToJavaClass(*AstContext, *D3);
   2287          findBoundAsJavaClasses(*AstContext, *D3);
   2288        }
   2289        emitStructuredInfo(ExpansionLoc, D2, LayoutHandling::UseLayout);
   2290      }
   2291    }
   2292    if (EnumDecl *D2 = dyn_cast<EnumDecl>(D)) {
   2293      if (D2->isThisDeclarationADefinition() && !D2->isDependentType() &&
   2294          !TemplateStack) {
   2295        emitStructuredInfo(ExpansionLoc, D2);
   2296      }
   2297    }
   2298    if (EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(D)) {
   2299      if (!D2->isTemplated() && !TemplateStack) {
   2300        emitStructuredInfo(ExpansionLoc, D2);
   2301      }
   2302    }
   2303    if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) {
   2304      if ((D2->isThisDeclarationADefinition() || isPure(D2)) &&
   2305          // a clause at the top should have generalized and set wasTemplate so
   2306          // it shouldn't be the case that isTemplateInstantiation() is true.
   2307          !D2->isTemplateInstantiation() && !wasTemplate &&
   2308          !D2->isFunctionTemplateSpecialization() && !TemplateStack) {
   2309        if (auto *D3 = dyn_cast<CXXMethodDecl>(D2)) {
   2310          findBindingToJavaMember(*AstContext, *D3);
   2311        } else {
   2312          findBindingToJavaFunction(*AstContext, *D2);
   2313        }
   2314        emitStructuredInfo(ExpansionLoc, D2);
   2315      }
   2316    }
   2317    if (FieldDecl *D2 = dyn_cast<FieldDecl>(D)) {
   2318      if (!D2->isTemplated() && !TemplateStack) {
   2319        emitStructuredInfo(ExpansionLoc, D2);
   2320      }
   2321    }
   2322    if (VarDecl *D2 = dyn_cast<VarDecl>(D)) {
   2323      if (!D2->isTemplated() && !TemplateStack) {
   2324        findBindingToJavaConstant(*AstContext, *D2);
   2325        emitStructuredInfo(ExpansionLoc, D2);
   2326      }
   2327    }
   2328 
   2329    return true;
   2330  }
   2331 
   2332  bool VisitCXXConstructExpr(const CXXConstructExpr *E) {
   2333    // If we are in a template and find a Stmt that was registed in
   2334    // ForwardedTemplateLocations, convert the location to an actual Stmt* in
   2335    // ForwardingTemplates
   2336    if (TemplateStack && !TemplateStack->inGatherMode()) {
   2337      if (ForwardedTemplateLocations.find(E->getBeginLoc().getRawEncoding()) !=
   2338          ForwardedTemplateLocations.end()) {
   2339        if (const auto *currentTemplate =
   2340                getCurrentFunctionTemplateInstantiation()) {
   2341          ForwardingTemplates.insert({currentTemplate, E});
   2342        }
   2343        return true;
   2344      }
   2345    }
   2346 
   2347    SourceLocation Loc = E->getBeginLoc();
   2348    if (!isInterestingLocation(Loc)) {
   2349      return true;
   2350    }
   2351 
   2352    return VisitCXXConstructExpr(E, Loc);
   2353  }
   2354 
   2355  bool VisitCXXConstructExpr(const CXXConstructExpr *E, SourceLocation Loc) {
   2356    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2357 
   2358    FunctionDecl *Ctor = E->getConstructor();
   2359    if (Ctor->isTemplateInstantiation()) {
   2360      Ctor = Ctor->getTemplateInstantiationPattern();
   2361    }
   2362    std::string Mangled = getMangledName(CurMangleContext, Ctor);
   2363 
   2364    // FIXME: Need to do something different for list initialization.
   2365 
   2366    visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled,
   2367                    QualType(), getContext(SpellingLoc));
   2368 
   2369    return true;
   2370  }
   2371 
   2372  CallExpr *CurrentCall = nullptr;
   2373  bool TraverseCallExpr(CallExpr *E) {
   2374    const auto _ = ValueRollback(CurrentCall, E);
   2375    return Super::TraverseCallExpr(E);
   2376  }
   2377 
   2378  bool VisitCallExpr(CallExpr *E) {
   2379    Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts();
   2380 
   2381    if (TemplateStack) {
   2382      const auto CalleeLocation = [&] {
   2383        if (const auto *Member =
   2384                dyn_cast<CXXDependentScopeMemberExpr>(CalleeExpr)) {
   2385          return Member->getMemberLoc();
   2386        }
   2387        if (const auto *DeclRef =
   2388                dyn_cast<DependentScopeDeclRefExpr>(CalleeExpr)) {
   2389          return DeclRef->getLocation();
   2390        }
   2391        if (const auto *DeclRef = dyn_cast<DeclRefExpr>(CalleeExpr)) {
   2392          return DeclRef->getLocation();
   2393        }
   2394 
   2395        // Does the right thing for MemberExpr and UnresolvedMemberExpr at
   2396        // least.
   2397        return CalleeExpr->getExprLoc();
   2398      }();
   2399 
   2400      // If we are in a template:
   2401      // - when in GatherDependent mode and the callee is type-dependent,
   2402      //   register it in ForwardedTemplateLocations
   2403      // - when in AnalyseDependent mode and the callee is in
   2404      //   ForwardedTemplateLocations, convert the location to an actual Stmt*
   2405      //   in ForwardingTemplates
   2406      if (TemplateStack->inGatherMode()) {
   2407        if (CalleeExpr->isTypeDependent()) {
   2408          TemplateStack->visitDependent(CalleeLocation);
   2409          ForwardedTemplateLocations.insert(CalleeLocation.getRawEncoding());
   2410        }
   2411      } else {
   2412        if (ForwardedTemplateLocations.find(CalleeLocation.getRawEncoding()) !=
   2413            ForwardedTemplateLocations.end()) {
   2414          if (const auto *currentTemplate =
   2415                  getCurrentFunctionTemplateInstantiation()) {
   2416            ForwardingTemplates.insert({currentTemplate, E});
   2417          }
   2418        }
   2419      }
   2420    }
   2421 
   2422    Decl *Callee = E->getCalleeDecl();
   2423    if (!Callee || !FunctionDecl::classof(Callee)) {
   2424      return true;
   2425    }
   2426 
   2427    const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee);
   2428 
   2429    SourceLocation Loc;
   2430 
   2431    const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee);
   2432    if (F->isTemplateInstantiation()) {
   2433      NamedCallee = F->getTemplateInstantiationPattern();
   2434    }
   2435 
   2436    std::string Mangled = getMangledName(CurMangleContext, NamedCallee);
   2437    int Flags = 0;
   2438 
   2439    if (CXXOperatorCallExpr::classof(E)) {
   2440      // Just take the first token.
   2441      CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E);
   2442      Loc = Op->getOperatorLoc();
   2443      Flags |= NotIdentifierToken;
   2444    } else if (MemberExpr::classof(CalleeExpr)) {
   2445      MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr);
   2446      Loc = Member->getMemberLoc();
   2447    } else if (DeclRefExpr::classof(CalleeExpr)) {
   2448      // We handle this in VisitDeclRefExpr.
   2449      return true;
   2450    } else {
   2451      return true;
   2452    }
   2453 
   2454    if (!isInterestingLocation(Loc)) {
   2455      return true;
   2456    }
   2457 
   2458    if (F->isTemplateInstantiation()) {
   2459      VisitForwardedStatements(E, Loc);
   2460    }
   2461 
   2462    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2463 
   2464    std::vector<SourceRange> argRanges;
   2465    for (auto argExpr : E->arguments()) {
   2466      argRanges.push_back(argExpr->getSourceRange());
   2467    }
   2468 
   2469    visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc,
   2470                    Mangled, E->getCallReturnType(*AstContext),
   2471                    getContext(SpellingLoc), Flags, SourceRange(),
   2472                    SourceRange(), &argRanges);
   2473 
   2474    return true;
   2475  }
   2476 
   2477  bool VisitTagTypeLoc(TagTypeLoc L) {
   2478    SourceLocation Loc = L.getBeginLoc();
   2479    if (!isInterestingLocation(Loc)) {
   2480      return true;
   2481    }
   2482 
   2483    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2484 
   2485    TagDecl *Decl = L.getDecl();
   2486    std::string Mangled = getMangledName(CurMangleContext, Decl);
   2487    visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
   2488                    L.getType(), getContext(SpellingLoc));
   2489    return true;
   2490  }
   2491 
   2492  bool VisitTypedefTypeLoc(TypedefTypeLoc L) {
   2493    SourceLocation Loc = L.getBeginLoc();
   2494    if (!isInterestingLocation(Loc)) {
   2495      return true;
   2496    }
   2497 
   2498    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2499 
   2500    NamedDecl *Decl = L.getTypedefNameDecl();
   2501    std::string Mangled = getMangledName(CurMangleContext, Decl);
   2502    visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
   2503                    L.getType(), getContext(SpellingLoc));
   2504    return true;
   2505  }
   2506 
   2507  bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) {
   2508    SourceLocation Loc = L.getBeginLoc();
   2509    if (!isInterestingLocation(Loc)) {
   2510      return true;
   2511    }
   2512 
   2513    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2514 
   2515    NamedDecl *Decl = L.getDecl();
   2516    std::string Mangled = getMangledName(CurMangleContext, Decl);
   2517    visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
   2518                    L.getType(), getContext(SpellingLoc));
   2519    return true;
   2520  }
   2521 
   2522  bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) {
   2523    SourceLocation Loc = L.getBeginLoc();
   2524    if (!isInterestingLocation(Loc)) {
   2525      return true;
   2526    }
   2527 
   2528    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2529 
   2530    TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl();
   2531    if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) {
   2532      NamedDecl *Decl = D->getTemplatedDecl();
   2533      std::string Mangled = getMangledName(CurMangleContext, Decl);
   2534      visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
   2535                      QualType(), getContext(SpellingLoc));
   2536    } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) {
   2537      NamedDecl *Decl = D->getTemplatedDecl();
   2538      std::string Mangled = getMangledName(CurMangleContext, Decl);
   2539      visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled,
   2540                      QualType(), getContext(SpellingLoc));
   2541    }
   2542 
   2543    return true;
   2544  }
   2545 
   2546  bool VisitDependentNameTypeLoc(DependentNameTypeLoc L) {
   2547    SourceLocation Loc = L.getNameLoc();
   2548    if (!isInterestingLocation(Loc)) {
   2549      return true;
   2550    }
   2551 
   2552    for (const NamedDecl *D :
   2553         Resolver->resolveDependentNameType(L.getTypePtr())) {
   2554      visitHeuristicResult(Loc, D);
   2555    }
   2556    return true;
   2557  }
   2558 
   2559  void VisitForwardedStatements(const Expr *E, SourceLocation Loc) {
   2560    // If Loc itself is forwarded to its callers, do nothing
   2561    if (ForwardedTemplateLocations.find(Loc.getRawEncoding()) !=
   2562        ForwardedTemplateLocations.cend())
   2563      return;
   2564 
   2565    // If this is a forwarding template (eg MakeUnique), visit the forwarded
   2566    // statements
   2567    auto todo = std::stack{std::vector<const Stmt *>{E}};
   2568    auto seen = std::unordered_set<const Stmt *>{};
   2569    while (!todo.empty()) {
   2570      const auto forwarded = std::move(todo.top());
   2571      todo.pop();
   2572      if (seen.find(forwarded) != seen.end())
   2573        continue;
   2574      seen.insert(forwarded);
   2575 
   2576      if (const auto *C = dyn_cast<CXXConstructExpr>(forwarded))
   2577        VisitCXXConstructExpr(C, Loc);
   2578 
   2579      const Decl *Decl = nullptr;
   2580      if (const auto *D = dyn_cast<CallExpr>(forwarded))
   2581        Decl = D->getCalleeDecl();
   2582      if (const auto *D = dyn_cast<DeclRefExpr>(forwarded))
   2583        Decl = D->getDecl();
   2584 
   2585      if (!Decl)
   2586        continue;
   2587      const auto *F = Decl->getAsFunction();
   2588      if (!F)
   2589        continue;
   2590      if (!F->isTemplateInstantiation())
   2591        continue;
   2592      const auto [ForwardedBegin, ForwardedEnd] =
   2593          ForwardingTemplates.equal_range(F);
   2594      for (auto ForwardedIt = ForwardedBegin; ForwardedIt != ForwardedEnd;
   2595           ++ForwardedIt)
   2596        if (seen.find(ForwardedIt->second) == seen.end())
   2597          todo.push(ForwardedIt->second);
   2598    }
   2599  }
   2600 
   2601  bool VisitDeclRefExpr(const DeclRefExpr *E) {
   2602    SourceLocation Loc = E->getExprLoc();
   2603    if (!isInterestingLocation(Loc)) {
   2604      return true;
   2605    }
   2606 
   2607    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2608 
   2609    if (E->hasQualifier()) {
   2610      Loc = E->getNameInfo().getLoc();
   2611      SpellingLoc = SM.getSpellingLoc(Loc);
   2612    }
   2613 
   2614    const NamedDecl *Decl = E->getDecl();
   2615    if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) {
   2616      int Flags = 0;
   2617      if (D2->isLocalVarDeclOrParm()) {
   2618        Flags = NoCrossref;
   2619      }
   2620      std::string Mangled = getMangledName(CurMangleContext, Decl);
   2621      visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled,
   2622                      D2->getType(), getContext(SpellingLoc), Flags);
   2623    } else if (isa<FunctionDecl>(Decl)) {
   2624      const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl);
   2625      if (F->isTemplateInstantiation()) {
   2626        Decl = F->getTemplateInstantiationPattern();
   2627        VisitForwardedStatements(E, Loc);
   2628      }
   2629 
   2630      std::string Mangled = getMangledName(CurMangleContext, Decl);
   2631      visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled,
   2632                      E->getType(), getContext(SpellingLoc));
   2633    } else if (isa<EnumConstantDecl>(Decl)) {
   2634      std::string Mangled = getMangledName(CurMangleContext, Decl);
   2635      visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled,
   2636                      E->getType(), getContext(SpellingLoc));
   2637    }
   2638 
   2639    return true;
   2640  }
   2641 
   2642  bool VisitCXXConstructorDecl(CXXConstructorDecl *D) {
   2643    if (!isInterestingLocation(D->getLocation())) {
   2644      return true;
   2645    }
   2646 
   2647    for (CXXConstructorDecl::init_const_iterator It = D->init_begin();
   2648         It != D->init_end(); ++It) {
   2649      const CXXCtorInitializer *Ci = *It;
   2650      if (!Ci->getMember() || !Ci->isWritten()) {
   2651        continue;
   2652      }
   2653 
   2654      SourceLocation Loc = Ci->getMemberLocation();
   2655      if (!isInterestingLocation(Loc)) {
   2656        continue;
   2657      }
   2658 
   2659      FieldDecl *Member = Ci->getMember();
   2660      std::string Mangled = getMangledName(CurMangleContext, Member);
   2661      // We want the constructor to be the context of the field use and
   2662      // `getContext(D)` would skip the current context.  An alternate approach
   2663      // would be `getContext(Loc)` but the heuristic to omit a context if we're
   2664      // in a macro body expansion seems incorrect for field initializations; if
   2665      // code is using macros to initialize the fields, we still care.
   2666      visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled,
   2667                      Member->getType(), translateContext(D));
   2668    }
   2669 
   2670    return true;
   2671  }
   2672 
   2673  bool VisitMemberExpr(MemberExpr *E) {
   2674    SourceLocation Loc = E->getExprLoc();
   2675    if (!isInterestingLocation(Loc)) {
   2676      return true;
   2677    }
   2678 
   2679    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2680 
   2681    ValueDecl *Decl = E->getMemberDecl();
   2682    if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) {
   2683      std::string Mangled = getMangledName(CurMangleContext, Field);
   2684      visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled,
   2685                      Field->getType(), getContext(SpellingLoc));
   2686    }
   2687    return true;
   2688  }
   2689 
   2690  // Helper function for producing heuristic results for usages in dependent
   2691  // code. These are distinguished from concrete results (obtained for dependent
   2692  // code using the AutoTemplateContext machinery) by setting the “confidence”
   2693  // property to “cppTemplateHeuristic”. We don't expect this method to be
   2694  // intentionally called multiple times for a given (Loc, NamedDecl) pair
   2695  // because our callers should be mutually exclusive AST node types. However,
   2696  // it's fine if this method is called multiple time for a given pair because
   2697  // we explicitly de-duplicate records with an identical string representation
   2698  // (which is a good reason to have this helper, as it ensures identical
   2699  // representations).
   2700  void visitHeuristicResult(SourceLocation Loc, const NamedDecl *ND) {
   2701    SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
   2702 
   2703    if (const UsingShadowDecl *USD = dyn_cast<UsingShadowDecl>(ND)) {
   2704      ND = USD->getTargetDecl();
   2705    }
   2706    if (const TemplateDecl *TD = dyn_cast<TemplateDecl>(ND)) {
   2707      ND = TD->getTemplatedDecl();
   2708    }
   2709    QualType MaybeType;
   2710    const char *SyntaxKind = nullptr;
   2711    if (const FunctionDecl *F = dyn_cast<FunctionDecl>(ND)) {
   2712      MaybeType = F->getType();
   2713      SyntaxKind = "function";
   2714    } else if (const FieldDecl *F = dyn_cast<FieldDecl>(ND)) {
   2715      MaybeType = F->getType();
   2716      SyntaxKind = "field";
   2717    } else if (const EnumConstantDecl *E = dyn_cast<EnumConstantDecl>(ND)) {
   2718      MaybeType = E->getType();
   2719      SyntaxKind = "enum";
   2720    } else if (const TypedefNameDecl *T = dyn_cast<TypedefNameDecl>(ND)) {
   2721      MaybeType = T->getUnderlyingType();
   2722      SyntaxKind = "type";
   2723    }
   2724    if (SyntaxKind) {
   2725      std::string Mangled = getMangledName(CurMangleContext, ND);
   2726      visitIdentifier("use", SyntaxKind, getQualifiedName(ND), Loc, Mangled,
   2727                      MaybeType, getContext(SpellingLoc), Heuristic);
   2728    }
   2729  }
   2730 
   2731  bool arityMatchesCurrentCallExpr(const Expr *E, const NamedDecl *Candidate) {
   2732    const auto IsCurrentCallee = CurrentCall && E == CurrentCall->getCallee();
   2733    const auto CallNumArgs =
   2734        IsCurrentCallee ? CurrentCall->getNumArgs() : std::optional<uint>{};
   2735 
   2736    const FunctionDecl *CandidateFunc;
   2737    if (const auto *UsingDecl = dyn_cast<UsingShadowDecl>(Candidate)) {
   2738      CandidateFunc = UsingDecl->getTargetDecl()->getAsFunction();
   2739    } else {
   2740      CandidateFunc = Candidate->getAsFunction();
   2741    }
   2742 
   2743    // We try and filter candidates by arity, but be conservative and accept
   2744    // them when we don't know better
   2745    if (!CandidateFunc || !CallNumArgs) {
   2746      return true;
   2747    }
   2748 
   2749    const auto MinNumArgs = CandidateFunc->getMinRequiredExplicitArguments();
   2750    const auto MaxNumArgs = [&]() -> std::optional<uint> {
   2751      const auto IsVariadic =
   2752          CandidateFunc->isVariadic() ||
   2753          std::any_of(CandidateFunc->param_begin(), CandidateFunc->param_end(),
   2754                      [](const ParmVarDecl *param) {
   2755                        return param->isParameterPack();
   2756                      });
   2757 
   2758      if (IsVariadic)
   2759        return {};
   2760 
   2761      return CandidateFunc->getNumNonObjectParams();
   2762    }();
   2763 
   2764    if (CallNumArgs < MinNumArgs || (MaxNumArgs && CallNumArgs > *MaxNumArgs)) {
   2765      return false;
   2766    }
   2767 
   2768    return true;
   2769  }
   2770 
   2771  bool VisitOverloadExpr(OverloadExpr *E) {
   2772    SourceLocation Loc = E->getExprLoc();
   2773    normalizeLocation(&Loc);
   2774    if (!isInterestingLocation(Loc)) {
   2775      return true;
   2776    }
   2777 
   2778    for (auto *Candidate : E->decls()) {
   2779      if (arityMatchesCurrentCallExpr(E, Candidate))
   2780        visitHeuristicResult(Loc, Candidate);
   2781    }
   2782 
   2783    // Also record this location so that if we have instantiations, we can
   2784    // gather more accurate results from them.
   2785    if (TemplateStack) {
   2786      TemplateStack->visitDependent(Loc);
   2787    }
   2788    return true;
   2789  }
   2790 
   2791  bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) {
   2792    SourceLocation Loc = E->getMemberLoc();
   2793    normalizeLocation(&Loc);
   2794    if (!isInterestingLocation(Loc)) {
   2795      return true;
   2796    }
   2797 
   2798    for (const NamedDecl *Candidate : Resolver->resolveMemberExpr(E)) {
   2799      if (arityMatchesCurrentCallExpr(E, Candidate))
   2800        visitHeuristicResult(Loc, Candidate);
   2801    }
   2802 
   2803    // Also record this location so that if we have instantiations, we can
   2804    // gather more accurate results from them.
   2805    if (TemplateStack) {
   2806      TemplateStack->visitDependent(Loc);
   2807    }
   2808    return true;
   2809  }
   2810 
   2811  bool VisitCXXNewExpr(CXXNewExpr *N) {
   2812    // If we are in a template and the new is type-dependent, register it in
   2813    // ForwardedTemplateLocations to forward its uses to the surrounding
   2814    // template call site
   2815    if (TemplateStack && TemplateStack->inGatherMode()) {
   2816      const auto *TypeInfo = N->getAllocatedTypeSourceInfo();
   2817      const auto ConstructExprLoc = TypeInfo->getTypeLoc().getBeginLoc();
   2818      if (N->isTypeDependent()) {
   2819        TemplateStack->visitDependent(ConstructExprLoc);
   2820        ForwardedTemplateLocations.insert(ConstructExprLoc.getRawEncoding());
   2821      }
   2822    }
   2823    return true;
   2824  }
   2825 
   2826  bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) {
   2827    SourceLocation Loc = E->getLocation();
   2828    normalizeLocation(&Loc);
   2829    if (!isInterestingLocation(Loc)) {
   2830      return true;
   2831    }
   2832 
   2833    for (const NamedDecl *Candidate : Resolver->resolveDeclRefExpr(E)) {
   2834      if (arityMatchesCurrentCallExpr(E, Candidate))
   2835        visitHeuristicResult(Loc, Candidate);
   2836    }
   2837 
   2838    // Also record this location so that if we have instantiations, we can
   2839    // gather more accurate results from them.
   2840    if (TemplateStack) {
   2841      TemplateStack->visitDependent(Loc);
   2842 
   2843      // Also record the dependent NestedNameSpecifier locations
   2844      for (auto NestedNameLoc = E->getQualifierLoc();
   2845           NestedNameLoc &&
   2846           NestedNameLoc.getNestedNameSpecifier()->isDependent();
   2847           NestedNameLoc = NestedNameLoc.getPrefix()) {
   2848        TemplateStack->visitDependent(NestedNameLoc.getLocalBeginLoc());
   2849      }
   2850    }
   2851 
   2852    return true;
   2853  }
   2854 
   2855  bool VisitStringLiteral(StringLiteral *E) {
   2856    if (E->getCharByteWidth() != 1) {
   2857      return true;
   2858    }
   2859 
   2860    StringRef sref = E->getString();
   2861    std::string s = sref.str();
   2862 
   2863    bool isMozSrc = stringStartsWith(s, "moz-src:///");
   2864 
   2865    if (!stringStartsWith(s, "chrome://") &&
   2866        !stringStartsWith(s, "resource://") &&
   2867        !isMozSrc) {
   2868      return true;
   2869    }
   2870 
   2871    if (!isASCII(s)) {
   2872      return true;
   2873    }
   2874 
   2875    SourceLocation Loc = E->getStrTokenLoc(0);
   2876    normalizeLocation(&Loc);
   2877 
   2878    std::string symbol;
   2879 
   2880    if (isMozSrc) {
   2881      symbol = std::string("FILE_") + mangleFile(s.substr(11), FileType::Source);
   2882    } else {
   2883      symbol = std::string("URL_") + mangleURL(s);
   2884    }
   2885 
   2886    visitIdentifier("use", "file", StringRef(s), Loc, symbol, QualType(),
   2887                    Context(), NotIdentifierToken | LocRangeEndValid);
   2888 
   2889    return true;
   2890  }
   2891 
   2892  void enterSourceFile(SourceLocation Loc) {
   2893    normalizeLocation(&Loc);
   2894    FileInfo *newFile = getFileInfo(Loc);
   2895    if (!newFile->Interesting) {
   2896      return;
   2897    }
   2898    FileType type = newFile->Generated ? FileType::Generated : FileType::Source;
   2899    std::string symbol =
   2900        std::string("FILE_") + mangleFile(newFile->Realname, type);
   2901 
   2902    // We use an explicit zero-length source range at the start of the file. If
   2903    // we don't set the LocRangeEndValid flag, the visitIdentifier code will use
   2904    // the entire first token, which could be e.g. a long multiline-comment.
   2905    visitIdentifier("def", "file", newFile->Realname, SourceRange(Loc), symbol,
   2906                    QualType(), Context(),
   2907                    NotIdentifierToken | LocRangeEndValid);
   2908  }
   2909 
   2910  void inclusionDirective(SourceLocation HashLoc, SourceRange FileNameRange, const FileEntry *File) {
   2911    std::string includedFile(File->tryGetRealPathName());
   2912    FileType type = relativizePath(includedFile, CI.getHeaderSearchOpts());
   2913    if (type == FileType::Unknown) {
   2914      return;
   2915    }
   2916    std::string symbol = std::string("FILE_") + mangleFile(includedFile, type);
   2917 
   2918    // Support the #include MACRO use-case
   2919    // When parsing #include MACRO:
   2920    // - the filename is never passed to onTokenLexed
   2921    // - inclusionDirective is called before endMacroExpansion (which is only
   2922    // called when the following token is parsed) So add the filename here and
   2923    // call endMacroExpansion immediately. This ensures the macro has a correct
   2924    // expansion and it has been added to MacroMaps so the referenced filename
   2925    // knows to populate inExpansionAt.
   2926    if (MacroExpansionState) {
   2927      MacroExpansionState->TokenLocations[FileNameRange.getBegin()] =
   2928          MacroExpansionState->Expansion.length();
   2929      MacroExpansionState->Expansion += '"';
   2930      MacroExpansionState->Expansion += includedFile;
   2931      MacroExpansionState->Expansion += '"';
   2932      endMacroExpansion();
   2933    }
   2934 
   2935    normalizeLocation(&HashLoc);
   2936    FileInfo *thisFile = getFileInfo(HashLoc);
   2937    FileType thisType = thisFile->Generated ? FileType::Generated : FileType::Source;
   2938    std::string thisFilePretty = thisFile->Realname;
   2939    std::string thisFileSym =
   2940        std::string("FILE_") + mangleFile(thisFile->Realname, thisType);
   2941 
   2942    visitIdentifier("use", "file", includedFile, FileNameRange, symbol,
   2943                    QualType(), Context(thisFilePretty, thisFileSym),
   2944                    NotIdentifierToken | LocRangeEndValid);
   2945  }
   2946 
   2947  void macroDefined(const Token &Tok, const MacroDirective *Macro) {
   2948    if (Macro->getMacroInfo()->isBuiltinMacro()) {
   2949      return;
   2950    }
   2951    SourceLocation Loc = Tok.getLocation();
   2952    normalizeLocation(&Loc);
   2953    if (!isInterestingLocation(Loc)) {
   2954      return;
   2955    }
   2956 
   2957    IdentifierInfo *Ident = Tok.getIdentifierInfo();
   2958    if (Ident) {
   2959      std::string Mangled = std::string("M_") +
   2960                            mangleLocation(Loc, std::string(Ident->getName()));
   2961      visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled);
   2962    }
   2963  }
   2964 
   2965  void macroUsed(const Token &Tok, const MacroInfo *Macro) {
   2966    if (!Macro) {
   2967      return;
   2968    }
   2969    if (Macro->isBuiltinMacro()) {
   2970      return;
   2971    }
   2972    SourceLocation Loc = Tok.getLocation();
   2973    if (!isInterestingLocation(Loc)) {
   2974      return;
   2975    }
   2976 
   2977    IdentifierInfo *Ident = Tok.getIdentifierInfo();
   2978    if (Ident) {
   2979      std::string Mangled =
   2980          std::string("M_") + mangleLocation(Macro->getDefinitionLoc(),
   2981                                             std::string(Ident->getName()));
   2982      visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled);
   2983    }
   2984  }
   2985 
   2986  void beginMacroExpansion(const Token &Tok, const MacroInfo *Macro,
   2987                           SourceRange Range) {
   2988    if (!Macro)
   2989      return;
   2990 
   2991    if (Macro->isBuiltinMacro())
   2992      return;
   2993 
   2994    if (!Tok.getIdentifierInfo())
   2995      return;
   2996 
   2997    auto location = Tok.getLocation();
   2998    normalizeLocation(&location);
   2999    if (!isInterestingLocation(location))
   3000      return;
   3001 
   3002    if (MacroExpansionState) {
   3003      const auto InMacroArgs = MacroExpansionState->Range.fullyContains(
   3004          SM.getExpansionRange(Range).getAsRange());
   3005      const auto InMacroBody =
   3006          SM.getExpansionLoc(Tok.getLocation()) ==
   3007          SM.getExpansionLoc(MacroExpansionState->MacroNameToken.getLocation());
   3008      if (InMacroArgs || InMacroBody) {
   3009        if (MacroExpansionState->MacroInfo->getDefinitionLoc() !=
   3010            Macro->getDefinitionLoc()) {
   3011          IdentifierInfo *DependencyIdent = Tok.getIdentifierInfo();
   3012          std::string DependencySymbol =
   3013              std::string("M_") +
   3014              mangleLocation(Macro->getDefinitionLoc(),
   3015                             std::string(DependencyIdent->getName()));
   3016 
   3017          MacroExpansionState->Dependencies.push_back(DependencySymbol);
   3018        }
   3019 
   3020        macroUsed(Tok, Macro);
   3021        return;
   3022      }
   3023 
   3024      endMacroExpansion();
   3025    }
   3026 
   3027    MacroExpansionState = ::MacroExpansionState{
   3028        .MacroNameToken = Tok,
   3029        .MacroInfo = Macro,
   3030        .Expansion = {},
   3031        .TokenLocations = {},
   3032        .Range = Range,
   3033        .PrevPrevTok = {},
   3034        .PrevTok = {},
   3035    };
   3036  }
   3037 
   3038  void endMacroExpansion() {
   3039    // large macros are too slow to reformat, don't reformat macros larger than
   3040    // those arbitrary thresholds
   3041    static constexpr auto includedFileExpansionReformatThreshold = 20'000;
   3042    static constexpr auto mainFileExpansionReformatThreshold = 200'000;
   3043 
   3044    const auto expansionLocation =
   3045        SM.getExpansionLoc(MacroExpansionState->MacroNameToken.getLocation());
   3046    const auto expansionFilename = SM.getFilename(expansionLocation);
   3047    const auto includedExtensions =
   3048        std::array{".h", ".hpp", ".hxx", ".inc", ".def"};
   3049    const auto isIncludedFile =
   3050        std::any_of(includedExtensions.begin(), includedExtensions.end(),
   3051                    [&](const auto *extension) {
   3052                      return expansionFilename.ends_with_insensitive(extension);
   3053                    });
   3054    const auto expansionReformatThreshold =
   3055        isIncludedFile ? includedFileExpansionReformatThreshold
   3056                       : mainFileExpansionReformatThreshold;
   3057 
   3058    if (MacroExpansionState->Expansion.length() < expansionReformatThreshold) {
   3059      // large macros are too memory-hungry to reformat with ColumnLimit != 0
   3060      // see https://github.com/llvm/llvm-project/issues/107434
   3061      auto style = clang::format::getMozillaStyle();
   3062      if (MacroExpansionState->Expansion.length() >
   3063          includedFileExpansionReformatThreshold)
   3064        style.ColumnLimit = 0;
   3065 
   3066      const auto replacements = clang::format::reformat(
   3067          style, MacroExpansionState->Expansion,
   3068          {tooling::Range(0, MacroExpansionState->Expansion.length())});
   3069      auto formatted = clang::tooling::applyAllReplacements(
   3070          MacroExpansionState->Expansion, replacements);
   3071      if (formatted) {
   3072        for (auto &[k, v] : MacroExpansionState->TokenLocations) {
   3073          v = replacements.getShiftedCodePosition(v);
   3074        }
   3075        MacroExpansionState->Expansion = std::move(formatted.get());
   3076      }
   3077    }
   3078 
   3079    IdentifierInfo *Ident =
   3080        MacroExpansionState->MacroNameToken.getIdentifierInfo();
   3081    std::string Symbol =
   3082        std::string("M_") +
   3083        mangleLocation(MacroExpansionState->MacroInfo->getDefinitionLoc(),
   3084                       std::string(Ident->getName()));
   3085 
   3086    const auto dependenciesBegin = MacroExpansionState->Dependencies.begin();
   3087    const auto dependenciesEnd = MacroExpansionState->Dependencies.end();
   3088    std::sort(dependenciesBegin, dependenciesEnd);
   3089    MacroExpansionState->Dependencies.erase(
   3090        std::unique(dependenciesBegin, dependenciesEnd), dependenciesEnd);
   3091 
   3092    auto Key = Symbol;
   3093    for (const auto &Dependency : MacroExpansionState->Dependencies) {
   3094      Key.push_back(',');
   3095      Key += Dependency;
   3096    }
   3097 
   3098    MacroMaps.emplace(std::pair{
   3099        MacroExpansionState->MacroNameToken.getLocation(),
   3100        ExpandedMacro{
   3101            std::move(Symbol),
   3102            std::move(Key),
   3103            std::move(MacroExpansionState->Expansion),
   3104            std::move(MacroExpansionState->TokenLocations),
   3105        },
   3106    });
   3107 
   3108    MacroExpansionState.reset();
   3109 
   3110    macroUsed(MacroExpansionState->MacroNameToken,
   3111              MacroExpansionState->MacroInfo);
   3112  }
   3113 
   3114  void onTokenLexed(const Token &Tok) {
   3115    if (!MacroExpansionState)
   3116      return;
   3117 
   3118    // check if we exited the macro expansion
   3119    SourceLocation SLoc = Tok.getLocation();
   3120    if (!SLoc.isMacroID()) {
   3121      endMacroExpansion();
   3122      return;
   3123    }
   3124 
   3125    if (ConcatInfo.AvoidConcat(MacroExpansionState->PrevPrevTok,
   3126                               MacroExpansionState->PrevTok, Tok)) {
   3127      MacroExpansionState->Expansion += ' ';
   3128    }
   3129 
   3130    if (Tok.isAnnotation()) {
   3131      const auto Range = SM.getImmediateExpansionRange(Tok.getLocation());
   3132      const char *Start = SM.getCharacterData(Range.getBegin());
   3133      const char *End = SM.getCharacterData(Range.getEnd()) + 1;
   3134      MacroExpansionState->Expansion += StringRef(Start, End - Start);
   3135    } else {
   3136      const auto spelling = CI.getPreprocessor().getSpelling(Tok);
   3137      if (Tok.isAnyIdentifier()) {
   3138        MacroExpansionState->TokenLocations[SLoc] =
   3139            MacroExpansionState->Expansion.length();
   3140      }
   3141      MacroExpansionState->Expansion += spelling;
   3142    }
   3143 
   3144    MacroExpansionState->PrevPrevTok = MacroExpansionState->PrevTok;
   3145    MacroExpansionState->PrevTok = Tok;
   3146  }
   3147 };
   3148 
   3149 void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason,
   3150                                   SrcMgr::CharacteristicKind FileType,
   3151                                   FileID PrevFID = FileID()) {
   3152  switch (Reason) {
   3153  case PPCallbacks::RenameFile:
   3154  case PPCallbacks::SystemHeaderPragma:
   3155    // Don't care about these, since we want the actual on-disk filenames
   3156    break;
   3157  case PPCallbacks::EnterFile:
   3158    Indexer->enterSourceFile(Loc);
   3159    break;
   3160  case PPCallbacks::ExitFile:
   3161    // Don't care about exiting files
   3162    break;
   3163  }
   3164 }
   3165 
   3166 void PreprocessorHook::InclusionDirective(
   3167    SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
   3168    bool IsAngled, CharSourceRange FileNameRange,
   3169 #if CLANG_VERSION_MAJOR >= 16
   3170    OptionalFileEntryRef File,
   3171 #elif CLANG_VERSION_MAJOR >= 15
   3172    Optional<FileEntryRef> File,
   3173 #else
   3174    const FileEntry *File,
   3175 #endif
   3176    StringRef SearchPath, StringRef RelativePath,
   3177 #if CLANG_VERSION_MAJOR >= 19
   3178    const Module *SuggestedModule, bool ModuleImported,
   3179 #else
   3180    const Module *Imported,
   3181 #endif
   3182    SrcMgr::CharacteristicKind FileType) {
   3183 #if CLANG_VERSION_MAJOR >= 15
   3184  if (!File) {
   3185    return;
   3186  }
   3187  Indexer->inclusionDirective(HashLoc, FileNameRange.getAsRange(),
   3188                              &File->getFileEntry());
   3189 #else
   3190  Indexer->inclusionDirective(HashLoc, FileNameRange.getAsRange(), File);
   3191 #endif
   3192 }
   3193 
// Macro-definition callback: passes the macro name token and its directive
// straight through to the indexer's macroDefined().
void PreprocessorHook::MacroDefined(const Token &Tok,
                                    const MacroDirective *Md) {
  Indexer->macroDefined(Tok, Md);
}
   3198 
   3199 void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md,
   3200                                    SourceRange Range, const MacroArgs *Ma) {
   3201  Indexer->beginMacroExpansion(Tok, Md.getMacroInfo(), Range);
   3202 }
   3203 
   3204 void PreprocessorHook::MacroUndefined(const Token &Tok,
   3205                                      const MacroDefinition &Md,
   3206                                      const MacroDirective *Undef) {
   3207  Indexer->macroUsed(Tok, Md.getMacroInfo());
   3208 }
   3209 
   3210 void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md,
   3211                               SourceRange Range) {
   3212  Indexer->macroUsed(Tok, Md.getMacroInfo());
   3213 }
   3214 
   3215 void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok,
   3216                             const MacroDefinition &Md) {
   3217  Indexer->macroUsed(Tok, Md.getMacroInfo());
   3218 }
   3219 
   3220 void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok,
   3221                              const MacroDefinition &Md) {
   3222  Indexer->macroUsed(Tok, Md.getMacroInfo());
   3223 }
   3224 
   3225 class IndexAction : public PluginASTAction {
   3226 protected:
   3227  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
   3228                                                 llvm::StringRef F) {
   3229    return make_unique<IndexConsumer>(CI);
   3230  }
   3231 
   3232  bool ParseArgs(const CompilerInstance &CI,
   3233                 const std::vector<std::string> &Args) {
   3234    if (Args.size() != 3) {
   3235      DiagnosticsEngine &D = CI.getDiagnostics();
   3236      unsigned DiagID = D.getCustomDiagID(
   3237          DiagnosticsEngine::Error,
   3238          "Need arguments for the source, output, and object directories");
   3239      D.Report(DiagID);
   3240      return false;
   3241    }
   3242 
   3243    // Load our directories
   3244    Srcdir = getAbsolutePath(Args[0]);
   3245    if (Srcdir.empty()) {
   3246      DiagnosticsEngine &D = CI.getDiagnostics();
   3247      unsigned DiagID = D.getCustomDiagID(
   3248          DiagnosticsEngine::Error, "Source directory '%0' does not exist");
   3249      D.Report(DiagID) << Args[0];
   3250      return false;
   3251    }
   3252 
   3253    ensurePath(Args[1] + PATHSEP_STRING);
   3254    Outdir = getAbsolutePath(Args[1]);
   3255    Outdir += PATHSEP_STRING;
   3256 
   3257    Objdir = getAbsolutePath(Args[2]);
   3258    if (Objdir.empty()) {
   3259      DiagnosticsEngine &D = CI.getDiagnostics();
   3260      unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error,
   3261                                          "Objdir '%0' does not exist");
   3262      D.Report(DiagID) << Args[2];
   3263      return false;
   3264    }
   3265    Objdir += PATHSEP_STRING;
   3266 
   3267    printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(),
   3268           Objdir.c_str());
   3269 
   3270    return true;
   3271  }
   3272 
   3273  void printHelp(llvm::raw_ostream &Ros) {
   3274    Ros << "Help for mozsearch plugin goes here\n";
   3275  }
   3276 };
   3277 
// Registers IndexAction in the frontend plugin registry under the name
// "mozsearch-index", with a short description of the plugin.
static FrontendPluginRegistry::Add<IndexAction>
    Y("mozsearch-index", "create the mozsearch index database");