MozsearchIndexer.cpp (112414B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "clang/AST/AST.h" 7 #include "clang/AST/ASTConsumer.h" 8 #include "clang/AST/ASTContext.h" 9 #include "clang/AST/Expr.h" 10 #include "clang/AST/ExprCXX.h" 11 #include "clang/AST/Mangle.h" 12 #include "clang/AST/RecordLayout.h" 13 #include "clang/AST/RecursiveASTVisitor.h" 14 #include "clang/Basic/FileManager.h" 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Basic/Version.h" 17 #include "clang/Format/Format.h" 18 #include "clang/Frontend/CompilerInstance.h" 19 #include "clang/Frontend/FrontendPluginRegistry.h" 20 #include "clang/Lex/Lexer.h" 21 #include "clang/Lex/PPCallbacks.h" 22 #include "clang/Lex/Preprocessor.h" 23 #include "clang/Lex/TokenConcatenation.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/Support/JSON.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <fstream> 30 #include <iostream> 31 #include <map> 32 #include <memory> 33 #include <sstream> 34 #include <stack> 35 #include <string> 36 #include <unordered_set> 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 41 #include "BindingOperations.h" 42 #include "FileOperations.h" 43 #include "StringOperations.h" 44 #include "from-clangd/HeuristicResolver.h" 45 46 #if CLANG_VERSION_MAJOR < 8 47 // Starting with Clang 8.0 some basic functions have been renamed 48 #define getBeginLoc getLocStart 49 #define getEndLoc getLocEnd 50 #endif 51 // We want std::make_unique, but that's only available in c++14. In versions 52 // prior to that, we need to fall back to llvm's make_unique. It's also the 53 // case that we expect clang 10 to build with c++14 and clang 9 and earlier to 54 // build with c++11, at least as suggested by the llvm-config --cxxflags on 55 // non-windows platforms. firefox-main seems to build with -std=c++17 on 56 // windows so we need to make this decision based on __cplusplus instead of 57 // the CLANG_VERSION_MAJOR. 58 #if __cplusplus < 201402L 59 using llvm::make_unique; 60 #else 61 using std::make_unique; 62 #endif 63 64 using namespace clang; 65 66 const std::string GENERATED("__GENERATED__" PATHSEP_STRING); 67 68 // Absolute path to directory containing source code. 69 std::string Srcdir; 70 71 // Absolute path to objdir (including generated code). 72 std::string Objdir; 73 74 // Absolute path where analysis JSON output will be stored. 75 std::string Outdir; 76 77 enum class FileType { 78 // The file was either in the source tree nor objdir. It might be a system 79 // include, for example. 80 Unknown, 81 // A file from the source tree. 82 Source, 83 // A file from the objdir. 84 Generated, 85 }; 86 87 // Takes an absolute path to a file, and returns the type of file it is. If 88 // it's a Source or Generated file, the provided inout path argument is modified 89 // in-place so that it is relative to the source dir or objdir, respectively. 90 // Otherwise we strip the first include path that matches, if any. 91 FileType relativizePath(std::string &path, const HeaderSearchOptions &HeaderSearchOpts) { 92 if (path.compare(0, Objdir.length(), Objdir) == 0) { 93 path.replace(0, Objdir.length(), GENERATED); 94 return FileType::Generated; 95 } 96 // Empty filenames can get turned into Srcdir when they are resolved as 97 // absolute paths, so we should exclude files that are exactly equal to 98 // Srcdir or anything outside Srcdir. 99 if (path.length() > Srcdir.length() && 100 path.compare(0, Srcdir.length(), Srcdir) == 0) { 101 // Remove the trailing `/' as well. 102 path.erase(0, Srcdir.length() + 1); 103 return FileType::Source; 104 } 105 106 for (const auto &Entry : HeaderSearchOpts.UserEntries) { 107 if (path.compare(0, Entry.Path.length(), Entry.Path) == 0) { 108 path.erase(0, Entry.Path.size() + 1); 109 break; 110 } 111 } 112 113 return FileType::Unknown; 114 } 115 116 #if !defined(_WIN32) && !defined(_WIN64) 117 #include <sys/time.h> 118 119 static double time() { 120 struct timeval Tv; 121 gettimeofday(&Tv, nullptr); 122 return double(Tv.tv_sec) + double(Tv.tv_usec) / 1000000.; 123 } 124 #endif 125 126 // Return true if |input| is a valid C++ identifier. We don't want to generate 127 // analysis information for operators, string literals, etc. by accident since 128 // it trips up consumers of the data. 129 static bool isValidIdentifier(std::string Input) { 130 for (char C : Input) { 131 if (!(isalpha(C) || isdigit(C) || C == '_')) { 132 return false; 133 } 134 } 135 return true; 136 } 137 138 template <size_t N> 139 static bool stringStartsWith(const std::string &Input, 140 const char (&Prefix)[N]) { 141 return Input.length() > N - 1 && memcmp(Input.c_str(), Prefix, N - 1) == 0; 142 } 143 144 static bool isASCII(const std::string &Input) { 145 for (char C : Input) { 146 if (C & 0x80) { 147 return false; 148 } 149 } 150 return true; 151 } 152 153 struct RAIITracer { 154 RAIITracer(const char *log) : mLog(log) { printf("<%s>\n", mLog); } 155 156 ~RAIITracer() { printf("</%s>\n", mLog); } 157 158 const char *mLog; 159 }; 160 161 #define TRACEFUNC RAIITracer tracer(__FUNCTION__); 162 163 // Sets variable to value on creation then resets variable to its original 164 // value on destruction 165 template <typename T> class ValueRollback { 166 public: 167 template <typename U = T> 168 ValueRollback(T &variable, U &&value) 169 : mVariable{&variable}, 170 mSavedValue{std::exchange(variable, std::forward<U>(value))} {} 171 172 ValueRollback(ValueRollback &&other) noexcept 173 : mVariable{std::exchange(other.mVariable, nullptr)}, 174 mSavedValue{std::move(other.mSavedValue)} {} 175 176 ValueRollback(const ValueRollback &) = delete; 177 ValueRollback &operator=(ValueRollback &&) = delete; 178 ValueRollback &operator=(const ValueRollback &) = delete; 179 180 ~ValueRollback() { 181 if (mVariable) 182 *mVariable = std::move(mSavedValue); 183 } 184 185 private: 186 T *mVariable; 187 T mSavedValue; 188 }; 189 190 class IndexConsumer; 191 192 bool isPure(FunctionDecl *D) { 193 #if CLANG_VERSION_MAJOR >= 18 194 return D->isPureVirtual(); 195 #else 196 return D->isPure(); 197 #endif 198 } 199 200 // For each C++ file seen by the analysis (.cpp or .h), we track a 201 // FileInfo. This object tracks whether the file is "interesting" (i.e., whether 202 // it's in the source dir or the objdir). We also store the analysis output 203 // here. 204 struct FileInfo { 205 FileInfo(std::string &Rname, const HeaderSearchOptions &HeaderSearchOptions) : Realname(Rname) { 206 switch (relativizePath(Realname, HeaderSearchOptions)) { 207 case FileType::Generated: 208 Interesting = true; 209 Generated = true; 210 break; 211 case FileType::Source: 212 Interesting = true; 213 Generated = false; 214 break; 215 case FileType::Unknown: 216 Interesting = false; 217 Generated = false; 218 break; 219 } 220 } 221 std::string Realname; 222 std::vector<std::string> Output; 223 bool Interesting; 224 bool Generated; 225 }; 226 227 struct MacroExpansionState { 228 Token MacroNameToken; 229 const MacroInfo *MacroInfo = nullptr; 230 // other macro symbols this expansion depends on 231 std::vector<std::string> Dependencies; 232 std::string Expansion; 233 std::map<SourceLocation, unsigned> TokenLocations; 234 SourceRange Range; 235 Token PrevPrevTok; 236 Token PrevTok; 237 }; 238 239 struct ExpandedMacro { 240 std::string Symbol; 241 std::string Key; // "{Symbol}(,{Dependencies})..." 242 std::string Expansion; 243 std::map<SourceLocation, unsigned> TokenLocations; 244 }; 245 246 class IndexConsumer; 247 248 class PreprocessorHook : public PPCallbacks { 249 IndexConsumer *Indexer; 250 251 public: 252 PreprocessorHook(IndexConsumer *C) : Indexer(C) {} 253 254 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, 255 SrcMgr::CharacteristicKind FileType, 256 FileID PrevFID) override; 257 258 virtual void InclusionDirective(SourceLocation HashLoc, 259 const Token &IncludeTok, StringRef FileName, 260 bool IsAngled, CharSourceRange FileNameRange, 261 #if CLANG_VERSION_MAJOR >= 16 262 OptionalFileEntryRef File, 263 #elif CLANG_VERSION_MAJOR >= 15 264 Optional<FileEntryRef> File, 265 #else 266 const FileEntry *File, 267 #endif 268 StringRef SearchPath, StringRef RelativePath, 269 #if CLANG_VERSION_MAJOR >= 19 270 const Module *SuggestedModule, 271 bool ModuleImported, 272 #else 273 const Module *Imported, 274 #endif 275 SrcMgr::CharacteristicKind FileType) override; 276 277 virtual void MacroDefined(const Token &Tok, 278 const MacroDirective *Md) override; 279 280 virtual void MacroExpands(const Token &Tok, const MacroDefinition &Md, 281 SourceRange Range, const MacroArgs *Ma) override; 282 virtual void MacroUndefined(const Token &Tok, const MacroDefinition &Md, 283 const MacroDirective *Undef) override; 284 virtual void Defined(const Token &Tok, const MacroDefinition &Md, 285 SourceRange Range) override; 286 virtual void Ifdef(SourceLocation Loc, const Token &Tok, 287 const MacroDefinition &Md) override; 288 virtual void Ifndef(SourceLocation Loc, const Token &Tok, 289 const MacroDefinition &Md) override; 290 }; 291 292 class IndexConsumer : public ASTConsumer, 293 public RecursiveASTVisitor<IndexConsumer>, 294 public DiagnosticConsumer { 295 private: 296 CompilerInstance &CI; 297 SourceManager &SM; 298 LangOptions &LO; 299 std::map<FileID, std::unique_ptr<FileInfo>> FileMap; 300 MangleContext *CurMangleContext; 301 ASTContext *AstContext; 302 std::unique_ptr<clangd::HeuristicResolver> Resolver; 303 304 // Used during a macro expansion to build the expanded string 305 TokenConcatenation ConcatInfo; 306 std::optional<MacroExpansionState> MacroExpansionState; 307 // Keeps track of the positions of tokens inside each expanded macro 308 std::map<SourceLocation, ExpandedMacro> MacroMaps; 309 310 typedef RecursiveASTVisitor<IndexConsumer> Super; 311 312 // Tracks the set of declarations that the current expression/statement is 313 // nested inside of. 314 struct AutoSetContext { 315 AutoSetContext(IndexConsumer *Self, NamedDecl *Context, 316 bool VisitImplicit = false) 317 : Self(Self), Prev(Self->CurDeclContext), Decl(Context) { 318 this->VisitImplicit = 319 VisitImplicit || (Prev ? Prev->VisitImplicit : false); 320 Self->CurDeclContext = this; 321 } 322 323 ~AutoSetContext() { Self->CurDeclContext = Prev; } 324 325 IndexConsumer *Self; 326 AutoSetContext *Prev; 327 NamedDecl *Decl; 328 bool VisitImplicit; 329 }; 330 AutoSetContext *CurDeclContext; 331 332 FileInfo *getFileInfo(SourceLocation Loc) { 333 FileID Id = SM.getFileID(Loc); 334 335 std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; 336 It = FileMap.find(Id); 337 if (It == FileMap.end()) { 338 // We haven't seen this file before. We need to make the FileInfo 339 // structure information ourselves 340 std::string Filename = std::string(SM.getFilename(Loc)); 341 std::string Absolute; 342 // If Loc is a macro id rather than a file id, it Filename might be 343 // empty. Also for some types of file locations that are clang-internal 344 // like "<scratch>" it can return an empty Filename. In these cases we 345 // want to leave Absolute as empty. 346 if (!Filename.empty()) { 347 Absolute = getAbsolutePath(Filename); 348 if (Absolute.empty()) { 349 Absolute = Filename; 350 } 351 } 352 std::unique_ptr<FileInfo> Info = make_unique<FileInfo>(Absolute, CI.getHeaderSearchOpts()); 353 It = FileMap.insert(std::make_pair(Id, std::move(Info))).first; 354 } 355 return It->second.get(); 356 } 357 358 // Helpers for processing declarations 359 // Should we ignore this location? 360 bool isInterestingLocation(SourceLocation Loc) { 361 if (SM.isMacroBodyExpansion(Loc)) { 362 Loc = SM.getFileLoc(Loc); 363 } 364 365 normalizeLocation(&Loc); 366 if (Loc.isInvalid()) { 367 return false; 368 } 369 370 return getFileInfo(Loc)->Interesting; 371 } 372 373 // Convert location to "line:column" or "line:column-column" given length. 374 // In resulting string rep, line is 1-based and zero-padded to 5 digits, while 375 // column is 0-based and unpadded. 376 std::string locationToString(SourceLocation Loc, size_t Length = 0) { 377 std::pair<FileID, unsigned> Pair = SM.getDecomposedExpansionLoc(Loc); 378 379 bool IsInvalid; 380 unsigned Line = SM.getLineNumber(Pair.first, Pair.second, &IsInvalid); 381 if (IsInvalid) { 382 return ""; 383 } 384 unsigned Column = SM.getColumnNumber(Pair.first, Pair.second, &IsInvalid); 385 if (IsInvalid) { 386 return ""; 387 } 388 389 if (Length) { 390 return stringFormat("%05d:%d-%d", Line, Column - 1, Column - 1 + Length); 391 } else { 392 return stringFormat("%05d:%d", Line, Column - 1); 393 } 394 } 395 396 // Convert SourceRange to "line-line" or "line". 397 // In the resulting string rep, line is 1-based. 398 std::string lineRangeToString(SourceRange Range, bool omitEnd = false) { 399 std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin()); 400 std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd()); 401 402 bool IsInvalid; 403 unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); 404 if (IsInvalid) { 405 return ""; 406 } 407 unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); 408 if (IsInvalid) { 409 return ""; 410 } 411 412 if (omitEnd && Line1 == Line2) { 413 return stringFormat("%d", Line1); 414 } 415 416 return stringFormat("%d-%d", Line1, Line2); 417 } 418 419 // Convert SourceRange to "PATH#line-line" or "PATH#line". 420 // If Range's file is same as fromFileID, PATH is omitted. 421 std::string pathAndLineRangeToString(FileID fromFileID, SourceRange Range) { 422 FileInfo *toFile = getFileInfo(Range.getBegin()); 423 FileInfo *fromFile = FileMap.find(fromFileID)->second.get(); 424 425 auto lineRange = lineRangeToString(Range, true); 426 427 if (lineRange.empty()) { 428 return ""; 429 } 430 431 if (toFile == fromFile) { 432 return "#" + lineRange; 433 } 434 435 if (toFile->Realname.empty()) { 436 return "#" + lineRange; 437 } 438 439 std::string result = toFile->Realname; 440 result += "#"; 441 result += lineRange; 442 return result; 443 } 444 445 bool needsNestingRangeForVarDecl(SourceRange& Range) { 446 std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin()); 447 std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd()); 448 449 bool IsInvalid; 450 unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); 451 if (IsInvalid) { 452 return false; 453 } 454 unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); 455 if (IsInvalid) { 456 return false; 457 } 458 459 static constexpr unsigned MinVarDeclNestingRangeLines = 10; 460 461 return Line2 > Line1 + MinVarDeclNestingRangeLines; 462 } 463 464 // Convert SourceRange to "line:column-line:column". 465 // In the resulting string rep, line is 1-based, column is 0-based. 466 std::string fullRangeToString(SourceRange Range) { 467 std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin()); 468 std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd()); 469 470 bool IsInvalid; 471 unsigned Line1 = SM.getLineNumber(Begin.first, Begin.second, &IsInvalid); 472 if (IsInvalid) { 473 return ""; 474 } 475 unsigned Column1 = 476 SM.getColumnNumber(Begin.first, Begin.second, &IsInvalid); 477 if (IsInvalid) { 478 return ""; 479 } 480 unsigned Line2 = SM.getLineNumber(End.first, End.second, &IsInvalid); 481 if (IsInvalid) { 482 return ""; 483 } 484 unsigned Column2 = SM.getColumnNumber(End.first, End.second, &IsInvalid); 485 if (IsInvalid) { 486 return ""; 487 } 488 489 return stringFormat("%d:%d-%d:%d", Line1, Column1 - 1, Line2, Column2 - 1); 490 } 491 492 // Returns the qualified name of `d` without considering template parameters. 493 std::string getQualifiedName(const NamedDecl *D) { 494 const DeclContext *Ctx = D->getDeclContext(); 495 if (Ctx->isFunctionOrMethod()) { 496 return D->getQualifiedNameAsString(); 497 } 498 499 std::vector<const DeclContext *> Contexts; 500 501 // Collect contexts. 502 while (Ctx && isa<NamedDecl>(Ctx)) { 503 Contexts.push_back(Ctx); 504 Ctx = Ctx->getParent(); 505 } 506 507 std::string Result; 508 509 std::reverse(Contexts.begin(), Contexts.end()); 510 511 for (const DeclContext *DC : Contexts) { 512 if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) { 513 Result += Spec->getNameAsString(); 514 515 if (Spec->getSpecializationKind() == TSK_ExplicitSpecialization) { 516 std::string Backing; 517 llvm::raw_string_ostream Stream(Backing); 518 const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); 519 printTemplateArgumentList(Stream, TemplateArgs.asArray(), 520 PrintingPolicy(CI.getLangOpts())); 521 Result += Stream.str(); 522 } 523 } else if (const auto *Nd = dyn_cast<NamespaceDecl>(DC)) { 524 if (Nd->isAnonymousNamespace() || Nd->isInline()) { 525 continue; 526 } 527 Result += Nd->getNameAsString(); 528 } else if (const auto *Rd = dyn_cast<RecordDecl>(DC)) { 529 if (!Rd->getIdentifier()) { 530 Result += "(anonymous)"; 531 } else { 532 Result += Rd->getNameAsString(); 533 } 534 } else if (const auto *Fd = dyn_cast<FunctionDecl>(DC)) { 535 Result += Fd->getNameAsString(); 536 } else if (const auto *Ed = dyn_cast<EnumDecl>(DC)) { 537 // C++ [dcl.enum]p10: Each enum-name and each unscoped 538 // enumerator is declared in the scope that immediately contains 539 // the enum-specifier. Each scoped enumerator is declared in the 540 // scope of the enumeration. 541 if (Ed->isScoped() || Ed->getIdentifier()) 542 Result += Ed->getNameAsString(); 543 else 544 continue; 545 } else { 546 Result += cast<NamedDecl>(DC)->getNameAsString(); 547 } 548 Result += "::"; 549 } 550 551 if (D->getDeclName()) 552 Result += D->getNameAsString(); 553 else 554 Result += "(anonymous)"; 555 556 return Result; 557 } 558 559 std::string mangleLocation(SourceLocation Loc, 560 std::string Backup = std::string()) { 561 FileInfo *F = getFileInfo(Loc); 562 std::string Filename = F->Realname; 563 if (Filename.length() == 0 && Backup.length() != 0) { 564 return Backup; 565 } 566 if (F->Generated) { 567 // Since generated files may be different on different platforms, 568 // we need to include a platform-specific thing in the hash. Otherwise 569 // we can end up with hash collisions where different symbols from 570 // different platforms map to the same thing. 571 char *Platform = getenv("MOZSEARCH_PLATFORM"); 572 Filename = 573 std::string(Platform ? Platform : "") + std::string("@") + Filename; 574 } 575 return hash(Filename + std::string("@") + locationToString(Loc)); 576 } 577 578 bool isAcceptableSymbolChar(char c) { 579 return isalpha(c) || isdigit(c) || c == '_' || c == '/'; 580 } 581 582 std::string mangleFile(std::string Filename, FileType Type) { 583 // "Mangle" the file path, such that: 584 // 1. The majority of paths will still be mostly human-readable. 585 // 2. The sanitization algorithm doesn't produce collisions where two 586 // different unsanitized paths can result in the same sanitized paths. 587 // 3. The produced symbol doesn't cause problems with downstream consumers. 588 // In order to accomplish this, we keep alphanumeric chars, underscores, 589 // and slashes, and replace everything else with an "@xx" hex encoding. 590 // The majority of path characters are letters and slashes which don't get 591 // encoded, so that satisfies (1). Since "@" characters in the unsanitized 592 // path get encoded, there should be no "@" characters in the sanitized path 593 // that got preserved from the unsanitized input, so that should satisfy 594 // (2). And (3) was done by trial-and-error. Note in particular the dot (.) 595 // character needs to be encoded, or the symbol-search feature of mozsearch 596 // doesn't work correctly, as all dot characters in the symbol query get 597 // replaced by #. 598 for (size_t i = 0; i < Filename.length(); i++) { 599 char c = Filename[i]; 600 if (isAcceptableSymbolChar(c)) { 601 continue; 602 } 603 char hex[4]; 604 sprintf(hex, "@%02X", ((int)c) & 0xFF); 605 Filename.replace(i, 1, hex); 606 i += 2; 607 } 608 609 if (Type == FileType::Generated) { 610 // Since generated files may be different on different platforms, 611 // we need to include a platform-specific thing in the hash. Otherwise 612 // we can end up with hash collisions where different symbols from 613 // different platforms map to the same thing. 614 char *Platform = getenv("MOZSEARCH_PLATFORM"); 615 Filename = 616 std::string(Platform ? Platform : "") + std::string("@") + Filename; 617 } 618 return Filename; 619 } 620 621 std::string mangleURL(std::string Url) { 622 return mangleFile(Url, FileType::Source); 623 } 624 625 std::string mangleQualifiedName(std::string Name) { 626 std::replace(Name.begin(), Name.end(), ' ', '_'); 627 return Name; 628 } 629 630 std::string getMangledName(clang::MangleContext *Ctx, 631 const clang::NamedDecl *Decl) { 632 // Main functions will tend to collide because they inherently have similar 633 // signatures, so let's provide a custom location-based signature. 634 if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isMain()) { 635 return std::string("MF_") + mangleLocation(Decl->getLocation()); 636 } 637 638 if (isa<FunctionDecl>(Decl) && cast<FunctionDecl>(Decl)->isExternC()) { 639 return cast<FunctionDecl>(Decl)->getNameAsString(); 640 } 641 642 if (isa<FunctionDecl>(Decl) || isa<VarDecl>(Decl)) { 643 const DeclContext *DC = Decl->getDeclContext(); 644 if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC) || 645 isa<LinkageSpecDecl>(DC) || 646 // isa<ExternCContextDecl>(DC) || 647 isa<TagDecl>(DC)) { 648 llvm::SmallVector<char, 512> Output; 649 llvm::raw_svector_ostream Out(Output); 650 #if CLANG_VERSION_MAJOR >= 11 651 // This code changed upstream in version 11: 652 // https://github.com/llvm/llvm-project/commit/29e1a16be8216066d1ed733a763a749aed13ff47 653 GlobalDecl GD; 654 if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { 655 GD = GlobalDecl(D, Ctor_Complete); 656 } else if (const CXXDestructorDecl *D = 657 dyn_cast<CXXDestructorDecl>(Decl)) { 658 GD = GlobalDecl(D, Dtor_Complete); 659 } else { 660 GD = GlobalDecl(Decl); 661 } 662 Ctx->mangleName(GD, Out); 663 #else 664 if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(Decl)) { 665 Ctx->mangleCXXCtor(D, CXXCtorType::Ctor_Complete, Out); 666 } else if (const CXXDestructorDecl *D = 667 dyn_cast<CXXDestructorDecl>(Decl)) { 668 Ctx->mangleCXXDtor(D, CXXDtorType::Dtor_Complete, Out); 669 } else { 670 Ctx->mangleName(Decl, Out); 671 } 672 #endif 673 return Out.str().str(); 674 } else { 675 return std::string("V_") + mangleLocation(Decl->getLocation()) + 676 std::string("_") + hash(std::string(Decl->getName())); 677 } 678 } else if (isa<TagDecl>(Decl) || isa<ObjCInterfaceDecl>(Decl)) { 679 if (!Decl->getIdentifier()) { 680 // Anonymous. 681 return std::string("T_") + mangleLocation(Decl->getLocation()); 682 } 683 684 return std::string("T_") + mangleQualifiedName(getQualifiedName(Decl)); 685 } else if (isa<TypedefNameDecl>(Decl)) { 686 if (!Decl->getIdentifier()) { 687 // Anonymous. 688 return std::string("TA_") + mangleLocation(Decl->getLocation()); 689 } 690 691 return std::string("TA_") + mangleQualifiedName(getQualifiedName(Decl)); 692 } else if (isa<NamespaceDecl>(Decl) || isa<NamespaceAliasDecl>(Decl)) { 693 if (!Decl->getIdentifier()) { 694 // Anonymous. 695 return std::string("NS_") + mangleLocation(Decl->getLocation()); 696 } 697 698 return std::string("NS_") + mangleQualifiedName(getQualifiedName(Decl)); 699 } else if (const ObjCIvarDecl *D2 = dyn_cast<ObjCIvarDecl>(Decl)) { 700 const ObjCInterfaceDecl *Iface = D2->getContainingInterface(); 701 return std::string("F_<") + getMangledName(Ctx, Iface) + ">_" + 702 D2->getNameAsString(); 703 } else if (const FieldDecl *D2 = dyn_cast<FieldDecl>(Decl)) { 704 const RecordDecl *Record = D2->getParent(); 705 return std::string("F_<") + getMangledName(Ctx, Record) + ">_" + 706 D2->getNameAsString(); 707 } else if (const EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(Decl)) { 708 const DeclContext *DC = Decl->getDeclContext(); 709 if (const NamedDecl *Named = dyn_cast<NamedDecl>(DC)) { 710 return std::string("E_<") + getMangledName(Ctx, Named) + ">_" + 711 D2->getNameAsString(); 712 } 713 } 714 715 assert(false); 716 return std::string(""); 717 } 718 719 void debugLocation(SourceLocation Loc) { 720 std::string S = locationToString(Loc); 721 StringRef Filename = SM.getFilename(Loc); 722 printf("--> %s %s\n", std::string(Filename).c_str(), S.c_str()); 723 } 724 725 void debugRange(SourceRange Range) { 726 printf("Range\n"); 727 debugLocation(Range.getBegin()); 728 debugLocation(Range.getEnd()); 729 } 730 731 public: 732 IndexConsumer(CompilerInstance &CI) 733 : CI(CI), SM(CI.getSourceManager()), LO(CI.getLangOpts()), 734 CurMangleContext(nullptr), AstContext(nullptr), 735 ConcatInfo(CI.getPreprocessor()), CurDeclContext(nullptr), 736 TemplateStack(nullptr) { 737 CI.getPreprocessor().addPPCallbacks(make_unique<PreprocessorHook>(this)); 738 CI.getPreprocessor().setTokenWatcher( 739 [this](const auto &token) { onTokenLexed(token); }); 740 } 741 742 virtual DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const { 743 return new IndexConsumer(CI); 744 } 745 746 #if !defined(_WIN32) && !defined(_WIN64) 747 struct AutoTime { 748 AutoTime(double *Counter) : Counter(Counter), Start(time()) {} 749 ~AutoTime() { 750 if (Start) { 751 *Counter += time() - Start; 752 } 753 } 754 void stop() { 755 *Counter += time() - Start; 756 Start = 0; 757 } 758 double *Counter; 759 double Start; 760 }; 761 #endif 762 763 // All we need is to follow the final declaration. 764 virtual void HandleTranslationUnit(ASTContext &Ctx) { 765 CurMangleContext = 766 clang::ItaniumMangleContext::create(Ctx, CI.getDiagnostics()); 767 768 AstContext = &Ctx; 769 Resolver = std::make_unique<clangd::HeuristicResolver>(Ctx); 770 TraverseDecl(Ctx.getTranslationUnitDecl()); 771 772 // Emit the JSON data for all files now. 773 std::map<FileID, std::unique_ptr<FileInfo>>::iterator It; 774 for (It = FileMap.begin(); It != FileMap.end(); It++) { 775 if (!It->second->Interesting) { 776 continue; 777 } 778 779 FileInfo &Info = *It->second; 780 781 std::string Filename = Outdir + Info.Realname; 782 std::string SrcFilename = 783 Info.Generated ? Objdir + Info.Realname.substr(GENERATED.length()) 784 : Srcdir + PATHSEP_STRING + Info.Realname; 785 786 ensurePath(Filename); 787 788 // We lock the output file in case some other clang process is trying to 789 // write to it at the same time. 790 AutoLockFile Lock(SrcFilename, Filename); 791 792 if (!Lock.success()) { 793 fprintf(stderr, "Unable to lock file %s\n", Filename.c_str()); 794 exit(1); 795 } 796 797 // Merge our results with the existing lines from the output file. 798 // This ensures that header files that are included multiple times 799 // in different ways are analyzed completely. 800 std::ifstream Fin(Filename.c_str(), std::ios::in | std::ios::binary); 801 FILE *OutFp = Lock.openTmp(); 802 if (!OutFp) { 803 fprintf(stderr, "Unable to open tmp out file for %s\n", 804 Filename.c_str()); 805 exit(1); 806 } 807 808 // Sort our new results and get an iterator to them 809 std::sort(Info.Output.begin(), Info.Output.end()); 810 std::vector<std::string>::const_iterator NewLinesIter = 811 Info.Output.begin(); 812 std::string LastNewWritten; 813 814 // Loop over the existing (sorted) lines in the analysis output file. 815 // (The good() check also handles the case where Fin did not exist when we 816 // went to open it.) 817 while (Fin.good()) { 818 std::string OldLine; 819 std::getline(Fin, OldLine); 820 // Skip blank lines. 821 if (OldLine.length() == 0) { 822 continue; 823 } 824 // We need to put the newlines back that getline() eats. 825 OldLine.push_back('\n'); 826 827 // Write any results from Info.Output that are lexicographically 828 // smaller than OldLine (read from the existing file), but make sure 829 // to skip duplicates. Keep advancing NewLinesIter until we reach an 830 // entry that is lexicographically greater than OldLine. 831 for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { 832 if (*NewLinesIter > OldLine) { 833 break; 834 } 835 if (*NewLinesIter == OldLine) { 836 continue; 837 } 838 if (*NewLinesIter == LastNewWritten) { 839 // dedupe the new entries being written 840 continue; 841 } 842 if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 843 1) { 844 fprintf(stderr, 845 "Unable to write %zu bytes[1] to tmp output file for %s\n", 846 NewLinesIter->length(), Filename.c_str()); 847 exit(1); 848 } 849 LastNewWritten = *NewLinesIter; 850 } 851 852 // Write the entry read from the existing file. 853 if (fwrite(OldLine.c_str(), OldLine.length(), 1, OutFp) != 1) { 854 fprintf(stderr, 855 "Unable to write %zu bytes[2] to tmp output file for %s\n", 856 OldLine.length(), Filename.c_str()); 857 exit(1); 858 } 859 } 860 861 // We finished reading from Fin 862 Fin.close(); 863 864 // Finish iterating our new results, discarding duplicates 865 for (; NewLinesIter != Info.Output.end(); NewLinesIter++) { 866 if (*NewLinesIter == LastNewWritten) { 867 continue; 868 } 869 if (fwrite(NewLinesIter->c_str(), NewLinesIter->length(), 1, OutFp) != 870 1) { 871 fprintf(stderr, 872 "Unable to write %zu bytes[3] to tmp output file for %s\n", 873 NewLinesIter->length(), Filename.c_str()); 874 exit(1); 875 } 876 LastNewWritten = *NewLinesIter; 877 } 878 879 // Done writing all the things, close it and replace the old output file 880 // with the new one. 881 fclose(OutFp); 882 if (!Lock.moveTmp()) { 883 fprintf(stderr, 884 "Unable to move tmp output file into place for %s (err %d)\n", 885 Filename.c_str(), errno); 886 exit(1); 887 } 888 } 889 } 890 891 // Unfortunately, we have to override all these methods in order to track the 892 // context we're inside. 893 894 bool TraverseEnumDecl(EnumDecl *D) { 895 AutoSetContext Asc(this, D); 896 return Super::TraverseEnumDecl(D); 897 } 898 bool TraverseRecordDecl(RecordDecl *D) { 899 AutoSetContext Asc(this, D); 900 return Super::TraverseRecordDecl(D); 901 } 902 bool TraverseCXXRecordDecl(CXXRecordDecl *D) { 903 AutoSetContext Asc(this, D); 904 return Super::TraverseCXXRecordDecl(D); 905 } 906 bool TraverseFunctionDecl(FunctionDecl *D) { 907 AutoSetContext Asc(this, D); 908 const FunctionDecl *Def; 909 // (See the larger AutoTemplateContext comment for more information.) If a 910 // method on a templated class is declared out-of-line, we need to analyze 911 // the definition inside the scope of the template or else we won't properly 912 // handle member access on the templated type. 913 if (TemplateStack && D->isDefined(Def) && Def && D != Def) { 914 const auto _ = ValueRollback(CurDeclContext, nullptr); 915 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); 916 } 917 return Super::TraverseFunctionDecl(D); 918 } 919 bool TraverseCXXMethodDecl(CXXMethodDecl *D) { 920 AutoSetContext Asc(this, D); 921 const FunctionDecl *Def; 922 // See TraverseFunctionDecl. 923 if (TemplateStack && D->isDefined(Def) && Def && D != Def) { 924 const auto _ = ValueRollback(CurDeclContext, nullptr); 925 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); 926 } 927 return Super::TraverseCXXMethodDecl(D); 928 } 929 bool TraverseCXXConstructorDecl(CXXConstructorDecl *D) { 930 AutoSetContext Asc(this, D, /*VisitImplicit=*/true); 931 const FunctionDecl *Def; 932 // See TraverseFunctionDecl. 933 if (TemplateStack && D->isDefined(Def) && Def && D != Def) { 934 const auto _ = ValueRollback(CurDeclContext, nullptr); 935 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); 936 } 937 return Super::TraverseCXXConstructorDecl(D); 938 } 939 bool TraverseCXXConversionDecl(CXXConversionDecl *D) { 940 AutoSetContext Asc(this, D); 941 const FunctionDecl *Def; 942 // See TraverseFunctionDecl. 943 if (TemplateStack && D->isDefined(Def) && Def && D != Def) { 944 const auto _ = ValueRollback(CurDeclContext, nullptr); 945 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); 946 } 947 return Super::TraverseCXXConversionDecl(D); 948 } 949 bool TraverseCXXDestructorDecl(CXXDestructorDecl *D) { 950 AutoSetContext Asc(this, D); 951 const FunctionDecl *Def; 952 // See TraverseFunctionDecl. 953 if (TemplateStack && D->isDefined(Def) && Def && D != Def) { 954 const auto _ = ValueRollback(CurDeclContext, nullptr); 955 TraverseFunctionDecl(const_cast<FunctionDecl *>(Def)); 956 } 957 return Super::TraverseCXXDestructorDecl(D); 958 } 959 960 bool TraverseLambdaExpr(LambdaExpr *E) { 961 AutoSetContext Asc(this, nullptr, true); 962 return Super::TraverseLambdaExpr(E); 963 } 964 965 // Used to keep track of the context in which a token appears. 966 struct Context { 967 // Ultimately this becomes the "context" JSON property. 968 std::string Name; 969 970 // Ultimately this becomes the "contextsym" JSON property. 971 std::string Symbol; 972 973 Context() {} 974 Context(std::string Name, std::string Symbol) 975 : Name(Name), Symbol(Symbol) {} 976 }; 977 978 Context translateContext(NamedDecl *D) { 979 const FunctionDecl *F = dyn_cast<FunctionDecl>(D); 980 if (F && F->isTemplateInstantiation()) { 981 D = F->getTemplateInstantiationPattern(); 982 } 983 984 return Context(D->getQualifiedNameAsString(), 985 getMangledName(CurMangleContext, D)); 986 } 987 988 Context getContext(SourceLocation Loc) { 989 if (SM.isMacroBodyExpansion(Loc)) { 990 // If we're inside a macro definition, we don't return any context. It 991 // will probably not be what the user expects if we do. 992 return Context(); 993 } 994 995 AutoSetContext *Ctxt = CurDeclContext; 996 while (Ctxt) { 997 if (Ctxt->Decl) { 998 return translateContext(Ctxt->Decl); 999 } 1000 Ctxt = Ctxt->Prev; 1001 } 1002 return Context(); 1003 } 1004 1005 // Similar to GetContext(SourceLocation), but it skips the declaration passed 1006 // in. This is useful if we want the context of a declaration that's already 1007 // on the stack. 1008 Context getContext(Decl *D) { 1009 if (SM.isMacroBodyExpansion(D->getLocation())) { 1010 // If we're inside a macro definition, we don't return any context. It 1011 // will probably not be what the user expects if we do. 1012 return Context(); 1013 } 1014 1015 AutoSetContext *Ctxt = CurDeclContext; 1016 while (Ctxt) { 1017 if (Ctxt->Decl && Ctxt->Decl != D) { 1018 return translateContext(Ctxt->Decl); 1019 } 1020 Ctxt = Ctxt->Prev; 1021 } 1022 return Context(); 1023 } 1024 1025 // Searches for the closest CurDeclContext parent that is a function template 1026 // instantiation 1027 const FunctionDecl *getCurrentFunctionTemplateInstantiation() { 1028 const auto *Ctxt = CurDeclContext; 1029 while (Ctxt) { 1030 if (Ctxt->Decl && isa<FunctionDecl>(Ctxt->Decl)) { 1031 const auto *F = Ctxt->Decl->getAsFunction(); 1032 if (F->isTemplateInstantiation()) 1033 return F; 1034 } 1035 Ctxt = Ctxt->Prev; 1036 } 1037 return nullptr; 1038 } 1039 1040 // Analyzing template code is tricky. Suppose we have this code: 1041 // 1042 // template<class T> 1043 // bool Foo(T* ptr) { return T::StaticMethod(ptr); } 1044 // 1045 // If we analyze the body of Foo without knowing the type T, then we will not 1046 // be able to generate any information for StaticMethod. However, analyzing 1047 // Foo for every possible instantiation is inefficient and it also generates 1048 // too much data in some cases. For example, the following code would generate 1049 // one definition of Baz for every instantiation, which is undesirable: 1050 // 1051 // template<class T> 1052 // class Bar { struct Baz { ... }; }; 1053 // 1054 // To solve this problem, we analyze templates only once. We do so in a 1055 // GatherDependent mode where we look for "dependent scoped member 1056 // expressions" (i.e., things like StaticMethod). We keep track of the 1057 // locations of these expressions. If we find one or more of them, we analyze 1058 // the template for each instantiation, in an AnalyzeDependent mode. This mode 1059 // ignores all source locations except for the ones where we found dependent 1060 // scoped member expressions before. For these locations, we generate a 1061 // separate JSON result for each instantiation. 1062 // 1063 // We inherit our parent's mode if it is exists. This is because if our 1064 // parent is in analyze mode, it means we've already lived a full life in 1065 // gather mode and we must not restart in gather mode or we'll cause the 1066 // indexer to visit EVERY identifier, which is way too much data. 1067 struct AutoTemplateContext { 1068 AutoTemplateContext(IndexConsumer *Self) 1069 : Self(Self), CurMode(Self->TemplateStack ? Self->TemplateStack->CurMode 1070 : Mode::GatherDependent), 1071 Parent(Self->TemplateStack) { 1072 Self->TemplateStack = this; 1073 } 1074 1075 ~AutoTemplateContext() { Self->TemplateStack = Parent; } 1076 1077 // We traverse templates in two modes: 1078 enum class Mode { 1079 // Gather mode does not traverse into specializations. It looks for 1080 // locations where it would help to have more info from template 1081 // specializations. 1082 GatherDependent, 1083 1084 // Analyze mode traverses into template specializations and records 1085 // information about token locations saved in gather mode. 1086 AnalyzeDependent, 1087 }; 1088 1089 // We found a dependent scoped member expression! Keep track of it for 1090 // later. 1091 void visitDependent(SourceLocation Loc) { 1092 if (CurMode == Mode::AnalyzeDependent) { 1093 return; 1094 } 1095 1096 DependentLocations.insert(Loc.getRawEncoding()); 1097 if (Parent) { 1098 Parent->visitDependent(Loc); 1099 } 1100 } 1101 1102 bool inGatherMode() { return CurMode == Mode::GatherDependent; } 1103 1104 // Do we need to perform the extra AnalyzeDependent passes (one per 1105 // instantiation)? 1106 bool needsAnalysis() const { 1107 if (!DependentLocations.empty()) { 1108 return true; 1109 } 1110 if (Parent) { 1111 return Parent->needsAnalysis(); 1112 } 1113 return false; 1114 } 1115 1116 void switchMode() { CurMode = Mode::AnalyzeDependent; } 1117 1118 // Do we want to analyze each template instantiation separately? 1119 bool shouldVisitTemplateInstantiations() const { 1120 if (CurMode == Mode::AnalyzeDependent) { 1121 return true; 1122 } 1123 if (Parent) { 1124 return Parent->shouldVisitTemplateInstantiations(); 1125 } 1126 return false; 1127 } 1128 1129 // For a given expression/statement, should we emit JSON data for it? 1130 bool shouldVisit(SourceLocation Loc) { 1131 if (CurMode == Mode::GatherDependent) { 1132 return true; 1133 } 1134 if (DependentLocations.find(Loc.getRawEncoding()) != 1135 DependentLocations.end()) { 1136 return true; 1137 } 1138 if (Parent) { 1139 return Parent->shouldVisit(Loc); 1140 } 1141 return false; 1142 } 1143 1144 private: 1145 IndexConsumer *Self; 1146 Mode CurMode; 1147 std::unordered_set<unsigned> DependentLocations; 1148 AutoTemplateContext *Parent; 1149 }; 1150 1151 AutoTemplateContext *TemplateStack; 1152 1153 std::unordered_multimap<const FunctionDecl *, const Stmt *> 1154 ForwardingTemplates; 1155 std::unordered_set<unsigned> ForwardedTemplateLocations; 1156 1157 bool shouldVisitTemplateInstantiations() const { 1158 if (TemplateStack) { 1159 return TemplateStack->shouldVisitTemplateInstantiations(); 1160 } 1161 return false; 1162 } 1163 1164 bool shouldVisitImplicitCode() const { 1165 return CurDeclContext && CurDeclContext->VisitImplicit; 1166 } 1167 1168 // We don't want to traverse all specializations everytime we find a forward 1169 // declaration, so only traverse specializations related to an actual 1170 // definition. 1171 // 1172 // ``` 1173 // // This is the canonical declaration for Maybe but isn't really useful. 1174 // template <typename T> 1175 // struct Maybe; 1176 // 1177 // // This is another ClassTemplateDecl, but not the canonical one, where we 1178 // // actually have the definition. This is the one we want to traverse. 1179 // template <typename T> 1180 // struct Maybe { 1181 // // This is both the canonical declaration and the definition for 1182 // // inline_method and we want to traverse it. 1183 // template <typename... Args> 1184 // T *inline_method(Args&&... args) { 1185 // // definition 1186 // } 1187 // 1188 // // This is the canonical declaration, TraverseFunctionTemplateDecl 1189 // // traverses its out of line definition too. 1190 // template <typename... Args> 1191 // T *out_of_line_method(Args&&... args); 1192 // } 1193 // 1194 // // This is the definition for Maybe<T>::out_of_line_method<Args...> 1195 // // It is traversed when calling TraverseFunctionTemplateDecl on the 1196 // // canonical declaration. 1197 // template <typename T> 1198 // template <typename... Args> 1199 // T *maybe(Args&&... args) { 1200 // // definition 1201 // } 1202 // ``` 1203 // 1204 // So: 1205 // - for class templates we check isThisDeclarationADefinition 1206 // - for function templates we check isCanonicalDecl 1207 bool TraverseClassTemplateDecl(ClassTemplateDecl *D) { 1208 AutoTemplateContext Atc(this); 1209 Super::TraverseClassTemplateDecl(D); 1210 1211 // Gather dependent locations from partial specializations too 1212 SmallVector<ClassTemplatePartialSpecializationDecl *> PS; 1213 D->getPartialSpecializations(PS); 1214 for (auto *Spec : PS) { 1215 for (auto *Rd : Spec->redecls()) { 1216 TraverseDecl(Rd); 1217 } 1218 } 1219 1220 if (!Atc.needsAnalysis()) { 1221 return true; 1222 } 1223 1224 Atc.switchMode(); 1225 1226 if (!D->isThisDeclarationADefinition()) 1227 return true; 1228 1229 for (auto *Spec : D->specializations()) { 1230 for (auto *Rd : Spec->redecls()) { 1231 // We don't want to visit injected-class-names in this traversal. 1232 if (cast<CXXRecordDecl>(Rd)->isInjectedClassName()) 1233 continue; 1234 1235 TraverseDecl(Rd); 1236 } 1237 } 1238 1239 return true; 1240 } 1241 1242 // See also comment above TraverseClassTemplateDecl 1243 bool TraverseFunctionTemplateDecl(FunctionTemplateDecl *D) { 1244 AutoTemplateContext Atc(this); 1245 if (Atc.inGatherMode()) { 1246 Super::TraverseFunctionTemplateDecl(D); 1247 } 1248 1249 if (!Atc.needsAnalysis()) { 1250 return true; 1251 } 1252 1253 Atc.switchMode(); 1254 1255 if (!D->isCanonicalDecl()) 1256 return true; 1257 1258 for (auto *Spec : D->specializations()) { 1259 for (auto *Rd : Spec->redecls()) { 1260 TraverseDecl(Rd); 1261 } 1262 } 1263 1264 return true; 1265 } 1266 1267 bool shouldVisit(SourceLocation Loc) { 1268 if (TemplateStack) { 1269 return TemplateStack->shouldVisit(Loc); 1270 } 1271 return true; 1272 } 1273 1274 // Returns true if the class has template in its entire class hierarchy. 1275 bool hasTemplateInHierarchy(const CXXRecordDecl* cxxDecl) { 1276 if (cxxDecl->isDependentType()) { 1277 // This class is templatized. 1278 return true; 1279 } 1280 1281 1282 if (dyn_cast<const ClassTemplateSpecializationDecl>(cxxDecl)) { 1283 // This class is template specialization. 1284 return true; 1285 } 1286 1287 for (const CXXBaseSpecifier &Base : cxxDecl->bases()) { 1288 const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); 1289 if (!BaseDecl) { 1290 // The base class is not-yet-substituted. 1291 return true; 1292 } 1293 1294 const Type* ty = Base.getType().getTypePtr(); 1295 if (dyn_cast<const SubstTemplateTypeParmType>(ty)) { 1296 // The base class is a substituted template parameter. 1297 return true; 1298 } 1299 1300 if (hasTemplateInHierarchy(BaseDecl)) { 1301 return true; 1302 } 1303 } 1304 1305 return false; 1306 } 1307 1308 enum { 1309 // Flag to omit the identifier from being cross-referenced across files. 1310 // This is usually desired for local variables. 1311 NoCrossref = 1 << 0, 1312 // Flag to indicate the token with analysis data is not an identifier. 1313 // Indicates 1314 // we want to skip the check that tries to ensure a sane identifier token. 1315 NotIdentifierToken = 1 << 1, 1316 // This indicates that the end of the provided SourceRange is valid and 1317 // should be respected. If this flag is not set, the visitIdentifier 1318 // function should use only the start of the SourceRange and auto-detect 1319 // the end based on whatever token is found at the start. 1320 LocRangeEndValid = 1 << 2, 1321 // Indicates this record was generated through heuristic template 1322 // resolution. 1323 Heuristic = 1 << 3, 1324 }; 1325 1326 enum class LayoutHandling { 1327 // Emit the layout information (size, offset, etc) and the other fields. 1328 // This should be used when the struct is not templatized. 1329 UseLayout, 1330 1331 // Only emit the layout information. 1332 // This should be used for emitting the data for base classes. 1333 LayoutOnly, 1334 }; 1335 1336 void emitStructuredRecordInfo(llvm::json::OStream &J, SourceLocation Loc, 1337 const RecordDecl *decl, 1338 LayoutHandling layoutHandling = LayoutHandling::UseLayout) { 1339 if (layoutHandling != LayoutHandling::LayoutOnly) { 1340 J.attribute("kind", 1341 TypeWithKeyword::getTagTypeKindName(decl->getTagKind())); 1342 } 1343 1344 const ASTContext &C = *AstContext; 1345 const ASTRecordLayout &Layout = C.getASTRecordLayout(decl); 1346 1347 J.attribute("sizeBytes", Layout.getSize().getQuantity()); 1348 J.attribute("alignmentBytes", Layout.getAlignment().getQuantity()); 1349 1350 emitBindingAttributes(J, *decl); 1351 1352 auto cxxDecl = dyn_cast<CXXRecordDecl>(decl); 1353 1354 if (cxxDecl) { 1355 if (Layout.hasOwnVFPtr()) { 1356 // Encode the size of virtual function table pointer 1357 // instead of just true/false, for 2 reasons: 1358 // * having the size here is easier for the consumer 1359 // * the size string 4/8 is shorter than true/false in the analysis 1360 // file 1361 const QualType ptrType = C.getUIntPtrType(); 1362 J.attribute("ownVFPtrBytes", 1363 C.getTypeSizeInChars(ptrType).getQuantity()); 1364 } 1365 1366 bool emitLayout = false; 1367 if (layoutHandling == LayoutHandling::LayoutOnly) { 1368 emitLayout = true; 1369 } else { 1370 emitLayout = hasTemplateInHierarchy(cxxDecl); 1371 } 1372 1373 J.attributeBegin("supers"); 1374 J.arrayBegin(); 1375 for (const CXXBaseSpecifier &Base : cxxDecl->bases()) { 1376 const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); 1377 1378 if (!BaseDecl) { 1379 // If the base class is dependent of template parameters and 1380 // not yet fixed, skip it. 1381 // Those information will be emitted in the subclass that has 1382 // fixed template parameters. 1383 continue; 1384 } 1385 1386 J.objectBegin(); 1387 1388 J.attribute("sym", getMangledName(CurMangleContext, BaseDecl)); 1389 1390 if (Base.isVirtual()) { 1391 CharUnits superOffsetBytes = Layout.getVBaseClassOffset(BaseDecl); 1392 J.attribute("offsetBytes", superOffsetBytes.getQuantity()); 1393 } else { 1394 CharUnits superOffsetBytes = Layout.getBaseClassOffset(BaseDecl); 1395 J.attribute("offsetBytes", superOffsetBytes.getQuantity()); 1396 } 1397 1398 J.attributeBegin("props"); 1399 J.arrayBegin(); 1400 if (Base.isVirtual()) { 1401 J.value("virtual"); 1402 } 1403 J.arrayEnd(); 1404 J.attributeEnd(); 1405 1406 if (emitLayout) { 1407 // In order to reduce the file size, emit the entire super class 1408 // layout only if there's any template class in the hierarchy 1409 // Otherwise the field layout can be constructed with each 1410 // superclass's data. 1411 1412 J.attributeBegin("layout"); 1413 J.objectBegin(); 1414 1415 // The structured info for template leaf classes is not emitted, 1416 // which means we don't have "pretty" format of the class. 1417 // Thus we emit it here. 1418 // 1419 // Once that part is solved, the pretty field here can be removed. 1420 // 1421 // See the emitStructuredInfo callsite in VisitNamedDecl. 1422 J.attribute("pretty", getQualifiedName(BaseDecl)); 1423 1424 emitStructuredRecordInfo(J, Loc, BaseDecl, 1425 LayoutHandling::LayoutOnly); 1426 J.objectEnd(); 1427 J.attributeEnd(); 1428 } 1429 1430 J.objectEnd(); 1431 } 1432 J.arrayEnd(); 1433 J.attributeEnd(); 1434 1435 if (layoutHandling != LayoutHandling::LayoutOnly) { 1436 J.attributeBegin("methods"); 1437 J.arrayBegin(); 1438 for (const CXXMethodDecl *MethodDecl : cxxDecl->methods()) { 1439 J.objectBegin(); 1440 1441 J.attribute("pretty", getQualifiedName(MethodDecl)); 1442 J.attribute("sym", getMangledName(CurMangleContext, MethodDecl)); 1443 1444 // TODO: Better figure out what to do for non-isUserProvided methods 1445 // which means there's potentially semantic data that doesn't correspond 1446 // to a source location in the source. Should we be emitting 1447 // structured info for those when we're processing the class here? 1448 1449 J.attributeBegin("props"); 1450 J.arrayBegin(); 1451 if (MethodDecl->isStatic()) { 1452 J.value("static"); 1453 } 1454 if (MethodDecl->isInstance()) { 1455 J.value("instance"); 1456 } 1457 if (MethodDecl->isVirtual()) { 1458 J.value("virtual"); 1459 } 1460 if (MethodDecl->isUserProvided()) { 1461 J.value("user"); 1462 } 1463 if (MethodDecl->isDefaulted()) { 1464 J.value("defaulted"); 1465 } 1466 if (MethodDecl->isDeleted()) { 1467 J.value("deleted"); 1468 } 1469 if (MethodDecl->isConstexpr()) { 1470 J.value("constexpr"); 1471 } 1472 J.arrayEnd(); 1473 J.attributeEnd(); 1474 1475 J.objectEnd(); 1476 } 1477 J.arrayEnd(); 1478 J.attributeEnd(); 1479 } 1480 } 1481 1482 FileID structFileID = SM.getFileID(Loc); 1483 1484 J.attributeBegin("fields"); 1485 J.arrayBegin(); 1486 uint64_t iField = 0; 1487 for (RecordDecl::field_iterator It = decl->field_begin(), 1488 End = decl->field_end(); 1489 It != End; ++It, ++iField) { 1490 const FieldDecl &Field = **It; 1491 auto sourceRange = 1492 SM.getExpansionRange(Field.getSourceRange()).getAsRange(); 1493 uint64_t localOffsetBits = Layout.getFieldOffset(iField); 1494 CharUnits localOffsetBytes = C.toCharUnitsFromBits(localOffsetBits); 1495 1496 J.objectBegin(); 1497 J.attribute("lineRange", 1498 pathAndLineRangeToString(structFileID, sourceRange)); 1499 J.attribute("pretty", getQualifiedName(&Field)); 1500 J.attribute("sym", getMangledName(CurMangleContext, &Field)); 1501 1502 QualType FieldType = Field.getType(); 1503 QualType CanonicalFieldType = FieldType.getCanonicalType(); 1504 LangOptions langOptions; 1505 PrintingPolicy Policy(langOptions); 1506 Policy.PrintCanonicalTypes = true; 1507 J.attribute("type", CanonicalFieldType.getAsString(Policy)); 1508 1509 const TagDecl *tagDecl = CanonicalFieldType->getAsTagDecl(); 1510 if (!tagDecl) { 1511 // Try again piercing any pointers/references involved. Note that our 1512 // typesym semantics are dubious-ish and right now crossref just does 1513 // some parsing of "type" itself until we improve this rep. 1514 CanonicalFieldType = CanonicalFieldType->getPointeeType(); 1515 if (!CanonicalFieldType.isNull()) { 1516 tagDecl = CanonicalFieldType->getAsTagDecl(); 1517 } 1518 } 1519 if (tagDecl) { 1520 J.attribute("typesym", getMangledName(CurMangleContext, tagDecl)); 1521 } 1522 1523 J.attribute("offsetBytes", localOffsetBytes.getQuantity()); 1524 if (Field.isBitField()) { 1525 J.attributeBegin("bitPositions"); 1526 J.objectBegin(); 1527 1528 J.attribute("begin", 1529 unsigned(localOffsetBits - C.toBits(localOffsetBytes))); 1530 #if CLANG_VERSION_MAJOR < 20 1531 J.attribute("width", Field.getBitWidthValue(C)); 1532 #else 1533 J.attribute("width", Field.getBitWidthValue()); 1534 #endif 1535 1536 J.objectEnd(); 1537 J.attributeEnd(); 1538 } else { 1539 // Try and get the field as a record itself so we can know its size, but 1540 // we don't actually want to recurse into it. 1541 if (auto FieldRec = Field.getType()->getAs<RecordType>()) { 1542 auto const &FieldLayout = C.getASTRecordLayout(FieldRec->getDecl()); 1543 J.attribute("sizeBytes", FieldLayout.getSize().getQuantity()); 1544 } else { 1545 // We were unable to get it as a record, which suggests it's a normal 1546 // type, in which case let's just ask for the type size. (Maybe this 1547 // would also work for the above case too?) 1548 uint64_t typeSizeBits = C.getTypeSize(Field.getType()); 1549 CharUnits typeSizeBytes = C.toCharUnitsFromBits(typeSizeBits); 1550 J.attribute("sizeBytes", typeSizeBytes.getQuantity()); 1551 } 1552 } 1553 J.objectEnd(); 1554 } 1555 J.arrayEnd(); 1556 J.attributeEnd(); 1557 } 1558 1559 void emitStructuredEnumInfo(llvm::json::OStream &J, const EnumDecl *ED) { 1560 J.attribute("kind", "enum"); 1561 } 1562 1563 void emitStructuredEnumConstantInfo(llvm::json::OStream &J, 1564 const EnumConstantDecl *ECD) { 1565 J.attribute("kind", "enumConstant"); 1566 } 1567 1568 void emitStructuredFunctionInfo(llvm::json::OStream &J, 1569 const FunctionDecl *decl) { 1570 emitBindingAttributes(J, *decl); 1571 1572 J.attributeBegin("args"); 1573 J.arrayBegin(); 1574 1575 for (auto param : decl->parameters()) { 1576 J.objectBegin(); 1577 1578 J.attribute("name", param->getName()); 1579 QualType ArgType = param->getOriginalType(); 1580 J.attribute("type", ArgType.getAsString()); 1581 1582 QualType CanonicalArgType = ArgType.getCanonicalType(); 1583 const TagDecl *canonDecl = CanonicalArgType->getAsTagDecl(); 1584 if (!canonDecl) { 1585 // Try again piercing any pointers/references involved. Note that our 1586 // typesym semantics are dubious-ish and right now crossref just does 1587 // some parsing of "type" itself until we improve this rep. 1588 CanonicalArgType = CanonicalArgType->getPointeeType(); 1589 if (!CanonicalArgType.isNull()) { 1590 canonDecl = CanonicalArgType->getAsTagDecl(); 1591 } 1592 } 1593 if (canonDecl) { 1594 J.attribute("typesym", getMangledName(CurMangleContext, canonDecl)); 1595 } 1596 1597 J.objectEnd(); 1598 } 1599 1600 J.arrayEnd(); 1601 J.attributeEnd(); 1602 1603 auto cxxDecl = dyn_cast<CXXMethodDecl>(decl); 1604 1605 if (cxxDecl) { 1606 J.attribute("kind", "method"); 1607 if (auto parentDecl = cxxDecl->getParent()) { 1608 J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl)); 1609 } 1610 1611 J.attributeBegin("overrides"); 1612 J.arrayBegin(); 1613 for (const CXXMethodDecl *MethodDecl : cxxDecl->overridden_methods()) { 1614 J.objectBegin(); 1615 1616 // TODO: Make sure we're doing template traversals appropriately... 1617 // findOverriddenMethods (now removed) liked to do: 1618 // if (Decl->isTemplateInstantiation()) { 1619 // Decl = 1620 // dyn_cast<CXXMethodDecl>(Decl->getTemplateInstantiationPattern()); 1621 // } 1622 // I think our pre-emptive dereferencing/avoidance of templates may 1623 // protect us from this, but it needs more investigation. 1624 1625 J.attribute("sym", getMangledName(CurMangleContext, MethodDecl)); 1626 1627 J.objectEnd(); 1628 } 1629 J.arrayEnd(); 1630 J.attributeEnd(); 1631 1632 } else { 1633 J.attribute("kind", "function"); 1634 } 1635 1636 // ## Props 1637 J.attributeBegin("props"); 1638 J.arrayBegin(); 1639 // some of these are only possible on a CXXMethodDecl, but we want them all 1640 // in the same array, so condition these first ones. 1641 if (cxxDecl) { 1642 if (cxxDecl->isStatic()) { 1643 J.value("static"); 1644 } 1645 if (cxxDecl->isInstance()) { 1646 J.value("instance"); 1647 } 1648 if (cxxDecl->isVirtual()) { 1649 J.value("virtual"); 1650 } 1651 if (cxxDecl->isUserProvided()) { 1652 J.value("user"); 1653 } 1654 } 1655 if (decl->isDefaulted()) { 1656 J.value("defaulted"); 1657 } 1658 if (decl->isDeleted()) { 1659 J.value("deleted"); 1660 } 1661 if (decl->isConstexpr()) { 1662 J.value("constexpr"); 1663 } 1664 J.arrayEnd(); 1665 J.attributeEnd(); 1666 } 1667 1668 /** 1669 * Emit structured info for a field. Right now the intent is for this to just 1670 * be a pointer to its parent's structured info with this method entirely 1671 * avoiding getting the ASTRecordLayout. 1672 * 1673 * TODO: Give more thought on where to locate the canonical info on fields and 1674 * how to normalize their exposure over the web. We could relink the info 1675 * both at cross-reference time and web-server lookup time. This is also 1676 * called out in `analysis.md`. 1677 */ 1678 void emitStructuredFieldInfo(llvm::json::OStream &J, const FieldDecl *decl) { 1679 J.attribute("kind", "field"); 1680 1681 // XXX the call to decl::getParent will assert below for ObjCIvarDecl 1682 // instances because their DecContext is not a RecordDecl. So just bail 1683 // for now. 1684 // TODO: better support ObjC. 1685 if (!dyn_cast<ObjCIvarDecl>(decl)) { 1686 if (auto parentDecl = decl->getParent()) { 1687 J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl)); 1688 } 1689 } 1690 } 1691 1692 /** 1693 * Emit structured info for a variable if it is a static class member. 1694 */ 1695 void emitStructuredVarInfo(llvm::json::OStream &J, const VarDecl *decl) { 1696 const auto *parentDecl = 1697 dyn_cast_or_null<RecordDecl>(decl->getDeclContext()); 1698 1699 if (parentDecl) { 1700 J.attribute("kind", "field"); 1701 } else if (llvm::isa<ParmVarDecl>(decl)) { 1702 J.attribute("kind", "parameter"); 1703 } else if (decl->isLocalVarDecl()) { 1704 J.attribute("kind", "localVar"); 1705 } else { 1706 // namespace scope variable 1707 J.attribute("kind", "variable"); 1708 } 1709 1710 if (parentDecl) { 1711 J.attribute("parentsym", getMangledName(CurMangleContext, parentDecl)); 1712 } 1713 1714 emitBindingAttributes(J, *decl); 1715 } 1716 1717 void emitStructuredInfo(SourceLocation Loc, const NamedDecl *decl, 1718 LayoutHandling layoutHandling = LayoutHandling::UseLayout) { 1719 std::string json_str; 1720 llvm::raw_string_ostream ros(json_str); 1721 llvm::json::OStream J(ros); 1722 // Start the top-level object. 1723 J.objectBegin(); 1724 1725 unsigned StartOffset = SM.getFileOffset(Loc); 1726 unsigned EndOffset = 1727 StartOffset + Lexer::MeasureTokenLength(Loc, SM, CI.getLangOpts()); 1728 J.attribute("loc", locationToString(Loc, EndOffset - StartOffset)); 1729 J.attribute("structured", 1); 1730 J.attribute("pretty", getQualifiedName(decl)); 1731 J.attribute("sym", getMangledName(CurMangleContext, decl)); 1732 1733 if (const RecordDecl *RD = dyn_cast<RecordDecl>(decl)) { 1734 emitStructuredRecordInfo(J, Loc, RD, layoutHandling); 1735 } else if (const EnumDecl *ED = dyn_cast<EnumDecl>(decl)) { 1736 emitStructuredEnumInfo(J, ED); 1737 } else if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(decl)) { 1738 emitStructuredEnumConstantInfo(J, ECD); 1739 } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(decl)) { 1740 emitStructuredFunctionInfo(J, FD); 1741 } else if (const FieldDecl *FD = dyn_cast<FieldDecl>(decl)) { 1742 emitStructuredFieldInfo(J, FD); 1743 } else if (const VarDecl *VD = dyn_cast<VarDecl>(decl)) { 1744 emitStructuredVarInfo(J, VD); 1745 } 1746 1747 // End the top-level object. 1748 J.objectEnd(); 1749 1750 FileInfo *F = getFileInfo(Loc); 1751 // we want a newline. 1752 ros << '\n'; 1753 F->Output.push_back(std::move(ros.str())); 1754 } 1755 1756 // XXX Type annotating. 1757 // QualType is the type class. It has helpers like TagDecl via getAsTagDecl. 1758 // ValueDecl exposes a getType() method. 1759 // 1760 // Arguably it makes sense to only expose types that Searchfox has definitions 1761 // for as first-class. Probably the way to go is like context/contextsym. 1762 // We expose a "type" which is just a human-readable string which has no 1763 // semantic purposes and is just a display string, plus then a "typesym" which 1764 // we expose if we were able to map the type. 1765 // 1766 // Other meta-info: field offsets. Ancestor types. 1767 1768 // This is the only function that emits analysis JSON data. It should be 1769 // called for each identifier that corresponds to a symbol. 1770 void visitIdentifier(const char *Kind, const char *SyntaxKind, 1771 llvm::StringRef QualName, SourceRange LocRange, 1772 std::string Symbol, QualType MaybeType = QualType(), 1773 Context TokenContext = Context(), int Flags = 0, 1774 SourceRange PeekRange = SourceRange(), 1775 SourceRange NestingRange = SourceRange(), 1776 std::vector<SourceRange> *ArgRanges = nullptr) { 1777 SourceLocation Loc = LocRange.getBegin(); 1778 1779 // Also visit the spelling site. 1780 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 1781 if (SpellingLoc != Loc) { 1782 visitIdentifier(Kind, SyntaxKind, QualName, SpellingLoc, Symbol, 1783 MaybeType, TokenContext, Flags, PeekRange, NestingRange, 1784 ArgRanges); 1785 } 1786 1787 SourceLocation ExpansionLoc = SM.getExpansionLoc(Loc); 1788 normalizeLocation(&ExpansionLoc); 1789 1790 if (!shouldVisit(ExpansionLoc)) { 1791 return; 1792 } 1793 1794 if (ExpansionLoc != Loc) 1795 Flags = Flags & ~LocRangeEndValid; 1796 1797 // Find the file positions corresponding to the token. 1798 unsigned StartOffset = SM.getFileOffset(ExpansionLoc); 1799 unsigned EndOffset = 1800 (Flags & LocRangeEndValid) 1801 ? SM.getFileOffset(LocRange.getEnd()) 1802 : StartOffset + 1803 Lexer::MeasureTokenLength(ExpansionLoc, SM, CI.getLangOpts()); 1804 1805 std::string LocStr = 1806 locationToString(ExpansionLoc, EndOffset - StartOffset); 1807 std::string RangeStr = 1808 locationToString(ExpansionLoc, EndOffset - StartOffset); 1809 std::string PeekRangeStr; 1810 1811 if (!(Flags & NotIdentifierToken)) { 1812 // Get the token's characters so we can make sure it's a valid token. 1813 const char *StartChars = SM.getCharacterData(ExpansionLoc); 1814 std::string Text(StartChars, EndOffset - StartOffset); 1815 if (!isValidIdentifier(Text)) { 1816 return; 1817 } 1818 } 1819 1820 FileInfo *F = getFileInfo(ExpansionLoc); 1821 1822 if (!(Flags & NoCrossref)) { 1823 std::string json_str; 1824 llvm::raw_string_ostream ros(json_str); 1825 llvm::json::OStream J(ros); 1826 // Start the top-level object. 1827 J.objectBegin(); 1828 1829 J.attribute("loc", LocStr); 1830 J.attribute("target", 1); 1831 J.attribute("kind", Kind); 1832 J.attribute("pretty", QualName.data()); 1833 J.attribute("sym", Symbol); 1834 if (!TokenContext.Name.empty()) { 1835 J.attribute("context", TokenContext.Name); 1836 } 1837 if (!TokenContext.Symbol.empty()) { 1838 J.attribute("contextsym", TokenContext.Symbol); 1839 } 1840 if (PeekRange.isValid()) { 1841 PeekRangeStr = lineRangeToString(PeekRange); 1842 if (!PeekRangeStr.empty()) { 1843 J.attribute("peekRange", PeekRangeStr); 1844 } 1845 } 1846 1847 if (ArgRanges) { 1848 J.attributeBegin("argRanges"); 1849 J.arrayBegin(); 1850 1851 for (auto range : *ArgRanges) { 1852 std::string ArgRangeStr = fullRangeToString(range); 1853 if (!ArgRangeStr.empty()) { 1854 J.value(ArgRangeStr); 1855 } 1856 } 1857 1858 J.arrayEnd(); 1859 J.attributeEnd(); 1860 } 1861 1862 // End the top-level object. 1863 J.objectEnd(); 1864 // we want a newline. 1865 ros << '\n'; 1866 F->Output.push_back(std::move(ros.str())); 1867 } 1868 1869 // Generate a single "source":1 for all the symbols. If we search from here, 1870 // we want to union the results for every symbol in `symbols`. 1871 std::string json_str; 1872 llvm::raw_string_ostream ros(json_str); 1873 llvm::json::OStream J(ros); 1874 // Start the top-level object. 1875 J.objectBegin(); 1876 1877 J.attribute("loc", RangeStr); 1878 J.attribute("source", 1); 1879 1880 if (NestingRange.isValid()) { 1881 std::string NestingRangeStr = fullRangeToString(NestingRange); 1882 if (!NestingRangeStr.empty()) { 1883 J.attribute("nestingRange", NestingRangeStr); 1884 } 1885 } 1886 1887 std::string Syntax; 1888 if (Flags & NoCrossref) { 1889 J.attribute("syntax", ""); 1890 } else { 1891 Syntax = Kind; 1892 Syntax.push_back(','); 1893 Syntax.append(SyntaxKind); 1894 J.attribute("syntax", Syntax); 1895 } 1896 1897 if (!MaybeType.isNull()) { 1898 J.attribute("type", MaybeType.getAsString()); 1899 QualType canonical = MaybeType.getCanonicalType(); 1900 const TagDecl *decl = canonical->getAsTagDecl(); 1901 if (!decl) { 1902 // Try again piercing any pointers/references involved. Note that our 1903 // typesym semantics are dubious-ish and right now crossref just does 1904 // some parsing of "type" itself until we improve this rep. 1905 canonical = canonical->getPointeeType(); 1906 if (!canonical.isNull()) { 1907 decl = canonical->getAsTagDecl(); 1908 } 1909 } 1910 if (decl) { 1911 std::string Mangled = getMangledName(CurMangleContext, decl); 1912 J.attribute("typesym", Mangled); 1913 } 1914 } 1915 1916 std::string Pretty(SyntaxKind); 1917 Pretty.push_back(' '); 1918 Pretty.append(QualName.data()); 1919 J.attribute("pretty", Pretty); 1920 1921 J.attribute("sym", Symbol); 1922 1923 if (Flags & NoCrossref) { 1924 J.attribute("no_crossref", 1); 1925 } 1926 1927 if (Flags & Heuristic) { 1928 J.attributeBegin("confidence"); 1929 J.arrayBegin(); 1930 J.value("cppTemplateHeuristic"); 1931 J.arrayEnd(); 1932 J.attributeEnd(); 1933 } 1934 1935 if (ArgRanges) { 1936 J.attributeBegin("argRanges"); 1937 J.arrayBegin(); 1938 1939 for (auto range : *ArgRanges) { 1940 std::string ArgRangeStr = fullRangeToString(range); 1941 if (!ArgRangeStr.empty()) { 1942 J.value(ArgRangeStr); 1943 } 1944 } 1945 1946 J.arrayEnd(); 1947 J.attributeEnd(); 1948 } 1949 1950 const auto macro = MacroMaps.find(ExpansionLoc); 1951 if (macro != MacroMaps.end()) { 1952 const auto ¯oInfo = macro->second; 1953 if (macroInfo.Symbol == Symbol) { 1954 J.attributeBegin("expandsTo"); 1955 J.objectBegin(); 1956 J.attributeBegin(macroInfo.Key); 1957 J.objectBegin(); 1958 J.attribute("", macroInfo.Expansion); // "" is the platform key, 1959 // populated by the merge step 1960 J.objectEnd(); 1961 J.attributeEnd(); 1962 J.objectEnd(); 1963 J.attributeEnd(); 1964 } else { 1965 const auto it = macroInfo.TokenLocations.find(Loc); 1966 if (it != macroInfo.TokenLocations.end()) { 1967 J.attributeBegin("inExpansionAt"); 1968 J.objectBegin(); 1969 J.attributeBegin(macroInfo.Key); 1970 J.objectBegin(); 1971 J.attributeBegin( 1972 ""); // "" is the platform key, populated by the merge step 1973 J.arrayBegin(); 1974 J.value(it->second); 1975 J.arrayEnd(); 1976 J.attributeEnd(); 1977 J.objectEnd(); 1978 J.attributeEnd(); 1979 J.objectEnd(); 1980 J.attributeEnd(); 1981 } 1982 } 1983 } 1984 1985 // End the top-level object. 1986 J.objectEnd(); 1987 1988 // we want a newline. 1989 ros << '\n'; 1990 F->Output.push_back(std::move(ros.str())); 1991 } 1992 1993 void normalizeLocation(SourceLocation *Loc) { 1994 *Loc = SM.getSpellingLoc(*Loc); 1995 } 1996 1997 // For cases where the left-brace is not directly accessible from the AST, 1998 // helper to use the lexer to find the brace. Make sure you're picking the 1999 // start location appropriately! 2000 SourceLocation findLeftBraceFromLoc(SourceLocation Loc) { 2001 return Lexer::findLocationAfterToken(Loc, tok::l_brace, SM, LO, false); 2002 } 2003 2004 // If the provided statement is compound, return its range. 2005 SourceRange getCompoundStmtRange(Stmt *D) { 2006 if (!D) { 2007 return SourceRange(); 2008 } 2009 2010 CompoundStmt *D2 = dyn_cast<CompoundStmt>(D); 2011 if (D2) { 2012 return D2->getSourceRange(); 2013 } 2014 2015 return SourceRange(); 2016 } 2017 2018 SourceRange getFunctionPeekRange(FunctionDecl *D) { 2019 // We always start at the start of the function decl, which may include the 2020 // return type on a separate line. 2021 SourceLocation Start = D->getBeginLoc(); 2022 2023 // By default, we end at the line containing the function's name. 2024 SourceLocation End = D->getLocation(); 2025 2026 std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedExpansionLoc(End); 2027 2028 // But if there are parameters, we want to include those as well. 2029 for (ParmVarDecl *Param : D->parameters()) { 2030 std::pair<FileID, unsigned> ParamLoc = 2031 SM.getDecomposedExpansionLoc(Param->getLocation()); 2032 2033 // It's possible there are macros involved or something. We don't include 2034 // the parameters in that case. 2035 if (ParamLoc.first == FuncLoc.first) { 2036 // Assume parameters are in order, so we always take the last one. 2037 End = Param->getEndLoc(); 2038 } 2039 } 2040 2041 return SourceRange(Start, End); 2042 } 2043 2044 SourceRange getTagPeekRange(TagDecl *D) { 2045 SourceLocation Start = D->getBeginLoc(); 2046 2047 // By default, we end at the line containing the name. 2048 SourceLocation End = D->getLocation(); 2049 2050 std::pair<FileID, unsigned> FuncLoc = SM.getDecomposedExpansionLoc(End); 2051 2052 if (CXXRecordDecl *D2 = dyn_cast<CXXRecordDecl>(D)) { 2053 // But if there are parameters, we want to include those as well. 2054 for (CXXBaseSpecifier &Base : D2->bases()) { 2055 std::pair<FileID, unsigned> Loc = SM.getDecomposedExpansionLoc(Base.getEndLoc()); 2056 2057 // It's possible there are macros involved or something. We don't 2058 // include the parameters in that case. 2059 if (Loc.first == FuncLoc.first) { 2060 // Assume parameters are in order, so we always take the last one. 2061 End = Base.getEndLoc(); 2062 } 2063 } 2064 } 2065 2066 return SourceRange(Start, End); 2067 } 2068 2069 SourceRange getCommentRange(NamedDecl *D) { 2070 const RawComment *RC = AstContext->getRawCommentForDeclNoCache(D); 2071 if (!RC) { 2072 return SourceRange(); 2073 } 2074 2075 return RC->getSourceRange(); 2076 } 2077 2078 // Sanity checks that all ranges are in the same file, returning the first if 2079 // they're in different files. Unions the ranges based on which is first. 2080 SourceRange combineRanges(SourceRange Range1, SourceRange Range2) { 2081 if (Range1.isInvalid()) { 2082 return Range2; 2083 } 2084 if (Range2.isInvalid()) { 2085 return Range1; 2086 } 2087 2088 std::pair<FileID, unsigned> Begin1 = SM.getDecomposedExpansionLoc(Range1.getBegin()); 2089 std::pair<FileID, unsigned> End1 = SM.getDecomposedExpansionLoc(Range1.getEnd()); 2090 std::pair<FileID, unsigned> Begin2 = SM.getDecomposedExpansionLoc(Range2.getBegin()); 2091 std::pair<FileID, unsigned> End2 = SM.getDecomposedExpansionLoc(Range2.getEnd()); 2092 2093 if (End1.first != Begin2.first) { 2094 // Something weird is probably happening with the preprocessor. Just 2095 // return the first range. 2096 return Range1; 2097 } 2098 2099 // See which range comes first. 2100 if (Begin1.second <= End2.second) { 2101 return SourceRange(Range1.getBegin(), Range2.getEnd()); 2102 } else { 2103 return SourceRange(Range2.getBegin(), Range1.getEnd()); 2104 } 2105 } 2106 2107 // Given a location and a range, returns the range if: 2108 // - The location and the range live in the same file. 2109 // - The range is well ordered (end is not before begin). 2110 // Returns an empty range otherwise. 2111 SourceRange validateRange(SourceLocation Loc, SourceRange Range) { 2112 std::pair<FileID, unsigned> Decomposed = SM.getDecomposedExpansionLoc(Loc); 2113 std::pair<FileID, unsigned> Begin = SM.getDecomposedExpansionLoc(Range.getBegin()); 2114 std::pair<FileID, unsigned> End = SM.getDecomposedExpansionLoc(Range.getEnd()); 2115 2116 if (Begin.first != Decomposed.first || End.first != Decomposed.first) { 2117 return SourceRange(); 2118 } 2119 2120 if (Begin.second >= End.second) { 2121 return SourceRange(); 2122 } 2123 2124 return Range; 2125 } 2126 2127 bool VisitNamedDecl(NamedDecl *D) { 2128 SourceLocation Loc = D->getLocation(); 2129 if (!isInterestingLocation(Loc)) { 2130 return true; 2131 } 2132 2133 SourceLocation ExpansionLoc = Loc; 2134 if (SM.isMacroBodyExpansion(Loc)) { 2135 ExpansionLoc = SM.getFileLoc(Loc); 2136 } 2137 normalizeLocation(&ExpansionLoc); 2138 2139 if (isa<ParmVarDecl>(D) && !D->getDeclName().getAsIdentifierInfo()) { 2140 // Unnamed parameter in function proto. 2141 return true; 2142 } 2143 2144 int Flags = 0; 2145 const char *Kind = "def"; 2146 const char *PrettyKind = "?"; 2147 bool wasTemplate = false; 2148 SourceRange PeekRange(D->getBeginLoc(), D->getEndLoc()); 2149 // The nesting range identifies the left brace and right brace, which 2150 // heavily depends on the AST node type. 2151 SourceRange NestingRange; 2152 QualType qtype = QualType(); 2153 if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) { 2154 if (D2->isTemplateInstantiation()) { 2155 wasTemplate = true; 2156 D = D2->getTemplateInstantiationPattern(); 2157 } 2158 // We treat pure virtual declarations as definitions. 2159 Kind = 2160 (D2->isThisDeclarationADefinition() || isPure(D2)) ? "def" : "decl"; 2161 PrettyKind = "function"; 2162 PeekRange = getFunctionPeekRange(D2); 2163 2164 // Only emit the nesting range if: 2165 // - This is a definition AND 2166 // - This isn't a template instantiation. Function templates' 2167 // instantiations can end up as a definition with a Loc at their point 2168 // of declaration but with the CompoundStmt of the template's 2169 // point of definition. This really messes up the nesting range logic. 2170 // At the time of writing this, the test repo's `big_header.h`'s 2171 // `WhatsYourVector_impl::forwardDeclaredTemplateThingInlinedBelow` as 2172 // instantiated by `big_cpp.cpp` triggers this phenomenon. 2173 // 2174 // Note: As covered elsewhere, template processing is tricky and it's 2175 // conceivable that we may change traversal patterns in the future, 2176 // mooting this guard. 2177 if (D2->isThisDeclarationADefinition() && 2178 !D2->isTemplateInstantiation()) { 2179 // The CompoundStmt range is the brace range. 2180 NestingRange = getCompoundStmtRange(D2->getBody()); 2181 } 2182 } else if (TagDecl *D2 = dyn_cast<TagDecl>(D)) { 2183 Kind = D2->isThisDeclarationADefinition() ? "def" : "forward"; 2184 PrettyKind = "type"; 2185 2186 if (D2->isThisDeclarationADefinition() && D2->getDefinition() == D2) { 2187 PeekRange = getTagPeekRange(D2); 2188 NestingRange = D2->getBraceRange(); 2189 } else { 2190 PeekRange = SourceRange(); 2191 } 2192 } else if (TypedefNameDecl *D2 = dyn_cast<TypedefNameDecl>(D)) { 2193 Kind = "alias"; 2194 PrettyKind = "type"; 2195 PeekRange = SourceRange(ExpansionLoc, ExpansionLoc); 2196 qtype = D2->getUnderlyingType(); 2197 } else if (VarDecl *D2 = dyn_cast<VarDecl>(D)) { 2198 if (D2->isLocalVarDeclOrParm()) { 2199 Flags = NoCrossref; 2200 } 2201 2202 Kind = D2->isThisDeclarationADefinition() == VarDecl::DeclarationOnly 2203 ? "decl" 2204 : "def"; 2205 PrettyKind = "variable"; 2206 2207 if (needsNestingRangeForVarDecl(PeekRange)) { 2208 NestingRange = PeekRange; 2209 } 2210 } else if (isa<NamespaceDecl>(D) || isa<NamespaceAliasDecl>(D)) { 2211 Kind = "def"; 2212 PrettyKind = "namespace"; 2213 PeekRange = SourceRange(ExpansionLoc, ExpansionLoc); 2214 NamespaceDecl *D2 = dyn_cast<NamespaceDecl>(D); 2215 if (D2) { 2216 // There's no exposure of the left brace so we have to find it. 2217 NestingRange = SourceRange( 2218 findLeftBraceFromLoc(D2->isAnonymousNamespace() ? D2->getBeginLoc() 2219 : ExpansionLoc), 2220 D2->getRBraceLoc()); 2221 } 2222 } else if (isa<FieldDecl>(D)) { 2223 Kind = "def"; 2224 PrettyKind = "field"; 2225 } else if (isa<EnumConstantDecl>(D)) { 2226 Kind = "def"; 2227 PrettyKind = "enum constant"; 2228 } else { 2229 return true; 2230 } 2231 2232 if (ValueDecl *D2 = dyn_cast<ValueDecl>(D)) { 2233 qtype = D2->getType(); 2234 } 2235 2236 SourceRange CommentRange = getCommentRange(D); 2237 PeekRange = combineRanges(PeekRange, CommentRange); 2238 PeekRange = validateRange(Loc, PeekRange); 2239 NestingRange = validateRange(Loc, NestingRange); 2240 2241 std::string Symbol = getMangledName(CurMangleContext, D); 2242 2243 // In the case of destructors, Loc might point to the ~ character. In that 2244 // case we want to skip to the name of the class. However, Loc might also 2245 // point to other places that generate destructors, such as a lambda 2246 // (apparently clang 8 creates a destructor declaration for at least some 2247 // lambdas). In that case we'll just drop the declaration. 2248 if (isa<CXXDestructorDecl>(D)) { 2249 PrettyKind = "destructor"; 2250 const char *P = SM.getCharacterData(Loc); 2251 if (*P == '~') { 2252 // Advance Loc to the class name 2253 P++; 2254 2255 unsigned Skipped = 1; 2256 while (*P == ' ' || *P == '\t' || *P == '\r' || *P == '\n') { 2257 P++; 2258 Skipped++; 2259 } 2260 2261 Loc = Loc.getLocWithOffset(Skipped); 2262 } else { 2263 return true; 2264 } 2265 } 2266 2267 visitIdentifier(Kind, PrettyKind, getQualifiedName(D), SourceRange(Loc), 2268 Symbol, qtype, getContext(D), Flags, PeekRange, 2269 NestingRange); 2270 2271 // In-progress structured info emission. 2272 if (RecordDecl *D2 = dyn_cast<RecordDecl>(D)) { 2273 if (D2->isThisDeclarationADefinition() && 2274 // We don't emit structured info for template leaf classes 2275 // in order to reduce the memory consumption comes from 2276 // too many instantiation gathered to container classes in 2277 // crossref-extra and jumpref-extra. 2278 // 2279 // Once that part is solved, those template leaf classes 2280 // can be emitted by skipping getASTRecordLayout call and 2281 // the Layout handling in emitStructuredRecordInfo. 2282 // 2283 // See https://github.com/mozsearch/mozsearch/pull/906 2284 !D2->isDependentType() && !TemplateStack) { 2285 if (auto *D3 = dyn_cast<CXXRecordDecl>(D2)) { 2286 findBindingToJavaClass(*AstContext, *D3); 2287 findBoundAsJavaClasses(*AstContext, *D3); 2288 } 2289 emitStructuredInfo(ExpansionLoc, D2, LayoutHandling::UseLayout); 2290 } 2291 } 2292 if (EnumDecl *D2 = dyn_cast<EnumDecl>(D)) { 2293 if (D2->isThisDeclarationADefinition() && !D2->isDependentType() && 2294 !TemplateStack) { 2295 emitStructuredInfo(ExpansionLoc, D2); 2296 } 2297 } 2298 if (EnumConstantDecl *D2 = dyn_cast<EnumConstantDecl>(D)) { 2299 if (!D2->isTemplated() && !TemplateStack) { 2300 emitStructuredInfo(ExpansionLoc, D2); 2301 } 2302 } 2303 if (FunctionDecl *D2 = dyn_cast<FunctionDecl>(D)) { 2304 if ((D2->isThisDeclarationADefinition() || isPure(D2)) && 2305 // a clause at the top should have generalized and set wasTemplate so 2306 // it shouldn't be the case that isTemplateInstantiation() is true. 2307 !D2->isTemplateInstantiation() && !wasTemplate && 2308 !D2->isFunctionTemplateSpecialization() && !TemplateStack) { 2309 if (auto *D3 = dyn_cast<CXXMethodDecl>(D2)) { 2310 findBindingToJavaMember(*AstContext, *D3); 2311 } else { 2312 findBindingToJavaFunction(*AstContext, *D2); 2313 } 2314 emitStructuredInfo(ExpansionLoc, D2); 2315 } 2316 } 2317 if (FieldDecl *D2 = dyn_cast<FieldDecl>(D)) { 2318 if (!D2->isTemplated() && !TemplateStack) { 2319 emitStructuredInfo(ExpansionLoc, D2); 2320 } 2321 } 2322 if (VarDecl *D2 = dyn_cast<VarDecl>(D)) { 2323 if (!D2->isTemplated() && !TemplateStack) { 2324 findBindingToJavaConstant(*AstContext, *D2); 2325 emitStructuredInfo(ExpansionLoc, D2); 2326 } 2327 } 2328 2329 return true; 2330 } 2331 2332 bool VisitCXXConstructExpr(const CXXConstructExpr *E) { 2333 // If we are in a template and find a Stmt that was registed in 2334 // ForwardedTemplateLocations, convert the location to an actual Stmt* in 2335 // ForwardingTemplates 2336 if (TemplateStack && !TemplateStack->inGatherMode()) { 2337 if (ForwardedTemplateLocations.find(E->getBeginLoc().getRawEncoding()) != 2338 ForwardedTemplateLocations.end()) { 2339 if (const auto *currentTemplate = 2340 getCurrentFunctionTemplateInstantiation()) { 2341 ForwardingTemplates.insert({currentTemplate, E}); 2342 } 2343 return true; 2344 } 2345 } 2346 2347 SourceLocation Loc = E->getBeginLoc(); 2348 if (!isInterestingLocation(Loc)) { 2349 return true; 2350 } 2351 2352 return VisitCXXConstructExpr(E, Loc); 2353 } 2354 2355 bool VisitCXXConstructExpr(const CXXConstructExpr *E, SourceLocation Loc) { 2356 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2357 2358 FunctionDecl *Ctor = E->getConstructor(); 2359 if (Ctor->isTemplateInstantiation()) { 2360 Ctor = Ctor->getTemplateInstantiationPattern(); 2361 } 2362 std::string Mangled = getMangledName(CurMangleContext, Ctor); 2363 2364 // FIXME: Need to do something different for list initialization. 2365 2366 visitIdentifier("use", "constructor", getQualifiedName(Ctor), Loc, Mangled, 2367 QualType(), getContext(SpellingLoc)); 2368 2369 return true; 2370 } 2371 2372 CallExpr *CurrentCall = nullptr; 2373 bool TraverseCallExpr(CallExpr *E) { 2374 const auto _ = ValueRollback(CurrentCall, E); 2375 return Super::TraverseCallExpr(E); 2376 } 2377 2378 bool VisitCallExpr(CallExpr *E) { 2379 Expr *CalleeExpr = E->getCallee()->IgnoreParenImpCasts(); 2380 2381 if (TemplateStack) { 2382 const auto CalleeLocation = [&] { 2383 if (const auto *Member = 2384 dyn_cast<CXXDependentScopeMemberExpr>(CalleeExpr)) { 2385 return Member->getMemberLoc(); 2386 } 2387 if (const auto *DeclRef = 2388 dyn_cast<DependentScopeDeclRefExpr>(CalleeExpr)) { 2389 return DeclRef->getLocation(); 2390 } 2391 if (const auto *DeclRef = dyn_cast<DeclRefExpr>(CalleeExpr)) { 2392 return DeclRef->getLocation(); 2393 } 2394 2395 // Does the right thing for MemberExpr and UnresolvedMemberExpr at 2396 // least. 2397 return CalleeExpr->getExprLoc(); 2398 }(); 2399 2400 // If we are in a template: 2401 // - when in GatherDependent mode and the callee is type-dependent, 2402 // register it in ForwardedTemplateLocations 2403 // - when in AnalyseDependent mode and the callee is in 2404 // ForwardedTemplateLocations, convert the location to an actual Stmt* 2405 // in ForwardingTemplates 2406 if (TemplateStack->inGatherMode()) { 2407 if (CalleeExpr->isTypeDependent()) { 2408 TemplateStack->visitDependent(CalleeLocation); 2409 ForwardedTemplateLocations.insert(CalleeLocation.getRawEncoding()); 2410 } 2411 } else { 2412 if (ForwardedTemplateLocations.find(CalleeLocation.getRawEncoding()) != 2413 ForwardedTemplateLocations.end()) { 2414 if (const auto *currentTemplate = 2415 getCurrentFunctionTemplateInstantiation()) { 2416 ForwardingTemplates.insert({currentTemplate, E}); 2417 } 2418 } 2419 } 2420 } 2421 2422 Decl *Callee = E->getCalleeDecl(); 2423 if (!Callee || !FunctionDecl::classof(Callee)) { 2424 return true; 2425 } 2426 2427 const NamedDecl *NamedCallee = dyn_cast<NamedDecl>(Callee); 2428 2429 SourceLocation Loc; 2430 2431 const FunctionDecl *F = dyn_cast<FunctionDecl>(NamedCallee); 2432 if (F->isTemplateInstantiation()) { 2433 NamedCallee = F->getTemplateInstantiationPattern(); 2434 } 2435 2436 std::string Mangled = getMangledName(CurMangleContext, NamedCallee); 2437 int Flags = 0; 2438 2439 if (CXXOperatorCallExpr::classof(E)) { 2440 // Just take the first token. 2441 CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E); 2442 Loc = Op->getOperatorLoc(); 2443 Flags |= NotIdentifierToken; 2444 } else if (MemberExpr::classof(CalleeExpr)) { 2445 MemberExpr *Member = dyn_cast<MemberExpr>(CalleeExpr); 2446 Loc = Member->getMemberLoc(); 2447 } else if (DeclRefExpr::classof(CalleeExpr)) { 2448 // We handle this in VisitDeclRefExpr. 2449 return true; 2450 } else { 2451 return true; 2452 } 2453 2454 if (!isInterestingLocation(Loc)) { 2455 return true; 2456 } 2457 2458 if (F->isTemplateInstantiation()) { 2459 VisitForwardedStatements(E, Loc); 2460 } 2461 2462 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2463 2464 std::vector<SourceRange> argRanges; 2465 for (auto argExpr : E->arguments()) { 2466 argRanges.push_back(argExpr->getSourceRange()); 2467 } 2468 2469 visitIdentifier("use", "function", getQualifiedName(NamedCallee), Loc, 2470 Mangled, E->getCallReturnType(*AstContext), 2471 getContext(SpellingLoc), Flags, SourceRange(), 2472 SourceRange(), &argRanges); 2473 2474 return true; 2475 } 2476 2477 bool VisitTagTypeLoc(TagTypeLoc L) { 2478 SourceLocation Loc = L.getBeginLoc(); 2479 if (!isInterestingLocation(Loc)) { 2480 return true; 2481 } 2482 2483 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2484 2485 TagDecl *Decl = L.getDecl(); 2486 std::string Mangled = getMangledName(CurMangleContext, Decl); 2487 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, 2488 L.getType(), getContext(SpellingLoc)); 2489 return true; 2490 } 2491 2492 bool VisitTypedefTypeLoc(TypedefTypeLoc L) { 2493 SourceLocation Loc = L.getBeginLoc(); 2494 if (!isInterestingLocation(Loc)) { 2495 return true; 2496 } 2497 2498 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2499 2500 NamedDecl *Decl = L.getTypedefNameDecl(); 2501 std::string Mangled = getMangledName(CurMangleContext, Decl); 2502 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, 2503 L.getType(), getContext(SpellingLoc)); 2504 return true; 2505 } 2506 2507 bool VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc L) { 2508 SourceLocation Loc = L.getBeginLoc(); 2509 if (!isInterestingLocation(Loc)) { 2510 return true; 2511 } 2512 2513 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2514 2515 NamedDecl *Decl = L.getDecl(); 2516 std::string Mangled = getMangledName(CurMangleContext, Decl); 2517 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, 2518 L.getType(), getContext(SpellingLoc)); 2519 return true; 2520 } 2521 2522 bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc L) { 2523 SourceLocation Loc = L.getBeginLoc(); 2524 if (!isInterestingLocation(Loc)) { 2525 return true; 2526 } 2527 2528 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2529 2530 TemplateDecl *Td = L.getTypePtr()->getTemplateName().getAsTemplateDecl(); 2531 if (ClassTemplateDecl *D = dyn_cast<ClassTemplateDecl>(Td)) { 2532 NamedDecl *Decl = D->getTemplatedDecl(); 2533 std::string Mangled = getMangledName(CurMangleContext, Decl); 2534 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, 2535 QualType(), getContext(SpellingLoc)); 2536 } else if (TypeAliasTemplateDecl *D = dyn_cast<TypeAliasTemplateDecl>(Td)) { 2537 NamedDecl *Decl = D->getTemplatedDecl(); 2538 std::string Mangled = getMangledName(CurMangleContext, Decl); 2539 visitIdentifier("use", "type", getQualifiedName(Decl), Loc, Mangled, 2540 QualType(), getContext(SpellingLoc)); 2541 } 2542 2543 return true; 2544 } 2545 2546 bool VisitDependentNameTypeLoc(DependentNameTypeLoc L) { 2547 SourceLocation Loc = L.getNameLoc(); 2548 if (!isInterestingLocation(Loc)) { 2549 return true; 2550 } 2551 2552 for (const NamedDecl *D : 2553 Resolver->resolveDependentNameType(L.getTypePtr())) { 2554 visitHeuristicResult(Loc, D); 2555 } 2556 return true; 2557 } 2558 2559 void VisitForwardedStatements(const Expr *E, SourceLocation Loc) { 2560 // If Loc itself is forwarded to its callers, do nothing 2561 if (ForwardedTemplateLocations.find(Loc.getRawEncoding()) != 2562 ForwardedTemplateLocations.cend()) 2563 return; 2564 2565 // If this is a forwarding template (eg MakeUnique), visit the forwarded 2566 // statements 2567 auto todo = std::stack{std::vector<const Stmt *>{E}}; 2568 auto seen = std::unordered_set<const Stmt *>{}; 2569 while (!todo.empty()) { 2570 const auto forwarded = std::move(todo.top()); 2571 todo.pop(); 2572 if (seen.find(forwarded) != seen.end()) 2573 continue; 2574 seen.insert(forwarded); 2575 2576 if (const auto *C = dyn_cast<CXXConstructExpr>(forwarded)) 2577 VisitCXXConstructExpr(C, Loc); 2578 2579 const Decl *Decl = nullptr; 2580 if (const auto *D = dyn_cast<CallExpr>(forwarded)) 2581 Decl = D->getCalleeDecl(); 2582 if (const auto *D = dyn_cast<DeclRefExpr>(forwarded)) 2583 Decl = D->getDecl(); 2584 2585 if (!Decl) 2586 continue; 2587 const auto *F = Decl->getAsFunction(); 2588 if (!F) 2589 continue; 2590 if (!F->isTemplateInstantiation()) 2591 continue; 2592 const auto [ForwardedBegin, ForwardedEnd] = 2593 ForwardingTemplates.equal_range(F); 2594 for (auto ForwardedIt = ForwardedBegin; ForwardedIt != ForwardedEnd; 2595 ++ForwardedIt) 2596 if (seen.find(ForwardedIt->second) == seen.end()) 2597 todo.push(ForwardedIt->second); 2598 } 2599 } 2600 2601 bool VisitDeclRefExpr(const DeclRefExpr *E) { 2602 SourceLocation Loc = E->getExprLoc(); 2603 if (!isInterestingLocation(Loc)) { 2604 return true; 2605 } 2606 2607 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2608 2609 if (E->hasQualifier()) { 2610 Loc = E->getNameInfo().getLoc(); 2611 SpellingLoc = SM.getSpellingLoc(Loc); 2612 } 2613 2614 const NamedDecl *Decl = E->getDecl(); 2615 if (const VarDecl *D2 = dyn_cast<VarDecl>(Decl)) { 2616 int Flags = 0; 2617 if (D2->isLocalVarDeclOrParm()) { 2618 Flags = NoCrossref; 2619 } 2620 std::string Mangled = getMangledName(CurMangleContext, Decl); 2621 visitIdentifier("use", "variable", getQualifiedName(Decl), Loc, Mangled, 2622 D2->getType(), getContext(SpellingLoc), Flags); 2623 } else if (isa<FunctionDecl>(Decl)) { 2624 const FunctionDecl *F = dyn_cast<FunctionDecl>(Decl); 2625 if (F->isTemplateInstantiation()) { 2626 Decl = F->getTemplateInstantiationPattern(); 2627 VisitForwardedStatements(E, Loc); 2628 } 2629 2630 std::string Mangled = getMangledName(CurMangleContext, Decl); 2631 visitIdentifier("use", "function", getQualifiedName(Decl), Loc, Mangled, 2632 E->getType(), getContext(SpellingLoc)); 2633 } else if (isa<EnumConstantDecl>(Decl)) { 2634 std::string Mangled = getMangledName(CurMangleContext, Decl); 2635 visitIdentifier("use", "enum", getQualifiedName(Decl), Loc, Mangled, 2636 E->getType(), getContext(SpellingLoc)); 2637 } 2638 2639 return true; 2640 } 2641 2642 bool VisitCXXConstructorDecl(CXXConstructorDecl *D) { 2643 if (!isInterestingLocation(D->getLocation())) { 2644 return true; 2645 } 2646 2647 for (CXXConstructorDecl::init_const_iterator It = D->init_begin(); 2648 It != D->init_end(); ++It) { 2649 const CXXCtorInitializer *Ci = *It; 2650 if (!Ci->getMember() || !Ci->isWritten()) { 2651 continue; 2652 } 2653 2654 SourceLocation Loc = Ci->getMemberLocation(); 2655 if (!isInterestingLocation(Loc)) { 2656 continue; 2657 } 2658 2659 FieldDecl *Member = Ci->getMember(); 2660 std::string Mangled = getMangledName(CurMangleContext, Member); 2661 // We want the constructor to be the context of the field use and 2662 // `getContext(D)` would skip the current context. An alternate approach 2663 // would be `getContext(Loc)` but the heuristic to omit a context if we're 2664 // in a macro body expansion seems incorrect for field initializations; if 2665 // code is using macros to initialize the fields, we still care. 2666 visitIdentifier("use", "field", getQualifiedName(Member), Loc, Mangled, 2667 Member->getType(), translateContext(D)); 2668 } 2669 2670 return true; 2671 } 2672 2673 bool VisitMemberExpr(MemberExpr *E) { 2674 SourceLocation Loc = E->getExprLoc(); 2675 if (!isInterestingLocation(Loc)) { 2676 return true; 2677 } 2678 2679 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2680 2681 ValueDecl *Decl = E->getMemberDecl(); 2682 if (FieldDecl *Field = dyn_cast<FieldDecl>(Decl)) { 2683 std::string Mangled = getMangledName(CurMangleContext, Field); 2684 visitIdentifier("use", "field", getQualifiedName(Field), Loc, Mangled, 2685 Field->getType(), getContext(SpellingLoc)); 2686 } 2687 return true; 2688 } 2689 2690 // Helper function for producing heuristic results for usages in dependent 2691 // code. These are distinguished from concrete results (obtained for dependent 2692 // code using the AutoTemplateContext machinery) by setting the “confidence” 2693 // property to “cppTemplateHeuristic”. We don't expect this method to be 2694 // intentionally called multiple times for a given (Loc, NamedDecl) pair 2695 // because our callers should be mutually exclusive AST node types. However, 2696 // it's fine if this method is called multiple time for a given pair because 2697 // we explicitly de-duplicate records with an identical string representation 2698 // (which is a good reason to have this helper, as it ensures identical 2699 // representations). 2700 void visitHeuristicResult(SourceLocation Loc, const NamedDecl *ND) { 2701 SourceLocation SpellingLoc = SM.getSpellingLoc(Loc); 2702 2703 if (const UsingShadowDecl *USD = dyn_cast<UsingShadowDecl>(ND)) { 2704 ND = USD->getTargetDecl(); 2705 } 2706 if (const TemplateDecl *TD = dyn_cast<TemplateDecl>(ND)) { 2707 ND = TD->getTemplatedDecl(); 2708 } 2709 QualType MaybeType; 2710 const char *SyntaxKind = nullptr; 2711 if (const FunctionDecl *F = dyn_cast<FunctionDecl>(ND)) { 2712 MaybeType = F->getType(); 2713 SyntaxKind = "function"; 2714 } else if (const FieldDecl *F = dyn_cast<FieldDecl>(ND)) { 2715 MaybeType = F->getType(); 2716 SyntaxKind = "field"; 2717 } else if (const EnumConstantDecl *E = dyn_cast<EnumConstantDecl>(ND)) { 2718 MaybeType = E->getType(); 2719 SyntaxKind = "enum"; 2720 } else if (const TypedefNameDecl *T = dyn_cast<TypedefNameDecl>(ND)) { 2721 MaybeType = T->getUnderlyingType(); 2722 SyntaxKind = "type"; 2723 } 2724 if (SyntaxKind) { 2725 std::string Mangled = getMangledName(CurMangleContext, ND); 2726 visitIdentifier("use", SyntaxKind, getQualifiedName(ND), Loc, Mangled, 2727 MaybeType, getContext(SpellingLoc), Heuristic); 2728 } 2729 } 2730 2731 bool arityMatchesCurrentCallExpr(const Expr *E, const NamedDecl *Candidate) { 2732 const auto IsCurrentCallee = CurrentCall && E == CurrentCall->getCallee(); 2733 const auto CallNumArgs = 2734 IsCurrentCallee ? CurrentCall->getNumArgs() : std::optional<uint>{}; 2735 2736 const FunctionDecl *CandidateFunc; 2737 if (const auto *UsingDecl = dyn_cast<UsingShadowDecl>(Candidate)) { 2738 CandidateFunc = UsingDecl->getTargetDecl()->getAsFunction(); 2739 } else { 2740 CandidateFunc = Candidate->getAsFunction(); 2741 } 2742 2743 // We try and filter candidates by arity, but be conservative and accept 2744 // them when we don't know better 2745 if (!CandidateFunc || !CallNumArgs) { 2746 return true; 2747 } 2748 2749 const auto MinNumArgs = CandidateFunc->getMinRequiredExplicitArguments(); 2750 const auto MaxNumArgs = [&]() -> std::optional<uint> { 2751 const auto IsVariadic = 2752 CandidateFunc->isVariadic() || 2753 std::any_of(CandidateFunc->param_begin(), CandidateFunc->param_end(), 2754 [](const ParmVarDecl *param) { 2755 return param->isParameterPack(); 2756 }); 2757 2758 if (IsVariadic) 2759 return {}; 2760 2761 return CandidateFunc->getNumNonObjectParams(); 2762 }(); 2763 2764 if (CallNumArgs < MinNumArgs || (MaxNumArgs && CallNumArgs > *MaxNumArgs)) { 2765 return false; 2766 } 2767 2768 return true; 2769 } 2770 2771 bool VisitOverloadExpr(OverloadExpr *E) { 2772 SourceLocation Loc = E->getExprLoc(); 2773 normalizeLocation(&Loc); 2774 if (!isInterestingLocation(Loc)) { 2775 return true; 2776 } 2777 2778 for (auto *Candidate : E->decls()) { 2779 if (arityMatchesCurrentCallExpr(E, Candidate)) 2780 visitHeuristicResult(Loc, Candidate); 2781 } 2782 2783 // Also record this location so that if we have instantiations, we can 2784 // gather more accurate results from them. 2785 if (TemplateStack) { 2786 TemplateStack->visitDependent(Loc); 2787 } 2788 return true; 2789 } 2790 2791 bool VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E) { 2792 SourceLocation Loc = E->getMemberLoc(); 2793 normalizeLocation(&Loc); 2794 if (!isInterestingLocation(Loc)) { 2795 return true; 2796 } 2797 2798 for (const NamedDecl *Candidate : Resolver->resolveMemberExpr(E)) { 2799 if (arityMatchesCurrentCallExpr(E, Candidate)) 2800 visitHeuristicResult(Loc, Candidate); 2801 } 2802 2803 // Also record this location so that if we have instantiations, we can 2804 // gather more accurate results from them. 2805 if (TemplateStack) { 2806 TemplateStack->visitDependent(Loc); 2807 } 2808 return true; 2809 } 2810 2811 bool VisitCXXNewExpr(CXXNewExpr *N) { 2812 // If we are in a template and the new is type-dependent, register it in 2813 // ForwardedTemplateLocations to forward its uses to the surrounding 2814 // template call site 2815 if (TemplateStack && TemplateStack->inGatherMode()) { 2816 const auto *TypeInfo = N->getAllocatedTypeSourceInfo(); 2817 const auto ConstructExprLoc = TypeInfo->getTypeLoc().getBeginLoc(); 2818 if (N->isTypeDependent()) { 2819 TemplateStack->visitDependent(ConstructExprLoc); 2820 ForwardedTemplateLocations.insert(ConstructExprLoc.getRawEncoding()); 2821 } 2822 } 2823 return true; 2824 } 2825 2826 bool VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) { 2827 SourceLocation Loc = E->getLocation(); 2828 normalizeLocation(&Loc); 2829 if (!isInterestingLocation(Loc)) { 2830 return true; 2831 } 2832 2833 for (const NamedDecl *Candidate : Resolver->resolveDeclRefExpr(E)) { 2834 if (arityMatchesCurrentCallExpr(E, Candidate)) 2835 visitHeuristicResult(Loc, Candidate); 2836 } 2837 2838 // Also record this location so that if we have instantiations, we can 2839 // gather more accurate results from them. 2840 if (TemplateStack) { 2841 TemplateStack->visitDependent(Loc); 2842 2843 // Also record the dependent NestedNameSpecifier locations 2844 for (auto NestedNameLoc = E->getQualifierLoc(); 2845 NestedNameLoc && 2846 NestedNameLoc.getNestedNameSpecifier()->isDependent(); 2847 NestedNameLoc = NestedNameLoc.getPrefix()) { 2848 TemplateStack->visitDependent(NestedNameLoc.getLocalBeginLoc()); 2849 } 2850 } 2851 2852 return true; 2853 } 2854 2855 bool VisitStringLiteral(StringLiteral *E) { 2856 if (E->getCharByteWidth() != 1) { 2857 return true; 2858 } 2859 2860 StringRef sref = E->getString(); 2861 std::string s = sref.str(); 2862 2863 bool isMozSrc = stringStartsWith(s, "moz-src:///"); 2864 2865 if (!stringStartsWith(s, "chrome://") && 2866 !stringStartsWith(s, "resource://") && 2867 !isMozSrc) { 2868 return true; 2869 } 2870 2871 if (!isASCII(s)) { 2872 return true; 2873 } 2874 2875 SourceLocation Loc = E->getStrTokenLoc(0); 2876 normalizeLocation(&Loc); 2877 2878 std::string symbol; 2879 2880 if (isMozSrc) { 2881 symbol = std::string("FILE_") + mangleFile(s.substr(11), FileType::Source); 2882 } else { 2883 symbol = std::string("URL_") + mangleURL(s); 2884 } 2885 2886 visitIdentifier("use", "file", StringRef(s), Loc, symbol, QualType(), 2887 Context(), NotIdentifierToken | LocRangeEndValid); 2888 2889 return true; 2890 } 2891 2892 void enterSourceFile(SourceLocation Loc) { 2893 normalizeLocation(&Loc); 2894 FileInfo *newFile = getFileInfo(Loc); 2895 if (!newFile->Interesting) { 2896 return; 2897 } 2898 FileType type = newFile->Generated ? FileType::Generated : FileType::Source; 2899 std::string symbol = 2900 std::string("FILE_") + mangleFile(newFile->Realname, type); 2901 2902 // We use an explicit zero-length source range at the start of the file. If 2903 // we don't set the LocRangeEndValid flag, the visitIdentifier code will use 2904 // the entire first token, which could be e.g. a long multiline-comment. 2905 visitIdentifier("def", "file", newFile->Realname, SourceRange(Loc), symbol, 2906 QualType(), Context(), 2907 NotIdentifierToken | LocRangeEndValid); 2908 } 2909 2910 void inclusionDirective(SourceLocation HashLoc, SourceRange FileNameRange, const FileEntry *File) { 2911 std::string includedFile(File->tryGetRealPathName()); 2912 FileType type = relativizePath(includedFile, CI.getHeaderSearchOpts()); 2913 if (type == FileType::Unknown) { 2914 return; 2915 } 2916 std::string symbol = std::string("FILE_") + mangleFile(includedFile, type); 2917 2918 // Support the #include MACRO use-case 2919 // When parsing #include MACRO: 2920 // - the filename is never passed to onTokenLexed 2921 // - inclusionDirective is called before endMacroExpansion (which is only 2922 // called when the following token is parsed) So add the filename here and 2923 // call endMacroExpansion immediately. This ensures the macro has a correct 2924 // expansion and it has been added to MacroMaps so the referenced filename 2925 // knows to populate inExpansionAt. 2926 if (MacroExpansionState) { 2927 MacroExpansionState->TokenLocations[FileNameRange.getBegin()] = 2928 MacroExpansionState->Expansion.length(); 2929 MacroExpansionState->Expansion += '"'; 2930 MacroExpansionState->Expansion += includedFile; 2931 MacroExpansionState->Expansion += '"'; 2932 endMacroExpansion(); 2933 } 2934 2935 normalizeLocation(&HashLoc); 2936 FileInfo *thisFile = getFileInfo(HashLoc); 2937 FileType thisType = thisFile->Generated ? FileType::Generated : FileType::Source; 2938 std::string thisFilePretty = thisFile->Realname; 2939 std::string thisFileSym = 2940 std::string("FILE_") + mangleFile(thisFile->Realname, thisType); 2941 2942 visitIdentifier("use", "file", includedFile, FileNameRange, symbol, 2943 QualType(), Context(thisFilePretty, thisFileSym), 2944 NotIdentifierToken | LocRangeEndValid); 2945 } 2946 2947 void macroDefined(const Token &Tok, const MacroDirective *Macro) { 2948 if (Macro->getMacroInfo()->isBuiltinMacro()) { 2949 return; 2950 } 2951 SourceLocation Loc = Tok.getLocation(); 2952 normalizeLocation(&Loc); 2953 if (!isInterestingLocation(Loc)) { 2954 return; 2955 } 2956 2957 IdentifierInfo *Ident = Tok.getIdentifierInfo(); 2958 if (Ident) { 2959 std::string Mangled = std::string("M_") + 2960 mangleLocation(Loc, std::string(Ident->getName())); 2961 visitIdentifier("def", "macro", Ident->getName(), Loc, Mangled); 2962 } 2963 } 2964 2965 void macroUsed(const Token &Tok, const MacroInfo *Macro) { 2966 if (!Macro) { 2967 return; 2968 } 2969 if (Macro->isBuiltinMacro()) { 2970 return; 2971 } 2972 SourceLocation Loc = Tok.getLocation(); 2973 if (!isInterestingLocation(Loc)) { 2974 return; 2975 } 2976 2977 IdentifierInfo *Ident = Tok.getIdentifierInfo(); 2978 if (Ident) { 2979 std::string Mangled = 2980 std::string("M_") + mangleLocation(Macro->getDefinitionLoc(), 2981 std::string(Ident->getName())); 2982 visitIdentifier("use", "macro", Ident->getName(), Loc, Mangled); 2983 } 2984 } 2985 2986 void beginMacroExpansion(const Token &Tok, const MacroInfo *Macro, 2987 SourceRange Range) { 2988 if (!Macro) 2989 return; 2990 2991 if (Macro->isBuiltinMacro()) 2992 return; 2993 2994 if (!Tok.getIdentifierInfo()) 2995 return; 2996 2997 auto location = Tok.getLocation(); 2998 normalizeLocation(&location); 2999 if (!isInterestingLocation(location)) 3000 return; 3001 3002 if (MacroExpansionState) { 3003 const auto InMacroArgs = MacroExpansionState->Range.fullyContains( 3004 SM.getExpansionRange(Range).getAsRange()); 3005 const auto InMacroBody = 3006 SM.getExpansionLoc(Tok.getLocation()) == 3007 SM.getExpansionLoc(MacroExpansionState->MacroNameToken.getLocation()); 3008 if (InMacroArgs || InMacroBody) { 3009 if (MacroExpansionState->MacroInfo->getDefinitionLoc() != 3010 Macro->getDefinitionLoc()) { 3011 IdentifierInfo *DependencyIdent = Tok.getIdentifierInfo(); 3012 std::string DependencySymbol = 3013 std::string("M_") + 3014 mangleLocation(Macro->getDefinitionLoc(), 3015 std::string(DependencyIdent->getName())); 3016 3017 MacroExpansionState->Dependencies.push_back(DependencySymbol); 3018 } 3019 3020 macroUsed(Tok, Macro); 3021 return; 3022 } 3023 3024 endMacroExpansion(); 3025 } 3026 3027 MacroExpansionState = ::MacroExpansionState{ 3028 .MacroNameToken = Tok, 3029 .MacroInfo = Macro, 3030 .Expansion = {}, 3031 .TokenLocations = {}, 3032 .Range = Range, 3033 .PrevPrevTok = {}, 3034 .PrevTok = {}, 3035 }; 3036 } 3037 3038 void endMacroExpansion() { 3039 // large macros are too slow to reformat, don't reformat macros larger than 3040 // those arbitrary thresholds 3041 static constexpr auto includedFileExpansionReformatThreshold = 20'000; 3042 static constexpr auto mainFileExpansionReformatThreshold = 200'000; 3043 3044 const auto expansionLocation = 3045 SM.getExpansionLoc(MacroExpansionState->MacroNameToken.getLocation()); 3046 const auto expansionFilename = SM.getFilename(expansionLocation); 3047 const auto includedExtensions = 3048 std::array{".h", ".hpp", ".hxx", ".inc", ".def"}; 3049 const auto isIncludedFile = 3050 std::any_of(includedExtensions.begin(), includedExtensions.end(), 3051 [&](const auto *extension) { 3052 return expansionFilename.ends_with_insensitive(extension); 3053 }); 3054 const auto expansionReformatThreshold = 3055 isIncludedFile ? includedFileExpansionReformatThreshold 3056 : mainFileExpansionReformatThreshold; 3057 3058 if (MacroExpansionState->Expansion.length() < expansionReformatThreshold) { 3059 // large macros are too memory-hungry to reformat with ColumnLimit != 0 3060 // see https://github.com/llvm/llvm-project/issues/107434 3061 auto style = clang::format::getMozillaStyle(); 3062 if (MacroExpansionState->Expansion.length() > 3063 includedFileExpansionReformatThreshold) 3064 style.ColumnLimit = 0; 3065 3066 const auto replacements = clang::format::reformat( 3067 style, MacroExpansionState->Expansion, 3068 {tooling::Range(0, MacroExpansionState->Expansion.length())}); 3069 auto formatted = clang::tooling::applyAllReplacements( 3070 MacroExpansionState->Expansion, replacements); 3071 if (formatted) { 3072 for (auto &[k, v] : MacroExpansionState->TokenLocations) { 3073 v = replacements.getShiftedCodePosition(v); 3074 } 3075 MacroExpansionState->Expansion = std::move(formatted.get()); 3076 } 3077 } 3078 3079 IdentifierInfo *Ident = 3080 MacroExpansionState->MacroNameToken.getIdentifierInfo(); 3081 std::string Symbol = 3082 std::string("M_") + 3083 mangleLocation(MacroExpansionState->MacroInfo->getDefinitionLoc(), 3084 std::string(Ident->getName())); 3085 3086 const auto dependenciesBegin = MacroExpansionState->Dependencies.begin(); 3087 const auto dependenciesEnd = MacroExpansionState->Dependencies.end(); 3088 std::sort(dependenciesBegin, dependenciesEnd); 3089 MacroExpansionState->Dependencies.erase( 3090 std::unique(dependenciesBegin, dependenciesEnd), dependenciesEnd); 3091 3092 auto Key = Symbol; 3093 for (const auto &Dependency : MacroExpansionState->Dependencies) { 3094 Key.push_back(','); 3095 Key += Dependency; 3096 } 3097 3098 MacroMaps.emplace(std::pair{ 3099 MacroExpansionState->MacroNameToken.getLocation(), 3100 ExpandedMacro{ 3101 std::move(Symbol), 3102 std::move(Key), 3103 std::move(MacroExpansionState->Expansion), 3104 std::move(MacroExpansionState->TokenLocations), 3105 }, 3106 }); 3107 3108 MacroExpansionState.reset(); 3109 3110 macroUsed(MacroExpansionState->MacroNameToken, 3111 MacroExpansionState->MacroInfo); 3112 } 3113 3114 void onTokenLexed(const Token &Tok) { 3115 if (!MacroExpansionState) 3116 return; 3117 3118 // check if we exited the macro expansion 3119 SourceLocation SLoc = Tok.getLocation(); 3120 if (!SLoc.isMacroID()) { 3121 endMacroExpansion(); 3122 return; 3123 } 3124 3125 if (ConcatInfo.AvoidConcat(MacroExpansionState->PrevPrevTok, 3126 MacroExpansionState->PrevTok, Tok)) { 3127 MacroExpansionState->Expansion += ' '; 3128 } 3129 3130 if (Tok.isAnnotation()) { 3131 const auto Range = SM.getImmediateExpansionRange(Tok.getLocation()); 3132 const char *Start = SM.getCharacterData(Range.getBegin()); 3133 const char *End = SM.getCharacterData(Range.getEnd()) + 1; 3134 MacroExpansionState->Expansion += StringRef(Start, End - Start); 3135 } else { 3136 const auto spelling = CI.getPreprocessor().getSpelling(Tok); 3137 if (Tok.isAnyIdentifier()) { 3138 MacroExpansionState->TokenLocations[SLoc] = 3139 MacroExpansionState->Expansion.length(); 3140 } 3141 MacroExpansionState->Expansion += spelling; 3142 } 3143 3144 MacroExpansionState->PrevPrevTok = MacroExpansionState->PrevTok; 3145 MacroExpansionState->PrevTok = Tok; 3146 } 3147 }; 3148 3149 void PreprocessorHook::FileChanged(SourceLocation Loc, FileChangeReason Reason, 3150 SrcMgr::CharacteristicKind FileType, 3151 FileID PrevFID = FileID()) { 3152 switch (Reason) { 3153 case PPCallbacks::RenameFile: 3154 case PPCallbacks::SystemHeaderPragma: 3155 // Don't care about these, since we want the actual on-disk filenames 3156 break; 3157 case PPCallbacks::EnterFile: 3158 Indexer->enterSourceFile(Loc); 3159 break; 3160 case PPCallbacks::ExitFile: 3161 // Don't care about exiting files 3162 break; 3163 } 3164 } 3165 3166 void PreprocessorHook::InclusionDirective( 3167 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, 3168 bool IsAngled, CharSourceRange FileNameRange, 3169 #if CLANG_VERSION_MAJOR >= 16 3170 OptionalFileEntryRef File, 3171 #elif CLANG_VERSION_MAJOR >= 15 3172 Optional<FileEntryRef> File, 3173 #else 3174 const FileEntry *File, 3175 #endif 3176 StringRef SearchPath, StringRef RelativePath, 3177 #if CLANG_VERSION_MAJOR >= 19 3178 const Module *SuggestedModule, bool ModuleImported, 3179 #else 3180 const Module *Imported, 3181 #endif 3182 SrcMgr::CharacteristicKind FileType) { 3183 #if CLANG_VERSION_MAJOR >= 15 3184 if (!File) { 3185 return; 3186 } 3187 Indexer->inclusionDirective(HashLoc, FileNameRange.getAsRange(), 3188 &File->getFileEntry()); 3189 #else 3190 Indexer->inclusionDirective(HashLoc, FileNameRange.getAsRange(), File); 3191 #endif 3192 } 3193 3194 void PreprocessorHook::MacroDefined(const Token &Tok, 3195 const MacroDirective *Md) { 3196 Indexer->macroDefined(Tok, Md); 3197 } 3198 3199 void PreprocessorHook::MacroExpands(const Token &Tok, const MacroDefinition &Md, 3200 SourceRange Range, const MacroArgs *Ma) { 3201 Indexer->beginMacroExpansion(Tok, Md.getMacroInfo(), Range); 3202 } 3203 3204 void PreprocessorHook::MacroUndefined(const Token &Tok, 3205 const MacroDefinition &Md, 3206 const MacroDirective *Undef) { 3207 Indexer->macroUsed(Tok, Md.getMacroInfo()); 3208 } 3209 3210 void PreprocessorHook::Defined(const Token &Tok, const MacroDefinition &Md, 3211 SourceRange Range) { 3212 Indexer->macroUsed(Tok, Md.getMacroInfo()); 3213 } 3214 3215 void PreprocessorHook::Ifdef(SourceLocation Loc, const Token &Tok, 3216 const MacroDefinition &Md) { 3217 Indexer->macroUsed(Tok, Md.getMacroInfo()); 3218 } 3219 3220 void PreprocessorHook::Ifndef(SourceLocation Loc, const Token &Tok, 3221 const MacroDefinition &Md) { 3222 Indexer->macroUsed(Tok, Md.getMacroInfo()); 3223 } 3224 3225 class IndexAction : public PluginASTAction { 3226 protected: 3227 std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, 3228 llvm::StringRef F) { 3229 return make_unique<IndexConsumer>(CI); 3230 } 3231 3232 bool ParseArgs(const CompilerInstance &CI, 3233 const std::vector<std::string> &Args) { 3234 if (Args.size() != 3) { 3235 DiagnosticsEngine &D = CI.getDiagnostics(); 3236 unsigned DiagID = D.getCustomDiagID( 3237 DiagnosticsEngine::Error, 3238 "Need arguments for the source, output, and object directories"); 3239 D.Report(DiagID); 3240 return false; 3241 } 3242 3243 // Load our directories 3244 Srcdir = getAbsolutePath(Args[0]); 3245 if (Srcdir.empty()) { 3246 DiagnosticsEngine &D = CI.getDiagnostics(); 3247 unsigned DiagID = D.getCustomDiagID( 3248 DiagnosticsEngine::Error, "Source directory '%0' does not exist"); 3249 D.Report(DiagID) << Args[0]; 3250 return false; 3251 } 3252 3253 ensurePath(Args[1] + PATHSEP_STRING); 3254 Outdir = getAbsolutePath(Args[1]); 3255 Outdir += PATHSEP_STRING; 3256 3257 Objdir = getAbsolutePath(Args[2]); 3258 if (Objdir.empty()) { 3259 DiagnosticsEngine &D = CI.getDiagnostics(); 3260 unsigned DiagID = D.getCustomDiagID(DiagnosticsEngine::Error, 3261 "Objdir '%0' does not exist"); 3262 D.Report(DiagID) << Args[2]; 3263 return false; 3264 } 3265 Objdir += PATHSEP_STRING; 3266 3267 printf("MOZSEARCH: %s %s %s\n", Srcdir.c_str(), Outdir.c_str(), 3268 Objdir.c_str()); 3269 3270 return true; 3271 } 3272 3273 void printHelp(llvm::raw_ostream &Ros) { 3274 Ros << "Help for mozsearch plugin goes here\n"; 3275 } 3276 }; 3277 3278 static FrontendPluginRegistry::Add<IndexAction> 3279 Y("mozsearch-index", "create the mozsearch index database");