diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 9c37cfe7b700..7cfbd6f95750 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -119,6 +119,7 @@ add_clang_library(clangDaemon index/Ref.cpp index/Relation.cpp index/Serialization.cpp + index/StdLib.cpp index/Symbol.cpp index/SymbolCollector.cpp index/SymbolID.cpp diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 80d7d5c5ece1..69a0f63972aa 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -26,6 +26,7 @@ #include "index/CanonicalIncludes.h" #include "index/FileIndex.h" #include "index/Merge.h" +#include "index/StdLib.h" #include "refactor/Rename.h" #include "refactor/Tweak.h" #include "support/Cancellation.h" @@ -59,16 +60,39 @@ namespace { // Update the FileIndex with new ASTs and plumb the diagnostics responses. struct UpdateIndexCallbacks : public ParsingCallbacks { UpdateIndexCallbacks(FileIndex *FIndex, - ClangdServer::Callbacks *ServerCallbacks) - : FIndex(FIndex), ServerCallbacks(ServerCallbacks) {} + ClangdServer::Callbacks *ServerCallbacks, + const ThreadsafeFS &TFS, AsyncTaskRunner *Tasks) + : FIndex(FIndex), ServerCallbacks(ServerCallbacks), TFS(TFS), + Tasks(Tasks) {} - void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx, + void onPreambleAST(PathRef Path, llvm::StringRef Version, + const CompilerInvocation &CI, ASTContext &Ctx, Preprocessor &PP, const CanonicalIncludes &CanonIncludes) override { + // If this preamble uses a standard library we haven't seen yet, index it. 
+ if (FIndex) + if (auto Loc = Stdlib.add(*CI.getLangOpts(), PP.getHeaderSearchInfo())) + indexStdlib(CI, std::move(*Loc)); + if (FIndex) FIndex->updatePreamble(Path, Version, Ctx, PP, CanonIncludes); } + void indexStdlib(const CompilerInvocation &CI, StdLibLocation Loc) { + auto Task = [this, LO(*CI.getLangOpts()), Loc(std::move(Loc)), + CI(std::make_unique(CI))]() mutable { + IndexFileIn IF; + IF.Symbols = indexStandardLibrary(std::move(CI), Loc, TFS); + if (Stdlib.isBest(LO)) + FIndex->updatePreamble(std::move(IF)); + }; + if (Tasks) + // This doesn't have a semaphore to enforce -j, but it's rare. + Tasks->runAsync("IndexStdlib", std::move(Task)); + else + Task(); + } + void onMainAST(PathRef Path, ParsedAST &AST, PublishFn Publish) override { if (FIndex) FIndex->updateMain(Path, AST); @@ -103,6 +127,9 @@ struct UpdateIndexCallbacks : public ParsingCallbacks { private: FileIndex *FIndex; ClangdServer::Callbacks *ServerCallbacks; + const ThreadsafeFS &TFS; + StdLibSet Stdlib; + AsyncTaskRunner *Tasks; }; class DraftStoreFS : public ThreadsafeFS { @@ -154,12 +181,15 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, Transient(Opts.ImplicitCancellation ? TUScheduler::InvalidateOnUpdate : TUScheduler::NoInvalidation), DirtyFS(std::make_unique(TFS, DraftMgr)) { + if (Opts.AsyncThreadsCount != 0) + IndexTasks.emplace(); // Pass a callback into `WorkScheduler` to extract symbols from a newly // parsed file and rebuild the file index synchronously each time an AST // is parsed. - WorkScheduler.emplace( - CDB, TUScheduler::Options(Opts), - std::make_unique(DynamicIdx.get(), Callbacks)); + WorkScheduler.emplace(CDB, TUScheduler::Options(Opts), + std::make_unique( + DynamicIdx.get(), Callbacks, TFS, + IndexTasks ? IndexTasks.getPointer() : nullptr)); // Adds an index to the stack, at higher priority than existing indexes. 
auto AddIndex = [&](SymbolIndex *Idx) { if (this->Index != nullptr) { @@ -975,6 +1005,9 @@ ClangdServer::blockUntilIdleForTest(llvm::Optional TimeoutSeconds) { // and we're blocking the main thread. if (!WorkScheduler->blockUntilIdle(timeoutSeconds(TimeoutSeconds))) return false; + // TUScheduler is the only thing that starts background indexing work. + if (IndexTasks && !IndexTasks->wait(timeoutSeconds(TimeoutSeconds))) + return false; // Unfortunately we don't have strict topological order between the rest of // the components. E.g. CDB broadcast triggers backrgound indexing. diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index 6d999722805e..e73454901cff 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -428,6 +428,7 @@ private: mutable std::mutex CachedCompletionFuzzyFindRequestMutex; llvm::Optional WorkspaceRoot; + llvm::Optional IndexTasks; // for stdlib indexing. llvm::Optional WorkScheduler; // Invalidation policy used for actions that we assume are "transient". TUScheduler::ASTActionInvalidation Transient; diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h index 734dce43c587..ec7247121d5c 100644 --- a/clang-tools-extra/clangd/Config.h +++ b/clang-tools-extra/clangd/Config.h @@ -81,11 +81,12 @@ struct Config { /// forward-slashes. std::string MountPoint; }; - /// Controls background-index behavior. + /// Controls index behavior. struct { - /// Whether this TU should be indexed. + /// Whether this TU should be background-indexed. 
BackgroundPolicy Background = BackgroundPolicy::Build; ExternalIndexSpec External; + bool StandardLibrary = false; } Index; enum UnusedIncludesPolicy { Strict, None }; diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index a4d7904781e4..0cdbc5526e6e 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -332,6 +332,11 @@ struct FragmentCompiler { } if (F.External) compile(std::move(**F.External), F.External->Range); + if (F.StandardLibrary) + Out.Apply.push_back( + [Val(**F.StandardLibrary)](const Params &, Config &C) { + C.Index.StandardLibrary = Val; + }); } void compile(Fragment::IndexBlock::ExternalBlock &&External, diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h index 34bff844cb26..5950f8ff655c 100644 --- a/clang-tools-extra/clangd/ConfigFragment.h +++ b/clang-tools-extra/clangd/ConfigFragment.h @@ -199,6 +199,9 @@ struct Fragment { llvm::Optional> MountPoint; }; llvm::Optional> External; + // Whether the standard library visible from this file should be indexed. + // This makes all standard library symbols available, included or not. 
+ llvm::Optional> StandardLibrary; }; IndexBlock Index; diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp index ec39bb968664..cec60756a343 100644 --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -184,6 +184,10 @@ private: F.External.emplace(std::move(External)); F.External->Range = N.getSourceRange(); }); + Dict.handle("StandardLibrary", [&](Node &N) { + if (auto StandardLibrary = boolValue(N, "StandardLibrary")) + F.StandardLibrary = *StandardLibrary; + }); Dict.parse(N); } diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 19d4ca5a48a5..f60fbfaa479f 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -1013,9 +1013,10 @@ void PreambleThread::build(Request Req) { bool IsFirstPreamble = !LatestBuild; LatestBuild = clang::clangd::buildPreamble( FileName, *Req.CI, Inputs, StoreInMemory, - [this, Version(Inputs.Version)](ASTContext &Ctx, Preprocessor &PP, - const CanonicalIncludes &CanonIncludes) { - Callbacks.onPreambleAST(FileName, Version, Ctx, PP, CanonIncludes); + [&](ASTContext &Ctx, Preprocessor &PP, + const CanonicalIncludes &CanonIncludes) { + Callbacks.onPreambleAST(FileName, Inputs.Version, *Req.CI, Ctx, PP, + CanonIncludes); }, &Stats); if (!LatestBuild) diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h index ceb7ea0f0239..a852199ba7cb 100644 --- a/clang-tools-extra/clangd/TUScheduler.h +++ b/clang-tools-extra/clangd/TUScheduler.h @@ -133,8 +133,8 @@ public: /// contains only AST nodes from the #include directives at the start of the /// file. AST node in the current file should be observed on onMainAST call. 
virtual void onPreambleAST(PathRef Path, llvm::StringRef Version, - ASTContext &Ctx, Preprocessor &PP, - const CanonicalIncludes &) {} + const CompilerInvocation &CI, ASTContext &Ctx, + Preprocessor &PP, const CanonicalIncludes &) {} /// The argument function is run under the critical section guarding against /// races when closing the files. diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index 72f7c0801250..dcfc4b5981fa 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -425,12 +425,7 @@ FileIndex::FileIndex() MainFileSymbols(IndexContents::All), MainFileIndex(std::make_unique()) {} -void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, - ASTContext &AST, Preprocessor &PP, - const CanonicalIncludes &Includes) { - IndexFileIn IF; - std::tie(IF.Symbols, std::ignore, IF.Relations) = - indexHeaderSymbols(Version, AST, PP, Includes); +void FileIndex::updatePreamble(IndexFileIn IF) { FileShardedIndex ShardedIndex(std::move(IF)); for (auto Uri : ShardedIndex.getAllSources()) { auto IF = ShardedIndex.getShard(Uri); @@ -461,6 +456,15 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, } } +void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, + ASTContext &AST, Preprocessor &PP, + const CanonicalIncludes &Includes) { + IndexFileIn IF; + std::tie(IF.Symbols, std::ignore, IF.Relations) = + indexHeaderSymbols(Version, AST, PP, Includes); + updatePreamble(std::move(IF)); +} + void FileIndex::updateMain(PathRef Path, ParsedAST &AST) { auto Contents = indexMainDecls(AST); MainFileSymbols.update( diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h index 4c6f965e7801..24ffbc9c7fb5 100644 --- a/clang-tools-extra/clangd/index/FileIndex.h +++ b/clang-tools-extra/clangd/index/FileIndex.h @@ -114,6 +114,7 @@ public: /// and macros in \p PP. 
void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST, Preprocessor &PP, const CanonicalIncludes &Includes); + void updatePreamble(IndexFileIn); /// Update symbols and references from main file \p Path with /// `indexMainDecls`. diff --git a/clang-tools-extra/clangd/index/StdLib.cpp b/clang-tools-extra/clangd/index/StdLib.cpp new file mode 100644 index 000000000000..378efedd0f2a --- /dev/null +++ b/clang-tools-extra/clangd/index/StdLib.cpp @@ -0,0 +1,362 @@ +//===-- StdLib.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "StdLib.h" +#include +#include +#include +#include + +#include "Compiler.h" +#include "Config.h" +#include "SymbolCollector.h" +#include "index/IndexAction.h" +#include "support/Logger.h" +#include "support/ThreadsafeFS.h" +#include "support/Trace.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Lex/PreprocessorOptions.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +namespace clang { +namespace clangd { +namespace { + +enum Lang { C, CXX }; + +Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? 
CXX : C; } +llvm::StringLiteral mandatoryHeader(Lang L) { + switch (L) { + case C: + return "stdio.h"; + case CXX: + return "vector"; + } + llvm_unreachable("unhandled Lang"); +} + +LangStandard::Kind standardFromOpts(const LangOptions &LO) { + if (LO.CPlusPlus) { + if (LO.CPlusPlus2b) + return LangStandard::lang_cxx2b; + if (LO.CPlusPlus20) + return LangStandard::lang_cxx20; + if (LO.CPlusPlus17) + return LangStandard::lang_cxx17; + if (LO.CPlusPlus14) + return LangStandard::lang_cxx14; + if (LO.CPlusPlus11) + return LangStandard::lang_cxx11; + return LangStandard::lang_cxx98; + } + if (LO.C2x) + return LangStandard::lang_c2x; + // C17 has no new features, so treat {C11,C17} as C17. + if (LO.C11) + return LangStandard::lang_c17; + return LangStandard::lang_c99; +} + +std::string buildUmbrella(llvm::StringLiteral Mandatory, + std::vector Headers) { + std::string Result; + llvm::raw_string_ostream OS(Result); + + // We __has_include guard all our #includes to avoid errors when using older + // stdlib version that don't have headers for the newest language standards. + // But make sure we get *some* error if things are totally broken. + OS << llvm::formatv( + "#if !__has_include(<{0}>)\n" + "#error Mandatory header <{0}> not found in standard library!\n" + "#endif\n", + Mandatory); + + llvm::sort(Headers.begin(), Headers.end()); + auto Last = std::unique(Headers.begin(), Headers.end()); + for (auto Header = Headers.begin(); Header != Last; ++Header) { + OS << llvm::formatv("#if __has_include({0})\n" + "#include {0}\n" + "#endif\n", + *Header); + } + OS.flush(); + return Result; +} + +} // namespace + +llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) { + // The umbrella header is the same for all versions of each language. + // Headers that are unsupported in old lang versions are usually guarded by + // #if. Some headers may be not present in old stdlib versions, the umbrella + // header guards with __has_include for this purpose. 
+ Lang L = langFromOpts(LO); + switch (L) { + case CXX: + static std::string *UmbrellaCXX = + new std::string(buildUmbrella(mandatoryHeader(L), { +#define SYMBOL(Name, NameSpace, Header) #Header, +#include "clang/Tooling/Inclusions/StdSymbolMap.inc" +#undef SYMBOL + })); + return *UmbrellaCXX; + case C: + static std::string *UmbrellaC = + new std::string(buildUmbrella(mandatoryHeader(L), { +#define SYMBOL(Name, NameSpace, Header) #Header, +#include "clang/Tooling/Inclusions/CSymbolMap.inc" +#undef SYMBOL + })); + return *UmbrellaC; + } +} + +namespace { + +// Including the standard library leaks unwanted transitively included symbols. +// +// We want to drop these, they're a bit tricky to identify: +// - we don't want to limit to symbols on our list, as our list has only +// top-level symbols (and there may be legitimate stdlib extensions). +// - we can't limit to only symbols defined in known stdlib headers, as stdlib +// internal structure is murky +// - we can't strictly require symbols to come from a particular path, e.g. +// libstdc++ is mostly under /usr/include/c++/10/... +// but std::ctype_base is under /usr/include/<triple>/c++/10/... +// We require the symbol to come from a header that is *either* from +// the standard library path (as identified by the location of <vector>), or +// another header that defines a symbol from our stdlib list. +SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) { + SymbolSlab::Builder Result; + + static auto &StandardHeaders = *[] { + auto *Set = new llvm::DenseSet(); + for (llvm::StringRef Header : { +#define SYMBOL(Name, NameSpace, Header) #Header, +#include "clang/Tooling/Inclusions/CSymbolMap.inc" +#include "clang/Tooling/Inclusions/StdSymbolMap.inc" +#undef SYMBOL + }) + Set->insert(Header); + return Set; + }(); + + // Form prefixes like file:///usr/include/c++/10/ + // These can be trivially prefix-compared with URIs in the indexed symbols. 
+ llvm::SmallVector StdLibURIPrefixes; + for (const auto &Path : Loc.Paths) { + StdLibURIPrefixes.push_back(URI::create(Path).toString()); + if (StdLibURIPrefixes.back().back() != '/') + StdLibURIPrefixes.back().push_back('/'); + } + // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or* + // owner of a symbol whose insertable header is in StandardHeaders? + // Pointer key because strings in a SymbolSlab are interned. + llvm::DenseMap GoodHeader; + for (const Symbol &S : Slab) { + if (!S.IncludeHeaders.empty() && + StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader)) { + GoodHeader[S.CanonicalDeclaration.FileURI] = true; + GoodHeader[S.Definition.FileURI] = true; + continue; + } + for (const char *URI : + {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) { + auto R = GoodHeader.try_emplace(URI, false); + if (R.second) { + R.first->second = llvm::any_of( + StdLibURIPrefixes, + [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) { + return URIStr.startswith(Prefix); + }); + } + } + } +#ifndef NDEBUG + for (const auto &Good : GoodHeader) + if (Good.second && *Good.first) + dlog("Stdlib header: {0}", Good.first); +#endif + // Empty URIs aren't considered good. (Definition can be blank). 
+ auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(C); }; + + for (const Symbol &S : Slab) { + if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) || + IsGoodHeader(S.Definition.FileURI))) { + dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name, + S.CanonicalDeclaration.FileURI); + continue; + } + Result.insert(S); + } + + return std::move(Result).build(); +} + +} // namespace + +SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources, + std::unique_ptr CI, + const StdLibLocation &Loc, + const ThreadsafeFS &TFS) { + if (CI->getFrontendOpts().Inputs.size() != 1 || + !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) { + elog("Indexing standard library failed: bad CompilerInvocation"); + assert(false && "indexing stdlib with a dubious CompilerInvocation!"); + return SymbolSlab(); + } + const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front(); + trace::Span Tracer("StandardLibraryIndex"); + LangStandard::Kind LangStd = standardFromOpts(*CI->getLangOpts()); + log("Indexing {0} standard library in the context of {1}", + LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile()); + + SymbolSlab Symbols; + IgnoreDiagnostics IgnoreDiags; + // CompilerInvocation is taken from elsewhere, and may map a dirty buffer. + CI->getPreprocessorOpts().clearRemappedFiles(); + auto Clang = prepareCompilerInstance( + std::move(CI), /*Preamble=*/nullptr, + llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()), + TFS.view(/*CWD=*/llvm::None), IgnoreDiags); + if (!Clang) { + elog("Standard Library Index: Couldn't build compiler instance"); + return Symbols; + } + + SymbolCollector::Options IndexOpts; + IndexOpts.Origin = SymbolOrigin::StdLib; + IndexOpts.CollectMainFileSymbols = false; + IndexOpts.CollectMainFileRefs = false; + IndexOpts.CollectMacro = true; + IndexOpts.StoreAllDocumentation = true; + // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope. 
+ // Files from outside the StdLibLocation may define true std symbols anyway. + // We end up "blessing" such headers, and can only do that by indexing + // everything first. + + // Refs, relations, include graph in the stdlib mostly aren't useful. + auto Action = createStaticIndexingAction( + IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, nullptr, + nullptr, nullptr); + + if (!Action->BeginSourceFile(*Clang, Input)) { + elog("Standard Library Index: BeginSourceFile() failed"); + return Symbols; + } + + if (llvm::Error Err = Action->Execute()) { + elog("Standard Library Index: Execute failed: {0}", std::move(Err)); + return Symbols; + } + + Action->EndSourceFile(); + + unsigned SymbolsBeforeFilter = Symbols.size(); + Symbols = filter(std::move(Symbols), Loc); + bool Errors = Clang->hasDiagnostics() && + Clang->getDiagnostics().hasUncompilableErrorOccurred(); + log("Indexed {0} standard library{3}: {1} symbols, {2} filtered", + LangStandard::getLangStandardForKind(LangStd).getName(), Symbols.size(), + SymbolsBeforeFilter - Symbols.size(), + Errors ? " (incomplete due to errors)" : ""); + SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); + return Symbols; +} + +SymbolSlab indexStandardLibrary(std::unique_ptr Invocation, + const StdLibLocation &Loc, + const ThreadsafeFS &TFS) { + llvm::StringRef Header = getStdlibUmbrellaHeader(*Invocation->getLangOpts()); + return indexStandardLibrary(Header, std::move(Invocation), Loc, TFS); +} + +bool StdLibSet::isBest(const LangOptions &LO) const { + return standardFromOpts(LO) >= + Best[langFromOpts(LO)].load(std::memory_order_acquire); +} + +llvm::Optional StdLibSet::add(const LangOptions &LO, + const HeaderSearch &HS) { + Lang L = langFromOpts(LO); + int OldVersion = Best[L].load(std::memory_order_acquire); + int NewVersion = standardFromOpts(LO); + dlog("Index stdlib? 
{0}", + LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName()); + + if (!Config::current().Index.StandardLibrary) { + dlog("No: disabled in config"); + return llvm::None; + } + + if (NewVersion <= OldVersion) { + dlog("No: have {0}, {1}>={2}", + LangStandard::getLangStandardForKind( + static_cast(NewVersion)) + .getName(), + OldVersion, NewVersion); + return llvm::None; + } + + // We'd like to index a standard library here if there is one. + // Check for the existence of the mandatory header (<vector> or <stdio.h>) on the search path. + // We could cache this, but we only get here repeatedly when there's no + // stdlib, and even then only once per preamble build. + llvm::StringLiteral ProbeHeader = mandatoryHeader(L); + llvm::SmallString<256> Path; // Scratch space. + llvm::SmallVector SearchPaths; + auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) { + llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath); + if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, Path)) + SearchPaths.emplace_back(Path); + }; + for (const auto &DL : + llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) { + switch (DL.getLookupType()) { + case DirectoryLookup::LT_NormalDir: { + Path = DL.getDir()->getName(); + llvm::sys::path::append(Path, ProbeHeader); + llvm::vfs::Status Stat; + if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) && + Stat.isRegularFile()) + RecordHeaderPath(Path); + break; + } + case DirectoryLookup::LT_Framework: + // stdlib can't be a framework (framework includes must have a slash) + continue; + case DirectoryLookup::LT_HeaderMap: + llvm::StringRef Target = + DL.getHeaderMap()->lookupFilename(ProbeHeader, Path); + if (!Target.empty()) + RecordHeaderPath(Target); + break; + } + } + if (SearchPaths.empty()) + return llvm::None; + + dlog("Found standard library in {0}", llvm::join(SearchPaths, ", ")); + + while (!Best[L].compare_exchange_weak(OldVersion, NewVersion, + std::memory_order_acq_rel)) + if (OldVersion >= NewVersion) { + dlog("No: lost the 
race"); + return llvm::None; // Another thread won the race while we were checking. + } + + dlog("Yes, index stdlib!"); + return StdLibLocation{std::move(SearchPaths)}; +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/index/StdLib.h b/clang-tools-extra/clangd/index/StdLib.h new file mode 100644 index 000000000000..6df30ace669c --- /dev/null +++ b/clang-tools-extra/clangd/index/StdLib.h @@ -0,0 +1,110 @@ +//===--- StdLib.h - Index the C and C++ standard library ---------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Eagerly indexing the standard library gives a much friendlier "warm start" +// with working code completion in a standalone file or small project. +// +// We act as if we saw a file which included the whole standard library: +// #include <algorithm> +// #include <string> +// #include <vector> +// ... +// We index this TU and feed the result into the dynamic index. +// +// This happens within the context of some particular open file, and we reuse +// its CompilerInvocation. Matching its include path, LangOpts etc ensures that +// we see the standard library and configuration that matches the project. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H + +#include "index/Symbol.h" +#include "support/ThreadsafeFS.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang { +class CompilerInvocation; +class LangOptions; +class HeaderSearch; +namespace clangd { + +// The filesystem location where a standard library was found. +// +// This is the directory containing <vector> or <stdio.h>. 
+// It's used to ensure we only index files that are in the standard library. +// +// The paths are canonicalized (FS "real path" with symlinks resolved). +// This allows them to be easily compared against paths the indexer returns. +struct StdLibLocation { + llvm::SmallVector Paths; +}; + +// Tracks the state of standard library indexing within a particular index. +// +// In general, we don't want to index the standard library multiple times. +// In most cases, this class just acts as a flag to ensure we only do it once. +// +// However, if we first open a C++11 file, and then a C++20 file, we *do* +// want the index to be upgraded to include the extra symbols. +// Similarly, the C and C++ standard library can coexist. +class StdLibSet { + std::atomic Best[2] = {{-1}, {-1}}; + +public: + // Determines if we should index the standard library in a configuration. + // + // This is true if: + // - standard library indexing is enabled for the file + // - the language version is higher than any previous add() for the language + // - the standard library headers exist on the search path + // Returns the location where the standard library was found. + // + // This function is threadsafe. + llvm::Optional add(const LangOptions &, const HeaderSearch &); + + // Indicates whether a built index should be used. + // It should not be used if a newer version has subsequently been added. + // + // Intended pattern is: + // if (add()) { + // symbols = indexStandardLibrary(); + // if (isBest()) + // index.update(symbols); + // } + // + // This is still technically racy: we could return true here, then another + // thread could add->index->update a better library before we can update. + // We'd then overwrite it with the older version. + // However, it's very unlikely: indexing takes a long time. + bool isBest(const LangOptions &) const; +}; + +// Index a standard library and return the discovered symbols. 
+// +// The compiler invocation should describe the file whose config we're reusing. +// We overwrite its virtual buffer with a lot of #include statements. +SymbolSlab indexStandardLibrary(std::unique_ptr Invocation, + const StdLibLocation &Loc, + const ThreadsafeFS &TFS); + +// Variant that allows the umbrella header source to be specified. +// Exposed for testing. +SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources, + std::unique_ptr CI, + const StdLibLocation &Loc, + const ThreadsafeFS &TFS); + +// Generate header containing #includes for all standard library headers. +// Exposed for testing. +llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &); + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H diff --git a/clang-tools-extra/clangd/index/SymbolOrigin.cpp b/clang-tools-extra/clangd/index/SymbolOrigin.cpp index 46a84f2ca984..e893ff78b8ae 100644 --- a/clang-tools-extra/clangd/index/SymbolOrigin.cpp +++ b/clang-tools-extra/clangd/index/SymbolOrigin.cpp @@ -14,7 +14,7 @@ namespace clangd { llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, SymbolOrigin O) { if (O == SymbolOrigin::Unknown) return OS << "unknown"; - constexpr static char Sigils[] = "AOSMIRP7B9012345"; + constexpr static char Sigils[] = "AOSMIRP7BL012345"; for (unsigned I = 0; I < sizeof(Sigils); ++I) if (static_cast(O) & 1u << I) OS << Sigils[I]; diff --git a/clang-tools-extra/clangd/index/SymbolOrigin.h b/clang-tools-extra/clangd/index/SymbolOrigin.h index 18e3616d5a9c..2e7a3fa745e2 100644 --- a/clang-tools-extra/clangd/index/SymbolOrigin.h +++ b/clang-tools-extra/clangd/index/SymbolOrigin.h @@ -29,6 +29,7 @@ enum class SymbolOrigin : uint16_t { Preamble = 1 << 6, // From the dynamic index of preambles. // 7 reserved Background = 1 << 8, // From the automatic project index. + StdLib = 1 << 9, // Standard library index. 
}; inline SymbolOrigin operator|(SymbolOrigin A, SymbolOrigin B) { diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index 8309be64ef23..692d7f8038d9 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -81,6 +81,7 @@ add_unittest(ClangdUnitTests ClangdTests SemanticSelectionTests.cpp SerializationTests.cpp SourceCodeTests.cpp + StdLibTests.cpp SymbolCollectorTests.cpp SymbolInfoTests.cpp SyncAPI.cpp diff --git a/clang-tools-extra/clangd/unittests/StdLibTests.cpp b/clang-tools-extra/clangd/unittests/StdLibTests.cpp new file mode 100644 index 000000000000..0fadc872305c --- /dev/null +++ b/clang-tools-extra/clangd/unittests/StdLibTests.cpp @@ -0,0 +1,162 @@ +//===-- StdLibTests.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Annotations.h" +#include "ClangdServer.h" +#include "CodeComplete.h" +#include "Compiler.h" +#include "Config.h" +#include "SyncAPI.h" +#include "TestFS.h" +#include "index/StdLib.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceManager.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +using namespace testing; + +namespace clang { +namespace clangd { +namespace { + +// Check the generated header sources contains usual standard library headers. 
+TEST(StdLibTests, getStdlibUmbrellaHeader) { + LangOptions LO; + LO.CPlusPlus = true; + + auto CXX = getStdlibUmbrellaHeader(LO).str(); + EXPECT_THAT(CXX, HasSubstr("#include ")); + EXPECT_THAT(CXX, HasSubstr("#include ")); + EXPECT_THAT(CXX, Not(HasSubstr("#include "))); + + LO.CPlusPlus = false; + auto C = getStdlibUmbrellaHeader(LO).str(); + EXPECT_THAT(C, Not(HasSubstr("#include "))); + EXPECT_THAT(C, Not(HasSubstr("#include "))); + EXPECT_THAT(C, HasSubstr("#include ")); +} + +MATCHER_P(Named, Name, "") { return arg.Name == Name; } + +// Build an index, and check if it contains the right symbols. +TEST(StdLibTests, indexStandardLibrary) { + MockFS FS; + FS.Files["std/foo.h"] = R"cpp( + #include + #if __cplusplus >= 201703L + int foo17(); + #elif __cplusplus >= 201402L + int foo14(); + #else + bool foo98(); + #endif + )cpp"; + FS.Files["nonstd/platform_stuff.h"] = "int magic = 42;"; + + ParseInputs OriginalInputs; + OriginalInputs.TFS = &FS; + OriginalInputs.CompileCommand.Filename = testPath("main.cc"); + OriginalInputs.CompileCommand.CommandLine = {"clang++", testPath("main.cc"), + "-isystemstd/", + "-isystemnonstd/", "-std=c++14"}; + OriginalInputs.CompileCommand.Directory = testRoot(); + IgnoreDiagnostics Diags; + auto CI = buildCompilerInvocation(OriginalInputs, Diags); + ASSERT_TRUE(CI); + + StdLibLocation Loc; + Loc.Paths.push_back(testPath("std/")); + + auto Symbols = + indexStandardLibrary("#include ", std::move(CI), Loc, FS); + EXPECT_THAT(Symbols, ElementsAre(Named("foo14"))); +} + +TEST(StdLibTests, StdLibSet) { + StdLibSet Set; + MockFS FS; + FS.Files["std/_"] = ""; + FS.Files["libc/_"] = ""; + + auto Add = [&](const LangOptions &LO, + std::vector SearchPath) { + SourceManagerForFile SM("scratch", ""); + SM.get().getFileManager().setVirtualFileSystem(FS.view(llvm::None)); + HeaderSearch HS(/*HSOpts=*/nullptr, SM.get(), SM.get().getDiagnostics(), LO, + /*Target=*/nullptr); + for (auto P : SearchPath) + HS.AddSearchPath( + DirectoryLookup( + 
cantFail(SM.get().getFileManager().getDirectoryRef(testPath(P))), + SrcMgr::C_System, /*isFramework=*/false), + true); + return Set.add(LO, HS); + }; + + Config Cfg; + Cfg.Index.StandardLibrary = false; + WithContextValue Disabled(Config::Key, std::move(Cfg)); + + LangOptions LO; + LO.CPlusPlus = true; + EXPECT_FALSE(Add(LO, {"std"})) << "Disabled in config"; + + Cfg = Config(); + Cfg.Index.StandardLibrary = true; + WithContextValue Enabled(Config::Key, std::move(Cfg)); + + EXPECT_FALSE(Add(LO, {"std"})) << "No found"; + FS.Files["std/vector"] = "class vector;"; + EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++98"; + EXPECT_FALSE(Add(LO, {"std"})) << "Don't reindex"; + LO.CPlusPlus11 = true; + EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++11"; + LO.CPlusPlus = false; + EXPECT_FALSE(Add(LO, {"libc"})) << "No "; + FS.Files["libc/stdio.h"] = true; + EXPECT_TRUE(Add(LO, {"libc"})) << "Indexing as C"; +} + +MATCHER_P(StdlibSymbol, Name, "") { + return arg.Name == Name && arg.Includes.size() == 1 && + llvm::StringRef(arg.Includes.front().Header).startswith("<"); +} + +TEST(StdLibTests, EndToEnd) { + Config Cfg; + Cfg.Index.StandardLibrary = true; + WithContextValue Enabled(Config::Key, std::move(Cfg)); + + MockFS FS; + FS.Files["stdlib/vector"] = + "namespace std { template class vector; }"; + FS.Files["stdlib/list"] = + " namespace std { template class list; }"; + MockCompilationDatabase CDB; + CDB.ExtraClangFlags.push_back("-isystem" + testPath("stdlib")); + ClangdServer::Options Opts = ClangdServer::optsForTest(); + Opts.BuildDynamicSymbolIndex = true; // also used for stdlib index + ClangdServer Server(CDB, FS, Opts); + + Annotations A("std::^"); + + Server.addDocument(testPath("foo.cc"), A.code()); + ASSERT_TRUE(Server.blockUntilIdleForTest()); + clangd::CodeCompleteOptions CCOpts; + auto Completions = + cantFail(runCodeComplete(Server, testPath("foo.cc"), A.point(), CCOpts)); + EXPECT_THAT( + Completions.Completions, + 
UnorderedElementsAre(StdlibSymbol("list"), StdlibSymbol("vector"))); +} + +} // namespace +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp index 76f4cbafc830..cf30acb0d669 100644 --- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp +++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp @@ -1123,7 +1123,8 @@ TEST_F(TUSchedulerTests, AsyncPreambleThread) { public: BlockPreambleThread(llvm::StringRef BlockVersion, Notification &N) : BlockVersion(BlockVersion), N(N) {} - void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx, + void onPreambleAST(PathRef Path, llvm::StringRef Version, + const CompilerInvocation &, ASTContext &Ctx, Preprocessor &, const CanonicalIncludes &) override { if (Version == BlockVersion) N.wait();