Reland(3) "[clangd] Indexing of standard library"

Tracked down the crash, which was argument-evaluation-order UB
in the wrapping indexStandardLibrary().
Sorry for the churn!

This reverts commit 77533ea443.
This commit is contained in:
Sam McCall 2022-05-18 19:27:43 +02:00
parent 8b7e85f4f8
commit 03ea140b3a
18 changed files with 712 additions and 21 deletions

View File

@ -119,6 +119,7 @@ add_clang_library(clangDaemon
index/Ref.cpp
index/Relation.cpp
index/Serialization.cpp
index/StdLib.cpp
index/Symbol.cpp
index/SymbolCollector.cpp
index/SymbolID.cpp

View File

@ -26,6 +26,7 @@
#include "index/CanonicalIncludes.h"
#include "index/FileIndex.h"
#include "index/Merge.h"
#include "index/StdLib.h"
#include "refactor/Rename.h"
#include "refactor/Tweak.h"
#include "support/Cancellation.h"
@ -59,16 +60,39 @@ namespace {
// Update the FileIndex with new ASTs and plumb the diagnostics responses.
struct UpdateIndexCallbacks : public ParsingCallbacks {
UpdateIndexCallbacks(FileIndex *FIndex,
ClangdServer::Callbacks *ServerCallbacks)
: FIndex(FIndex), ServerCallbacks(ServerCallbacks) {}
ClangdServer::Callbacks *ServerCallbacks,
const ThreadsafeFS &TFS, AsyncTaskRunner *Tasks)
: FIndex(FIndex), ServerCallbacks(ServerCallbacks), TFS(TFS),
Tasks(Tasks) {}
void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx,
void onPreambleAST(PathRef Path, llvm::StringRef Version,
const CompilerInvocation &CI, ASTContext &Ctx,
Preprocessor &PP,
const CanonicalIncludes &CanonIncludes) override {
// If this preamble uses a standard library we haven't seen yet, index it.
if (FIndex)
if (auto Loc = Stdlib.add(*CI.getLangOpts(), PP.getHeaderSearchInfo()))
indexStdlib(CI, std::move(*Loc));
if (FIndex)
FIndex->updatePreamble(Path, Version, Ctx, PP, CanonIncludes);
}
// Indexes the standard library described by CI/Loc and feeds the resulting
// symbols into FIndex, provided Stdlib still considers this language version
// the best one when indexing finishes.
// Runs on Tasks when an AsyncTaskRunner was supplied, otherwise inline.
// Dereferences FIndex unconditionally; the visible caller only invokes this
// when FIndex is non-null.
void indexStdlib(const CompilerInvocation &CI, StdLibLocation Loc) {
// The task owns copies of the LangOptions, the location and the invocation,
// so it does not depend on the caller's CI after this function returns.
// NOTE(review): the task also captures `this` and uses TFS, Stdlib and FIndex
// through it. If Tasks can still be running after this callbacks object is
// destroyed, those accesses would dangle — confirm the destruction order of
// the task runner relative to the owner of these callbacks.
auto Task = [this, LO(*CI.getLangOpts()), Loc(std::move(Loc)),
CI(std::make_unique<CompilerInvocation>(CI))]() mutable {
IndexFileIn IF;
IF.Symbols = indexStandardLibrary(std::move(CI), Loc, TFS);
// Skip the update if a newer standard was added while we were indexing.
if (Stdlib.isBest(LO))
FIndex->updatePreamble(std::move(IF));
};
if (Tasks)
// This doesn't have a semaphore to enforce -j, but it's rare.
Tasks->runAsync("IndexStdlib", std::move(Task));
else
Task();
}
void onMainAST(PathRef Path, ParsedAST &AST, PublishFn Publish) override {
if (FIndex)
FIndex->updateMain(Path, AST);
@ -103,6 +127,9 @@ struct UpdateIndexCallbacks : public ParsingCallbacks {
private:
FileIndex *FIndex;
ClangdServer::Callbacks *ServerCallbacks;
const ThreadsafeFS &TFS;
StdLibSet Stdlib;
AsyncTaskRunner *Tasks;
};
class DraftStoreFS : public ThreadsafeFS {
@ -154,12 +181,15 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB,
Transient(Opts.ImplicitCancellation ? TUScheduler::InvalidateOnUpdate
: TUScheduler::NoInvalidation),
DirtyFS(std::make_unique<DraftStoreFS>(TFS, DraftMgr)) {
if (Opts.AsyncThreadsCount != 0)
IndexTasks.emplace();
// Pass a callback into `WorkScheduler` to extract symbols from a newly
// parsed file and rebuild the file index synchronously each time an AST
// is parsed.
WorkScheduler.emplace(
CDB, TUScheduler::Options(Opts),
std::make_unique<UpdateIndexCallbacks>(DynamicIdx.get(), Callbacks));
WorkScheduler.emplace(CDB, TUScheduler::Options(Opts),
std::make_unique<UpdateIndexCallbacks>(
DynamicIdx.get(), Callbacks, TFS,
IndexTasks ? IndexTasks.getPointer() : nullptr));
// Adds an index to the stack, at higher priority than existing indexes.
auto AddIndex = [&](SymbolIndex *Idx) {
if (this->Index != nullptr) {
@ -975,6 +1005,9 @@ ClangdServer::blockUntilIdleForTest(llvm::Optional<double> TimeoutSeconds) {
// and we're blocking the main thread.
if (!WorkScheduler->blockUntilIdle(timeoutSeconds(TimeoutSeconds)))
return false;
// TUScheduler is the only thing that starts background indexing work.
if (IndexTasks && !IndexTasks->wait(timeoutSeconds(TimeoutSeconds)))
return false;
// Unfortunately we don't have strict topological order between the rest of
// the components. E.g. CDB broadcast triggers background indexing.

View File

@ -428,6 +428,7 @@ private:
mutable std::mutex CachedCompletionFuzzyFindRequestMutex;
llvm::Optional<std::string> WorkspaceRoot;
llvm::Optional<AsyncTaskRunner> IndexTasks; // for stdlib indexing.
llvm::Optional<TUScheduler> WorkScheduler;
// Invalidation policy used for actions that we assume are "transient".
TUScheduler::ASTActionInvalidation Transient;

View File

@ -81,11 +81,12 @@ struct Config {
/// forward-slashes.
std::string MountPoint;
};
/// Controls background-index behavior.
/// Controls index behavior.
struct {
/// Whether this TU should be indexed.
/// Whether this TU should be background-indexed.
BackgroundPolicy Background = BackgroundPolicy::Build;
ExternalIndexSpec External;
bool StandardLibrary = false;
} Index;
enum UnusedIncludesPolicy { Strict, None };

View File

@ -332,6 +332,11 @@ struct FragmentCompiler {
}
if (F.External)
compile(std::move(**F.External), F.External->Range);
if (F.StandardLibrary)
Out.Apply.push_back(
[Val(**F.StandardLibrary)](const Params &, Config &C) {
C.Index.StandardLibrary = Val;
});
}
void compile(Fragment::IndexBlock::ExternalBlock &&External,

View File

@ -199,6 +199,9 @@ struct Fragment {
llvm::Optional<Located<std::string>> MountPoint;
};
llvm::Optional<Located<ExternalBlock>> External;
// Whether the standard library visible from this file should be indexed.
// This makes all standard library symbols available, included or not.
llvm::Optional<Located<bool>> StandardLibrary;
};
IndexBlock Index;

View File

@ -184,6 +184,10 @@ private:
F.External.emplace(std::move(External));
F.External->Range = N.getSourceRange();
});
Dict.handle("StandardLibrary", [&](Node &N) {
if (auto StandardLibrary = boolValue(N, "StandardLibrary"))
F.StandardLibrary = *StandardLibrary;
});
Dict.parse(N);
}

View File

@ -1013,9 +1013,10 @@ void PreambleThread::build(Request Req) {
bool IsFirstPreamble = !LatestBuild;
LatestBuild = clang::clangd::buildPreamble(
FileName, *Req.CI, Inputs, StoreInMemory,
[this, Version(Inputs.Version)](ASTContext &Ctx, Preprocessor &PP,
[&](ASTContext &Ctx, Preprocessor &PP,
const CanonicalIncludes &CanonIncludes) {
Callbacks.onPreambleAST(FileName, Version, Ctx, PP, CanonIncludes);
Callbacks.onPreambleAST(FileName, Inputs.Version, *Req.CI, Ctx, PP,
CanonIncludes);
},
&Stats);
if (!LatestBuild)

View File

@ -133,8 +133,8 @@ public:
/// contains only AST nodes from the #include directives at the start of the
/// file. AST node in the current file should be observed on onMainAST call.
virtual void onPreambleAST(PathRef Path, llvm::StringRef Version,
ASTContext &Ctx, Preprocessor &PP,
const CanonicalIncludes &) {}
const CompilerInvocation &CI, ASTContext &Ctx,
Preprocessor &PP, const CanonicalIncludes &) {}
/// The argument function is run under the critical section guarding against
/// races when closing the files.

View File

@ -425,12 +425,7 @@ FileIndex::FileIndex()
MainFileSymbols(IndexContents::All),
MainFileIndex(std::make_unique<MemIndex>()) {}
void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
ASTContext &AST, Preprocessor &PP,
const CanonicalIncludes &Includes) {
IndexFileIn IF;
std::tie(IF.Symbols, std::ignore, IF.Relations) =
indexHeaderSymbols(Version, AST, PP, Includes);
void FileIndex::updatePreamble(IndexFileIn IF) {
FileShardedIndex ShardedIndex(std::move(IF));
for (auto Uri : ShardedIndex.getAllSources()) {
auto IF = ShardedIndex.getShard(Uri);
@ -461,6 +456,15 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
}
}
// Indexes the header symbols reachable from a preamble AST and forwards the
// symbols and relations to the IndexFileIn overload. Refs are discarded.
void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
                               ASTContext &AST, Preprocessor &PP,
                               const CanonicalIncludes &Includes) {
  IndexFileIn Input;
  {
    // Scoped so the unused refs slab is released before the index update.
    auto Slabs = indexHeaderSymbols(Version, AST, PP, Includes);
    Input.Symbols = std::move(std::get<0>(Slabs));
    Input.Relations = std::move(std::get<2>(Slabs));
  }
  updatePreamble(std::move(Input));
}
void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
auto Contents = indexMainDecls(AST);
MainFileSymbols.update(

View File

@ -114,6 +114,7 @@ public:
/// and macros in \p PP.
void updatePreamble(PathRef Path, llvm::StringRef Version, ASTContext &AST,
Preprocessor &PP, const CanonicalIncludes &Includes);
void updatePreamble(IndexFileIn);
/// Update symbols and references from main file \p Path with
/// `indexMainDecls`.

View File

@ -0,0 +1,362 @@
//===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "StdLib.h"
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "Compiler.h"
#include "Config.h"
#include "SymbolCollector.h"
#include "index/IndexAction.h"
#include "support/Logger.h"
#include "support/ThreadsafeFS.h"
#include "support/Trace.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
namespace clang {
namespace clangd {
namespace {
// The standard-library families handled here. The enumerator values are also
// used to index StdLibSet::Best.
enum Lang { C, CXX };

// Classifies a set of language options as C++ or plain C.
Lang langFromOpts(const LangOptions &LO) {
  if (LO.CPlusPlus)
    return CXX;
  return C;
}
// Names the header whose presence is taken as evidence that a standard
// library for language L exists on the search path.
llvm::StringLiteral mandatoryHeader(Lang L) {
  if (L == C)
    return "stdio.h";
  if (L == CXX)
    return "vector";
  llvm_unreachable("unhandled Lang");
}
// Maps language options onto the newest language standard they enable.
// Anything older than C++11 is reported as C++98, and anything older than
// C11 as C99.
LangStandard::Kind standardFromOpts(const LangOptions &LO) {
  if (!LO.CPlusPlus) {
    if (LO.C2x)
      return LangStandard::lang_c2x;
    // C17 has no new features, so treat {C11,C17} as C17.
    return LO.C11 ? LangStandard::lang_c17 : LangStandard::lang_c99;
  }

  // C++: check from the newest standard downwards.
  if (LO.CPlusPlus2b)
    return LangStandard::lang_cxx2b;
  if (LO.CPlusPlus20)
    return LangStandard::lang_cxx20;
  if (LO.CPlusPlus17)
    return LangStandard::lang_cxx17;
  if (LO.CPlusPlus14)
    return LangStandard::lang_cxx14;
  return LO.CPlusPlus11 ? LangStandard::lang_cxx11 : LangStandard::lang_cxx98;
}
// Produces the text of an umbrella header: a check that <Mandatory> exists,
// followed by one guarded #include per distinct entry of Headers.
// Each entry is pasted verbatim after `#include`, so it must already carry
// its delimiters.
std::string buildUmbrella(llvm::StringLiteral Mandatory,
                          std::vector<llvm::StringLiteral> Headers) {
  // Sort and drop duplicates up front so emission is a plain range-for.
  llvm::sort(Headers.begin(), Headers.end());
  Headers.erase(std::unique(Headers.begin(), Headers.end()), Headers.end());

  std::string Text;
  llvm::raw_string_ostream Out(Text);
  // Every #include below is guarded by __has_include, to avoid errors with
  // older stdlib versions that lack headers for the newest language
  // standards. This check ensures we still get *some* error if things are
  // totally broken.
  Out << llvm::formatv(
      "#if !__has_include(<{0}>)\n"
      "#error Mandatory header <{0}> not found in standard library!\n"
      "#endif\n",
      Mandatory);
  for (const llvm::StringLiteral &Header : Headers)
    Out << llvm::formatv("#if __has_include({0})\n"
                         "#include {0}\n"
                         "#endif\n",
                         Header);
  Out.flush();
  return Text;
}
} // namespace
// Returns the source text of an umbrella header that includes every known
// standard library header for the language selected by LO.
// The text is built on first use and stored in a heap string that is never
// freed, so the returned StringRef stays valid.
llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) {
  // The umbrella header is the same for all versions of each language.
  // Headers that are unsupported in old lang versions are usually guarded by
  // #if. Some headers may be not present in old stdlib versions, the umbrella
  // header guards with __has_include for this purpose.
  Lang L = langFromOpts(LO);
  switch (L) {
  case CXX:
    static std::string *UmbrellaCXX =
        new std::string(buildUmbrella(mandatoryHeader(L), {
#define SYMBOL(Name, NameSpace, Header) #Header,
#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
#undef SYMBOL
                                                          }));
    return *UmbrellaCXX;
  case C:
    static std::string *UmbrellaC =
        new std::string(buildUmbrella(mandatoryHeader(L), {
#define SYMBOL(Name, NameSpace, Header) #Header,
#include "clang/Tooling/Inclusions/CSymbolMap.inc"
#undef SYMBOL
                                                          }));
    return *UmbrellaC;
  }
  // The switch handles every enumerator, but without this the function can
  // fall off its end (undefined behavior, and a compiler warning).
  llvm_unreachable("unhandled Lang");
}
namespace {
// Including the standard library leaks unwanted transitively included symbols.
//
// We want to drop these, they're a bit tricky to identify:
// - we don't want to limit to symbols on our list, as our list has only
// top-level symbols (and there may be legitimate stdlib extensions).
// - we can't limit to only symbols defined in known stdlib headers, as stdlib
// internal structure is murky
// - we can't strictly require symbols to come from a particular path, e.g.
// libstdc++ is mostly under /usr/include/c++/10/...
// but std::ctype_base is under /usr/include/<platform>/c++/10/...
// We require the symbol to come from a header that is *either* from
// the standard library path (as identified by the location of <vector>), or
// another header that defines a symbol from our stdlib list.
SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) {
  // Every header spelled in the C and C++ symbol tables, built on first use.
  static auto &StandardHeaders = *[] {
    auto *Set = new llvm::DenseSet<llvm::StringRef>();
    for (llvm::StringRef Header : {
#define SYMBOL(Name, NameSpace, Header) #Header,
#include "clang/Tooling/Inclusions/CSymbolMap.inc"
#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
#undef SYMBOL
         })
      Set->insert(Header);
    return Set;
  }();

  // Form prefixes like file:///usr/include/c++/10/
  // These can be trivially prefix-compared with URIs in the indexed symbols.
  llvm::SmallVector<std::string> StdLibURIPrefixes;
  for (const auto &Path : Loc.Paths) {
    std::string Prefix = URI::create(Path).toString();
    if (Prefix.back() != '/')
      Prefix.push_back('/');
    StdLibURIPrefixes.push_back(std::move(Prefix));
  }
  auto UnderStdLibPath = [&](llvm::StringRef FileURI) {
    for (const std::string &Prefix : StdLibURIPrefixes)
      if (FileURI.startswith(Prefix))
        return true;
    return false;
  };

  // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
  // owner of a symbol whose insertable header is in StandardHeaders?
  // Pointer key because strings in a SymbolSlab are interned.
  llvm::DenseMap<const char *, bool> GoodHeader;
  for (const Symbol &S : Slab) {
    const char *DeclURI = S.CanonicalDeclaration.FileURI;
    const char *DefURI = S.Definition.FileURI;
    const bool FromKnownHeader =
        !S.IncludeHeaders.empty() &&
        StandardHeaders.contains(S.IncludeHeaders.front().IncludeHeader);
    if (FromKnownHeader) {
      // A listed stdlib symbol blesses the files it is declared/defined in.
      GoodHeader[DeclURI] = true;
      GoodHeader[DefURI] = true;
    } else {
      // Otherwise classify each file by location, once, the first time we
      // see it; never downgrade a file that was already blessed.
      for (const char *FileURI : {DeclURI, DefURI}) {
        auto Inserted = GoodHeader.try_emplace(FileURI, false);
        if (Inserted.second)
          Inserted.first->second = UnderStdLibPath(FileURI);
      }
    }
  }
#ifndef NDEBUG
  for (const auto &Good : GoodHeader)
    if (Good.second && *Good.first)
      dlog("Stdlib header: {0}", Good.first);
#endif

  // Empty URIs aren't considered good. (Definition can be blank).
  auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(C); };
  SymbolSlab::Builder Kept;
  for (const Symbol &S : Slab) {
    if (IsGoodHeader(S.CanonicalDeclaration.FileURI) ||
        IsGoodHeader(S.Definition.FileURI)) {
      Kept.insert(S);
      continue;
    }
    dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name,
         S.CanonicalDeclaration.FileURI);
  }
  return std::move(Kept).build();
}
} // namespace
// Indexes HeaderSources as if it were the contents of CI's single input file
// and returns the collected symbols, filtered down to those attributed to the
// standard library at Loc (see filter()).
// Returns an empty slab if CI is unsuitable or the compiler instance cannot
// be built; if the frontend action fails, returns whatever had been collected
// up to that point, unfiltered.
SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
std::unique_ptr<CompilerInvocation> CI,
const StdLibLocation &Loc,
const ThreadsafeFS &TFS) {
// We need exactly one input file to overlay, and no implicit PCH include.
if (CI->getFrontendOpts().Inputs.size() != 1 ||
!CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) {
elog("Indexing standard library failed: bad CompilerInvocation");
assert(false && "indexing stdlib with a dubious CompilerInvocation!");
return SymbolSlab();
}
// NOTE(review): Input refers to data owned by the invocation, which is moved
// into prepareCompilerInstance() below. Presumably the resulting compiler
// instance keeps the invocation alive for as long as Input is used — confirm.
const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front();
trace::Span Tracer("StandardLibraryIndex");
LangStandard::Kind LangStd = standardFromOpts(*CI->getLangOpts());
log("Indexing {0} standard library in the context of {1}",
LangStandard::getLangStandardForKind(LangStd).getName(), Input.getFile());
SymbolSlab Symbols;
IgnoreDiagnostics IgnoreDiags;
// CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
CI->getPreprocessorOpts().clearRemappedFiles();
// The umbrella source is supplied as an in-memory buffer named after the
// original input file.
auto Clang = prepareCompilerInstance(
std::move(CI), /*Preamble=*/nullptr,
llvm::MemoryBuffer::getMemBuffer(HeaderSources, Input.getFile()),
TFS.view(/*CWD=*/llvm::None), IgnoreDiags);
if (!Clang) {
elog("Standard Library Index: Couldn't build compiler instance");
return Symbols;
}
SymbolCollector::Options IndexOpts;
IndexOpts.Origin = SymbolOrigin::StdLib;
IndexOpts.CollectMainFileSymbols = false;
IndexOpts.CollectMainFileRefs = false;
IndexOpts.CollectMacro = true;
IndexOpts.StoreAllDocumentation = true;
// Sadly we can't use IndexOpts.FileFilter to restrict indexing scope.
// Files from outside the StdLibLocation may define true std symbols anyway.
// We end up "blessing" such headers, and can only do that by indexing
// everything first.
// Refs, relations, include graph in the stdlib mostly aren't useful.
auto Action = createStaticIndexingAction(
IndexOpts, [&](SymbolSlab S) { Symbols = std::move(S); }, nullptr,
nullptr, nullptr);
if (!Action->BeginSourceFile(*Clang, Input)) {
elog("Standard Library Index: BeginSourceFile() failed");
return Symbols;
}
if (llvm::Error Err = Action->Execute()) {
elog("Standard Library Index: Execute failed: {0}", std::move(Err));
return Symbols;
}
Action->EndSourceFile();
// Remember the pre-filter count so the log can report how many were dropped.
unsigned SymbolsBeforeFilter = Symbols.size();
Symbols = filter(std::move(Symbols), Loc);
// Compile errors don't discard the result; they are only noted in the log.
bool Errors = Clang->hasDiagnostics() &&
Clang->getDiagnostics().hasUncompilableErrorOccurred();
log("Indexed {0} standard library{3}: {1} symbols, {2} filtered",
LangStandard::getLangStandardForKind(LangStd).getName(), Symbols.size(),
SymbolsBeforeFilter - Symbols.size(),
Errors ? " (incomplete due to errors)" : "");
SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
return Symbols;
}
// Indexes the standard library using the generated umbrella header for the
// invocation's language.
SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
                                const StdLibLocation &Loc,
                                const ThreadsafeFS &TFS) {
  // Read the LangOptions in a separate statement *before* the call that moves
  // Invocation: argument evaluation order is unspecified, so doing both in
  // one call expression could dereference a moved-from pointer.
  const LangOptions &LO = *Invocation->getLangOpts();
  llvm::StringRef Umbrella = getStdlibUmbrellaHeader(LO);
  return indexStandardLibrary(Umbrella, std::move(Invocation), Loc, TFS);
}
// True if LO's language standard is at least as new as the newest one
// recorded so far for the same language.
bool StdLibSet::isBest(const LangOptions &LO) const {
  const int Recorded = Best[langFromOpts(LO)].load(std::memory_order_acquire);
  return standardFromOpts(LO) >= Recorded;
}
// Decides whether the standard library visible through HS should be indexed
// for the language and version described by LO, and records that decision.
//
// Returns the canonicalized directories that contain the probe header
// (<vector> for C++, <stdio.h> for C) when indexing should go ahead.
// Returns llvm::None when indexing is disabled in config, an equal-or-newer
// version was already recorded, the probe header is not on the search path,
// or another thread recorded an equal-or-newer version concurrently.
llvm::Optional<StdLibLocation> StdLibSet::add(const LangOptions &LO,
                                              const HeaderSearch &HS) {
  Lang L = langFromOpts(LO);
  int OldVersion = Best[L].load(std::memory_order_acquire);
  int NewVersion = standardFromOpts(LO);
  dlog("Index stdlib? {0}",
       LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName());

  if (!Config::current().Index.StandardLibrary) {
    dlog("No: disabled in config");
    return llvm::None;
  }

  if (NewVersion <= OldVersion) {
    // Report the version we already *have*. OldVersion >= NewVersion here, so
    // it is a previously stored LangStandard::Kind, not the -1 sentinel.
    dlog("No: have {0}, {1}>={2}",
         LangStandard::getLangStandardForKind(
             static_cast<LangStandard::Kind>(OldVersion))
             .getName(),
         OldVersion, NewVersion);
    return llvm::None;
  }

  // We'd like to index a standard library here if there is one.
  // Check for the existence of <vector> on the search path.
  // We could cache this, but we only get here repeatedly when there's no
  // stdlib, and even then only once per preamble build.
  llvm::StringLiteral ProbeHeader = mandatoryHeader(L);
  llvm::SmallString<256> Path; // Scratch space.
  llvm::SmallVector<std::string> SearchPaths;
  auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) {
    llvm::StringRef DirPath = llvm::sys::path::parent_path(HeaderPath);
    // HeaderPath may point into Path, so resolve into a separate buffer
    // rather than using one buffer as both input and output.
    llvm::SmallString<256> RealDir;
    if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(DirPath, RealDir))
      SearchPaths.emplace_back(RealDir);
  };
  for (const auto &DL :
       llvm::make_range(HS.search_dir_begin(), HS.search_dir_end())) {
    switch (DL.getLookupType()) {
    case DirectoryLookup::LT_NormalDir: {
      Path = DL.getDir()->getName();
      llvm::sys::path::append(Path, ProbeHeader);
      llvm::vfs::Status Stat;
      if (!HS.getFileMgr().getNoncachedStatValue(Path, Stat) &&
          Stat.isRegularFile())
        RecordHeaderPath(Path);
      break;
    }
    case DirectoryLookup::LT_Framework:
      // stdlib can't be a framework (framework includes must have a slash)
      continue;
    case DirectoryLookup::LT_HeaderMap:
      llvm::StringRef Target =
          DL.getHeaderMap()->lookupFilename(ProbeHeader, Path);
      if (!Target.empty())
        RecordHeaderPath(Target);
      break;
    }
  }
  if (SearchPaths.empty())
    return llvm::None;

  dlog("Found standard library in {0}", llvm::join(SearchPaths, ", "));

  // Publish our version unless somebody else got there first with one that is
  // at least as new. On failure, compare_exchange reloads OldVersion.
  while (!Best[L].compare_exchange_weak(OldVersion, NewVersion,
                                        std::memory_order_acq_rel))
    if (OldVersion >= NewVersion) {
      dlog("No: lost the race");
      return llvm::None; // Another thread won the race while we were checking.
    }

  dlog("Yes, index stdlib!");
  return StdLibLocation{std::move(SearchPaths)};
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,110 @@
//===--- StdLib.h - Index the C and C++ standard library ---------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Eagerly indexing the standard library gives a much friendlier "warm start"
// with working code completion in a standalone file or small project.
//
// We act as if we saw a file which included the whole standard library:
// #include <array>
// #include <bitset>
// #include <chrono>
// ...
// We index this TU and feed the result into the dynamic index.
//
// This happens within the context of some particular open file, and we reuse
// its CompilerInvocation. Matching its include path, LangOpts etc ensures that
// we see the standard library and configuration that matches the project.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H
#include "index/Symbol.h"
#include "support/ThreadsafeFS.h"
#include "llvm/ADT/StringRef.h"
#include <string>
namespace clang {
class CompilerInvocation;
class LangOptions;
class HeaderSearch;
namespace clangd {
// The filesystem location where a standard library was found.
//
// This is the directory containing <vector> or <stdio.h>.
// It's used to ensure we only index files that are in the standard library.
//
// The paths are canonicalized (FS "real path" with symlinks resolved).
// This allows them to be easily compared against paths the indexer returns.
struct StdLibLocation {
// One entry per search directory in which the probe header was found.
llvm::SmallVector<std::string> Paths;
};
// Tracks the state of standard library indexing within a particular index.
//
// In general, we don't want to index the standard library multiple times.
// In most cases, this class just acts as a flag to ensure we only do it once.
//
// However, if we first open a C++11 file, and then a C++20 file, we *do*
// want the index to be upgraded to include the extra symbols.
// Similarly, the C and C++ standard library can coexist.
class StdLibSet {
// Newest language standard recorded so far for each language, stored as an
// int (the LangStandard::Kind value), or -1 if none has been added yet.
// One slot per language, indexed by the implementation's C/C++ enum.
std::atomic<int> Best[2] = {{-1}, {-1}};
public:
// Determines if we should index the standard library in a configuration.
//
// This is true if:
// - standard library indexing is enabled for the file
// - the language version is higher than any previous add() for the language
// - the standard library headers exist on the search path
// Returns the location where the standard library was found.
//
// This function is threadsafe.
llvm::Optional<StdLibLocation> add(const LangOptions &, const HeaderSearch &);
// Indicates whether a built index should be used.
// It should not be used if a newer version has subsequently been added.
//
// Intended pattern is:
// if (add()) {
// symbols = indexStandardLibrary();
// if (isBest())
// index.update(symbols);
// }
//
// This is still technically racy: we could return true here, then another
// thread could add->index->update a better library before we can update.
// We'd then overwrite it with the older version.
// However, it's very unlikely: indexing takes a long time.
bool isBest(const LangOptions &) const;
};
// Index a standard library and return the discovered symbols.
//
// The compiler invocation should describe the file whose config we're reusing.
// We overwrite its virtual buffer with a lot of #include statements.
SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
const StdLibLocation &Loc,
const ThreadsafeFS &TFS);
// Variant that allows the umbrella header source to be specified.
// Exposed for testing.
SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
std::unique_ptr<CompilerInvocation> CI,
const StdLibLocation &Loc,
const ThreadsafeFS &TFS);
// Generate header containing #includes for all standard library headers.
// Exposed for testing.
llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &);
} // namespace clangd
} // namespace clang
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_STDLIB_H

View File

@ -14,7 +14,7 @@ namespace clangd {
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, SymbolOrigin O) {
if (O == SymbolOrigin::Unknown)
return OS << "unknown";
constexpr static char Sigils[] = "AOSMIRP7B9012345";
constexpr static char Sigils[] = "AOSMIRP7BL012345";
for (unsigned I = 0; I < sizeof(Sigils); ++I)
if (static_cast<uint16_t>(O) & 1u << I)
OS << Sigils[I];

View File

@ -29,6 +29,7 @@ enum class SymbolOrigin : uint16_t {
Preamble = 1 << 6, // From the dynamic index of preambles.
// 7 reserved
Background = 1 << 8, // From the automatic project index.
StdLib = 1 << 9, // Standard library index.
};
inline SymbolOrigin operator|(SymbolOrigin A, SymbolOrigin B) {

View File

@ -81,6 +81,7 @@ add_unittest(ClangdUnitTests ClangdTests
SemanticSelectionTests.cpp
SerializationTests.cpp
SourceCodeTests.cpp
StdLibTests.cpp
SymbolCollectorTests.cpp
SymbolInfoTests.cpp
SyncAPI.cpp

View File

@ -0,0 +1,162 @@
//===-- StdLibTests.cpp -----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Annotations.h"
#include "ClangdServer.h"
#include "CodeComplete.h"
#include "Compiler.h"
#include "Config.h"
#include "SyncAPI.h"
#include "TestFS.h"
#include "index/StdLib.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceManager.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <memory>
using namespace testing;
namespace clang {
namespace clangd {
namespace {
// Check the generated header sources contains usual standard library headers.
TEST(StdLibTests, getStdlibUmbrellaHeader) {
  // Generates the umbrella header text for either C++ or C.
  auto UmbrellaFor = [](bool IsCXX) {
    LangOptions LO;
    LO.CPlusPlus = IsCXX;
    return getStdlibUmbrellaHeader(LO).str();
  };

  // The C++ umbrella uses C++ headers and <c*> wrappers, not the C spellings.
  const auto CXX = UmbrellaFor(/*IsCXX=*/true);
  EXPECT_THAT(CXX, HasSubstr("#include <string>"));
  EXPECT_THAT(CXX, HasSubstr("#include <cstdio>"));
  EXPECT_THAT(CXX, Not(HasSubstr("#include <stdio.h>")));

  // The C umbrella is the mirror image.
  const auto C = UmbrellaFor(/*IsCXX=*/false);
  EXPECT_THAT(C, Not(HasSubstr("#include <string>")));
  EXPECT_THAT(C, Not(HasSubstr("#include <cstdio>")));
  EXPECT_THAT(C, HasSubstr("#include <stdio.h>"));
}
// Matches any object whose `Name` member compares equal to the given name.
MATCHER_P(Named, Name, "") { return arg.Name == Name; }
// Build an index, and check if it contains the right symbols.
TEST(StdLibTests, indexStandardLibrary) {
  // A fake "standard" header that pulls in a header from a non-std directory
  // and declares a different symbol depending on the language version.
  MockFS FS;
  FS.Files["std/foo.h"] = R"cpp(
#include <platform_stuff.h>
#if __cplusplus >= 201703L
int foo17();
#elif __cplusplus >= 201402L
int foo14();
#else
bool foo98();
#endif
)cpp";
  FS.Files["nonstd/platform_stuff.h"] = "int magic = 42;";

  // Describe a C++14 compile of main.cc with both directories on the path.
  const auto MainFile = testPath("main.cc");
  ParseInputs Inputs;
  Inputs.TFS = &FS;
  Inputs.CompileCommand.Filename = MainFile;
  Inputs.CompileCommand.Directory = testRoot();
  Inputs.CompileCommand.CommandLine = {"clang++", MainFile, "-isystemstd/",
                                       "-isystemnonstd/", "-std=c++14"};
  IgnoreDiagnostics IgnoreDiags;
  auto Invocation = buildCompilerInvocation(Inputs, IgnoreDiags);
  ASSERT_TRUE(Invocation);

  // Only std/ counts as the standard library location.
  StdLibLocation StdDir;
  StdDir.Paths.push_back(testPath("std/"));

  // Expect only the C++14 symbol; `magic` from nonstd/ is filtered out.
  auto Indexed = indexStandardLibrary("#include <foo.h>",
                                      std::move(Invocation), StdDir, FS);
  EXPECT_THAT(Indexed, ElementsAre(Named("foo14")));
}
// Exercises StdLibSet::add(): config gating, probe-header detection, version
// upgrades within a language, and independence of C and C++.
TEST(StdLibTests, StdLibSet) {
  StdLibSet Set;
  MockFS FS;
  // Placeholder files so that the directories exist.
  FS.Files["std/_"] = "";
  FS.Files["libc/_"] = "";

  // Builds a HeaderSearch over the given directories and asks Set to add it.
  auto Add = [&](const LangOptions &LO,
                 std::vector<llvm::StringRef> SearchPath) {
    SourceManagerForFile SM("scratch", "");
    SM.get().getFileManager().setVirtualFileSystem(FS.view(llvm::None));
    HeaderSearch HS(/*HSOpts=*/nullptr, SM.get(), SM.get().getDiagnostics(), LO,
                    /*Target=*/nullptr);
    for (auto P : SearchPath)
      HS.AddSearchPath(
          DirectoryLookup(
              cantFail(SM.get().getFileManager().getDirectoryRef(testPath(P))),
              SrcMgr::C_System, /*isFramework=*/false),
          true);
    return Set.add(LO, HS);
  };

  Config Cfg;
  Cfg.Index.StandardLibrary = false;
  WithContextValue Disabled(Config::Key, std::move(Cfg));

  LangOptions LO;
  LO.CPlusPlus = true;
  EXPECT_FALSE(Add(LO, {"std"})) << "Disabled in config";

  Cfg = Config();
  Cfg.Index.StandardLibrary = true;
  WithContextValue Enabled(Config::Key, std::move(Cfg));

  EXPECT_FALSE(Add(LO, {"std"})) << "No <vector> found";
  FS.Files["std/vector"] = "class vector;";
  EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++98";
  EXPECT_FALSE(Add(LO, {"std"})) << "Don't reindex";
  LO.CPlusPlus11 = true;
  EXPECT_TRUE(Add(LO, {"std"})) << "Indexing as C++11";
  LO.CPlusPlus = false;
  EXPECT_FALSE(Add(LO, {"libc"})) << "No <stdio.h>";
  // Only the file's existence matters; give it empty contents like the other
  // placeholders (assigning `true` would store the one-character string
  // "\x01").
  FS.Files["libc/stdio.h"] = "";
  EXPECT_TRUE(Add(LO, {"libc"})) << "Indexing as C";
}
// Matches an object with the given `Name` that has exactly one entry in
// `Includes`, whose `Header` spelling starts with '<'.
MATCHER_P(StdlibSymbol, Name, "") {
return arg.Name == Name && arg.Includes.size() == 1 &&
llvm::StringRef(arg.Includes.front().Header).startswith("<");
}
// Opening a file that includes nothing should still offer completions for
// every symbol in the (fake) standard library once stdlib indexing is on.
TEST(StdLibTests, EndToEnd) {
  Config EnabledCfg;
  EnabledCfg.Index.StandardLibrary = true;
  WithContextValue WithStdlibIndex(Config::Key, std::move(EnabledCfg));

  // A tiny fake standard library containing just <vector> and <list>.
  MockFS FS;
  FS.Files["stdlib/vector"] =
      "namespace std { template <class> class vector; }";
  FS.Files["stdlib/list"] =
      " namespace std { template <typename T> class list; }";

  MockCompilationDatabase CDB;
  CDB.ExtraClangFlags.push_back("-isystem" + testPath("stdlib"));
  ClangdServer::Options Opts = ClangdServer::optsForTest();
  Opts.BuildDynamicSymbolIndex = true; // also used for stdlib index
  ClangdServer Server(CDB, FS, Opts);

  // The document includes neither header.
  const auto File = testPath("foo.cc");
  Annotations Code("std::^");
  Server.addDocument(File, Code.code());
  ASSERT_TRUE(Server.blockUntilIdleForTest());

  clangd::CodeCompleteOptions CCOpts;
  auto Result = cantFail(runCodeComplete(Server, File, Code.point(), CCOpts));
  EXPECT_THAT(
      Result.Completions,
      UnorderedElementsAre(StdlibSymbol("list"), StdlibSymbol("vector")));
}
} // namespace
} // namespace clangd
} // namespace clang

View File

@ -1123,7 +1123,8 @@ TEST_F(TUSchedulerTests, AsyncPreambleThread) {
public:
BlockPreambleThread(llvm::StringRef BlockVersion, Notification &N)
: BlockVersion(BlockVersion), N(N) {}
void onPreambleAST(PathRef Path, llvm::StringRef Version, ASTContext &Ctx,
void onPreambleAST(PathRef Path, llvm::StringRef Version,
const CompilerInvocation &, ASTContext &Ctx,
Preprocessor &, const CanonicalIncludes &) override {
if (Version == BlockVersion)
N.wait();