mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-14 11:39:35 +00:00
[clangd] Incorporate transitive #includes into code complete proximity scoring.
Summary: We now compute a distance from the main file to the symbol header, which is a weighted count of: - some number of #include traversals from source file --> included file - some number of FS traversals from file --> parent directory - some number of FS traversals from parent directory --> child file/dir This calculation is performed in the appropriate URI scheme. This means we'll get some proximity boost from header files in main-file contexts, even when these are in different directory trees. This extended file proximity model is not yet incorporated in the index interface/implementation. Reviewers: ioeric Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, cfe-commits Differential Revision: https://reviews.llvm.org/D48441 llvm-svn: 336177
This commit is contained in:
parent
a0a52bf195
commit
3f0243fdaf
@ -19,6 +19,7 @@ add_clang_library(clangDaemon
|
||||
Diagnostics.cpp
|
||||
DraftStore.cpp
|
||||
FindSymbols.cpp
|
||||
FileDistance.cpp
|
||||
FuzzyMatch.cpp
|
||||
GlobalCompilationDatabase.cpp
|
||||
Headers.cpp
|
||||
|
@ -167,7 +167,7 @@ void ClangdServer::codeComplete(PathRef File, Position Pos,
|
||||
// both the old and the new version in case only one of them matches.
|
||||
CodeCompleteResult Result = clangd::codeComplete(
|
||||
File, IP->Command, PreambleData ? &PreambleData->Preamble : nullptr,
|
||||
PreambleData ? PreambleData->Inclusions : std::vector<Inclusion>(),
|
||||
PreambleData ? PreambleData->Includes : IncludeStructure(),
|
||||
IP->Contents, Pos, FS, PCHs, CodeCompleteOpts);
|
||||
CB(std::move(Result));
|
||||
};
|
||||
|
@ -88,7 +88,7 @@ public:
|
||||
CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback)
|
||||
: File(File), ParsedCallback(ParsedCallback) {}
|
||||
|
||||
std::vector<Inclusion> takeInclusions() { return std::move(Inclusions); }
|
||||
IncludeStructure takeIncludes() { return std::move(Includes); }
|
||||
|
||||
void AfterExecute(CompilerInstance &CI) override {
|
||||
if (!ParsedCallback)
|
||||
@ -103,15 +103,13 @@ public:
|
||||
|
||||
std::unique_ptr<PPCallbacks> createPPCallbacks() override {
|
||||
assert(SourceMgr && "SourceMgr must be set at this point");
|
||||
return collectInclusionsInMainFileCallback(
|
||||
*SourceMgr,
|
||||
[this](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); });
|
||||
return collectIncludeStructureCallback(*SourceMgr, &Includes);
|
||||
}
|
||||
|
||||
private:
|
||||
PathRef File;
|
||||
PreambleParsedCallback ParsedCallback;
|
||||
std::vector<Inclusion> Inclusions;
|
||||
IncludeStructure Includes;
|
||||
SourceManager *SourceMgr = nullptr;
|
||||
};
|
||||
|
||||
@ -153,15 +151,11 @@ ParsedAST::Build(std::unique_ptr<clang::CompilerInvocation> CI,
|
||||
return llvm::None;
|
||||
}
|
||||
|
||||
std::vector<Inclusion> Inclusions;
|
||||
// Copy over the includes from the preamble, then combine with the
|
||||
// non-preamble includes below.
|
||||
if (Preamble)
|
||||
Inclusions = Preamble->Inclusions;
|
||||
|
||||
Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
|
||||
Clang->getSourceManager(),
|
||||
[&Inclusions](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }));
|
||||
auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
|
||||
Clang->getPreprocessor().addPPCallbacks(
|
||||
collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
|
||||
|
||||
if (!Action->Execute())
|
||||
log("Execute() failed when building AST for " + MainInput.getFile());
|
||||
@ -179,7 +173,7 @@ ParsedAST::Build(std::unique_ptr<clang::CompilerInvocation> CI,
|
||||
Diags.insert(Diags.begin(), Preamble->Diags.begin(), Preamble->Diags.end());
|
||||
return ParsedAST(std::move(Preamble), std::move(Clang), std::move(Action),
|
||||
std::move(ParsedDecls), std::move(Diags),
|
||||
std::move(Inclusions));
|
||||
std::move(Includes));
|
||||
}
|
||||
|
||||
ParsedAST::ParsedAST(ParsedAST &&Other) = default;
|
||||
@ -246,25 +240,24 @@ std::size_t ParsedAST::getUsedBytes() const {
|
||||
return Total;
|
||||
}
|
||||
|
||||
const std::vector<Inclusion> &ParsedAST::getInclusions() const {
|
||||
return Inclusions;
|
||||
const IncludeStructure &ParsedAST::getIncludeStructure() const {
|
||||
return Includes;
|
||||
}
|
||||
|
||||
PreambleData::PreambleData(PrecompiledPreamble Preamble,
|
||||
std::vector<Diag> Diags,
|
||||
std::vector<Inclusion> Inclusions)
|
||||
std::vector<Diag> Diags, IncludeStructure Includes)
|
||||
: Preamble(std::move(Preamble)), Diags(std::move(Diags)),
|
||||
Inclusions(std::move(Inclusions)) {}
|
||||
Includes(std::move(Includes)) {}
|
||||
|
||||
ParsedAST::ParsedAST(std::shared_ptr<const PreambleData> Preamble,
|
||||
std::unique_ptr<CompilerInstance> Clang,
|
||||
std::unique_ptr<FrontendAction> Action,
|
||||
std::vector<Decl *> LocalTopLevelDecls,
|
||||
std::vector<Diag> Diags, std::vector<Inclusion> Inclusions)
|
||||
std::vector<Diag> Diags, IncludeStructure Includes)
|
||||
: Preamble(std::move(Preamble)), Clang(std::move(Clang)),
|
||||
Action(std::move(Action)), Diags(std::move(Diags)),
|
||||
LocalTopLevelDecls(std::move(LocalTopLevelDecls)),
|
||||
Inclusions(std::move(Inclusions)) {
|
||||
Includes(std::move(Includes)) {
|
||||
assert(this->Clang);
|
||||
assert(this->Action);
|
||||
}
|
||||
@ -350,7 +343,7 @@ std::shared_ptr<const PreambleData> clangd::buildPreamble(
|
||||
" for file " + Twine(FileName));
|
||||
return std::make_shared<PreambleData>(
|
||||
std::move(*BuiltPreamble), PreambleDiagnostics.take(),
|
||||
SerializedDeclsCollector.takeInclusions());
|
||||
SerializedDeclsCollector.takeIncludes());
|
||||
} else {
|
||||
log("Could not build a preamble for file " + Twine(FileName));
|
||||
return nullptr;
|
||||
|
@ -45,14 +45,14 @@ namespace clangd {
|
||||
// Stores Preamble and associated data.
|
||||
struct PreambleData {
|
||||
PreambleData(PrecompiledPreamble Preamble, std::vector<Diag> Diags,
|
||||
std::vector<Inclusion> Inclusions);
|
||||
IncludeStructure Includes);
|
||||
|
||||
tooling::CompileCommand CompileCommand;
|
||||
PrecompiledPreamble Preamble;
|
||||
std::vector<Diag> Diags;
|
||||
// Processes like code completions and go-to-definitions will need #include
|
||||
// information, and their compile action skips preamble range.
|
||||
std::vector<Inclusion> Inclusions;
|
||||
IncludeStructure Includes;
|
||||
};
|
||||
|
||||
/// Information required to run clang, e.g. to parse AST or do code completion.
|
||||
@ -99,14 +99,14 @@ public:
|
||||
/// Returns the esitmated size of the AST and the accessory structures, in
|
||||
/// bytes. Does not include the size of the preamble.
|
||||
std::size_t getUsedBytes() const;
|
||||
const std::vector<Inclusion> &getInclusions() const;
|
||||
const IncludeStructure &getIncludeStructure() const;
|
||||
|
||||
private:
|
||||
ParsedAST(std::shared_ptr<const PreambleData> Preamble,
|
||||
std::unique_ptr<CompilerInstance> Clang,
|
||||
std::unique_ptr<FrontendAction> Action,
|
||||
std::vector<Decl *> LocalTopLevelDecls, std::vector<Diag> Diags,
|
||||
std::vector<Inclusion> Inclusions);
|
||||
IncludeStructure Includes);
|
||||
|
||||
// In-memory preambles must outlive the AST, it is important that this member
|
||||
// goes before Clang and Action.
|
||||
@ -124,7 +124,7 @@ private:
|
||||
// Top-level decls inside the current file. Not that this does not include
|
||||
// top-level decls from the preamble.
|
||||
std::vector<Decl *> LocalTopLevelDecls;
|
||||
std::vector<Inclusion> Inclusions;
|
||||
IncludeStructure Includes;
|
||||
};
|
||||
|
||||
using PreambleParsedCallback = std::function<void(
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "AST.h"
|
||||
#include "CodeCompletionStrings.h"
|
||||
#include "Compiler.h"
|
||||
#include "FileDistance.h"
|
||||
#include "FuzzyMatch.h"
|
||||
#include "Headers.h"
|
||||
#include "Logger.h"
|
||||
@ -763,7 +764,6 @@ struct SemaCompleteInput {
|
||||
PathRef FileName;
|
||||
const tooling::CompileCommand &Command;
|
||||
PrecompiledPreamble const *Preamble;
|
||||
const std::vector<Inclusion> &PreambleInclusions;
|
||||
StringRef Contents;
|
||||
Position Pos;
|
||||
IntrusiveRefCntPtr<vfs::FileSystem> VFS;
|
||||
@ -771,12 +771,11 @@ struct SemaCompleteInput {
|
||||
};
|
||||
|
||||
// Invokes Sema code completion on a file.
|
||||
// If \p Includes is set, it will be initialized after a compiler instance has
|
||||
// been set up.
|
||||
// If \p Includes is set, it will be updated based on the compiler invocation.
|
||||
bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
|
||||
const clang::CodeCompleteOptions &Options,
|
||||
const SemaCompleteInput &Input,
|
||||
std::unique_ptr<IncludeInserter> *Includes = nullptr) {
|
||||
IncludeStructure *Includes = nullptr) {
|
||||
trace::Span Tracer("Sema completion");
|
||||
std::vector<const char *> ArgStrs;
|
||||
for (const auto &S : Input.Command.CommandLine)
|
||||
@ -837,29 +836,9 @@ bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
|
||||
Input.FileName);
|
||||
return false;
|
||||
}
|
||||
if (Includes) {
|
||||
// Initialize Includes if provided.
|
||||
|
||||
// FIXME(ioeric): needs more consistent style support in clangd server.
|
||||
auto Style = format::getStyle(format::DefaultFormatStyle, Input.FileName,
|
||||
format::DefaultFallbackStyle, Input.Contents,
|
||||
Input.VFS.get());
|
||||
if (!Style) {
|
||||
log("ERROR: failed to get FormatStyle for file " + Input.FileName +
|
||||
". Fall back to use LLVM style. Error: " +
|
||||
llvm::toString(Style.takeError()));
|
||||
Style = format::getLLVMStyle();
|
||||
}
|
||||
*Includes = llvm::make_unique<IncludeInserter>(
|
||||
Input.FileName, Input.Contents, *Style, Input.Command.Directory,
|
||||
Clang->getPreprocessor().getHeaderSearchInfo());
|
||||
for (const auto &Inc : Input.PreambleInclusions)
|
||||
Includes->get()->addExisting(Inc);
|
||||
Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
|
||||
Clang->getSourceManager(), [Includes](Inclusion Inc) {
|
||||
Includes->get()->addExisting(std::move(Inc));
|
||||
}));
|
||||
}
|
||||
if (Includes)
|
||||
Clang->getPreprocessor().addPPCallbacks(
|
||||
collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
|
||||
if (!Action.Execute()) {
|
||||
log("Execute() failed when running codeComplete for " + Input.FileName);
|
||||
return false;
|
||||
@ -949,24 +928,23 @@ clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
|
||||
// - TopN determines the results with the best score.
|
||||
class CodeCompleteFlow {
|
||||
PathRef FileName;
|
||||
IncludeStructure Includes; // Complete once the compiler runs.
|
||||
const CodeCompleteOptions &Opts;
|
||||
// Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
|
||||
CompletionRecorder *Recorder = nullptr;
|
||||
int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
|
||||
bool Incomplete = false; // Would more be available with a higher limit?
|
||||
llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
|
||||
std::unique_ptr<IncludeInserter> Includes; // Initialized once compiler runs.
|
||||
FileProximityMatcher FileProximityMatch;
|
||||
// Include-insertion and proximity scoring rely on the include structure.
|
||||
// This is available after Sema has run.
|
||||
llvm::Optional<IncludeInserter> Inserter; // Available during runWithSema.
|
||||
llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
|
||||
|
||||
public:
|
||||
// A CodeCompleteFlow object is only useful for calling run() exactly once.
|
||||
CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts)
|
||||
: FileName(FileName), Opts(Opts),
|
||||
// FIXME: also use path of the main header corresponding to FileName to
|
||||
// calculate the file proximity, which would capture include/ and src/
|
||||
// project setup where headers and implementations are not in the same
|
||||
// directory.
|
||||
FileProximityMatch(ArrayRef<StringRef>({FileName})) {}
|
||||
CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
|
||||
const CodeCompleteOptions &Opts)
|
||||
: FileName(FileName), Includes(Includes), Opts(Opts) {}
|
||||
|
||||
CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
|
||||
trace::Span Tracer("CodeCompleteFlow");
|
||||
@ -977,11 +955,45 @@ public:
|
||||
CodeCompleteResult Output;
|
||||
auto RecorderOwner = llvm::make_unique<CompletionRecorder>(Opts, [&]() {
|
||||
assert(Recorder && "Recorder is not set");
|
||||
assert(Includes && "Includes is not set");
|
||||
// FIXME(ioeric): needs more consistent style support in clangd server.
|
||||
auto Style =
|
||||
format::getStyle("file", SemaCCInput.FileName, "LLVM",
|
||||
SemaCCInput.Contents, SemaCCInput.VFS.get());
|
||||
if (!Style) {
|
||||
log("Failed to get FormatStyle for file" + SemaCCInput.FileName + ": " +
|
||||
llvm::toString(Style.takeError()) + ". Fallback is LLVM style.");
|
||||
Style = format::getLLVMStyle();
|
||||
}
|
||||
// If preprocessor was run, inclusions from preprocessor callback should
|
||||
// already be added to Inclusions.
|
||||
// already be added to Includes.
|
||||
Inserter.emplace(
|
||||
SemaCCInput.FileName, SemaCCInput.Contents, *Style,
|
||||
SemaCCInput.Command.Directory,
|
||||
Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
|
||||
for (const auto &Inc : Includes.MainFileIncludes)
|
||||
Inserter->addExisting(Inc);
|
||||
|
||||
// Most of the cost of file proximity is in initializing the FileDistance
|
||||
// structures based on the observed includes, once per query. Conceptually
|
||||
// that happens here (though the per-URI-scheme initialization is lazy).
|
||||
// The per-result proximity scoring is (amortized) very cheap.
|
||||
FileDistanceOptions ProxOpts{}; // Use defaults.
|
||||
const auto &SM = Recorder->CCSema->getSourceManager();
|
||||
llvm::StringMap<SourceParams> ProxSources;
|
||||
for (auto &Entry : Includes.includeDepth(
|
||||
SM.getFileEntryForID(SM.getMainFileID())->getName())) {
|
||||
auto &Source = ProxSources[Entry.getKey()];
|
||||
Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
|
||||
// Symbols near our transitive includes are good, but only consider
|
||||
// things in the same directory or below it. Otherwise there can be
|
||||
// many false positives.
|
||||
if (Entry.getValue() > 0)
|
||||
Source.MaxUpTraversals = 1;
|
||||
}
|
||||
FileProximity.emplace(ProxSources, ProxOpts);
|
||||
|
||||
Output = runWithSema();
|
||||
Includes.reset(); // Make sure this doesn't out-live Clang.
|
||||
Inserter.reset(); // Make sure this doesn't out-live Clang.
|
||||
SPAN_ATTACH(Tracer, "sema_completion_kind",
|
||||
getCompletionKindString(Recorder->CCContext.getKind()));
|
||||
});
|
||||
@ -1044,6 +1056,7 @@ private:
|
||||
Req.RestrictForCodeCompletion = true;
|
||||
Req.Scopes = getQueryScopes(Recorder->CCContext,
|
||||
Recorder->CCSema->getSourceManager());
|
||||
// FIXME: we should send multiple weighted paths here.
|
||||
Req.ProximityPaths.push_back(FileName);
|
||||
log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])",
|
||||
Req.Query,
|
||||
@ -1124,7 +1137,7 @@ private:
|
||||
SymbolQualitySignals Quality;
|
||||
SymbolRelevanceSignals Relevance;
|
||||
Relevance.Query = SymbolRelevanceSignals::CodeComplete;
|
||||
Relevance.FileProximityMatch = &FileProximityMatch;
|
||||
Relevance.FileProximityMatch = FileProximity.getPointer();
|
||||
auto &First = Bundle.front();
|
||||
if (auto FuzzyScore = fuzzyScore(First))
|
||||
Relevance.NameMatch = *FuzzyScore;
|
||||
@ -1174,7 +1187,7 @@ private:
|
||||
: nullptr;
|
||||
if (!Builder)
|
||||
Builder.emplace(Recorder->CCSema->getASTContext(), Item, SemaCCS,
|
||||
*Includes, FileName, Opts);
|
||||
*Inserter, FileName, Opts);
|
||||
else
|
||||
Builder->add(Item, SemaCCS);
|
||||
}
|
||||
@ -1182,15 +1195,16 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
CodeCompleteResult codeComplete(
|
||||
PathRef FileName, const tooling::CompileCommand &Command,
|
||||
PrecompiledPreamble const *Preamble,
|
||||
const std::vector<Inclusion> &PreambleInclusions, StringRef Contents,
|
||||
Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
|
||||
std::shared_ptr<PCHContainerOperations> PCHs, CodeCompleteOptions Opts) {
|
||||
return CodeCompleteFlow(FileName, Opts)
|
||||
.run({FileName, Command, Preamble, PreambleInclusions, Contents, Pos, VFS,
|
||||
PCHs});
|
||||
CodeCompleteResult codeComplete(PathRef FileName,
|
||||
const tooling::CompileCommand &Command,
|
||||
PrecompiledPreamble const *Preamble,
|
||||
const IncludeStructure &PreambleInclusions,
|
||||
StringRef Contents, Position Pos,
|
||||
IntrusiveRefCntPtr<vfs::FileSystem> VFS,
|
||||
std::shared_ptr<PCHContainerOperations> PCHs,
|
||||
CodeCompleteOptions Opts) {
|
||||
return CodeCompleteFlow(FileName, PreambleInclusions, Opts)
|
||||
.run({FileName, Command, Preamble, Contents, Pos, VFS, PCHs});
|
||||
}
|
||||
|
||||
SignatureHelp signatureHelp(PathRef FileName,
|
||||
@ -1205,11 +1219,11 @@ SignatureHelp signatureHelp(PathRef FileName,
|
||||
Options.IncludeMacros = false;
|
||||
Options.IncludeCodePatterns = false;
|
||||
Options.IncludeBriefComments = false;
|
||||
std::vector<Inclusion> PreambleInclusions = {}; // Unused for signatureHelp
|
||||
IncludeStructure PreambleInclusions; // Unused for signatureHelp
|
||||
semaCodeComplete(llvm::make_unique<SignatureHelpCollector>(Options, Result),
|
||||
Options,
|
||||
{FileName, Command, Preamble, PreambleInclusions, Contents,
|
||||
Pos, std::move(VFS), std::move(PCHs)});
|
||||
{FileName, Command, Preamble, Contents, Pos, std::move(VFS),
|
||||
std::move(PCHs)});
|
||||
return Result;
|
||||
}
|
||||
|
||||
|
@ -144,12 +144,14 @@ struct CodeCompleteResult {
|
||||
raw_ostream &operator<<(raw_ostream &, const CodeCompleteResult &);
|
||||
|
||||
/// Get code completions at a specified \p Pos in \p FileName.
|
||||
CodeCompleteResult codeComplete(
|
||||
PathRef FileName, const tooling::CompileCommand &Command,
|
||||
PrecompiledPreamble const *Preamble,
|
||||
const std::vector<Inclusion> &PreambleInclusions, StringRef Contents,
|
||||
Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
|
||||
std::shared_ptr<PCHContainerOperations> PCHs, CodeCompleteOptions Opts);
|
||||
CodeCompleteResult codeComplete(PathRef FileName,
|
||||
const tooling::CompileCommand &Command,
|
||||
PrecompiledPreamble const *Preamble,
|
||||
const IncludeStructure &PreambleInclusions,
|
||||
StringRef Contents, Position Pos,
|
||||
IntrusiveRefCntPtr<vfs::FileSystem> VFS,
|
||||
std::shared_ptr<PCHContainerOperations> PCHs,
|
||||
CodeCompleteOptions Opts);
|
||||
|
||||
/// Get signature help at a specified \p Pos in \p FileName.
|
||||
SignatureHelp signatureHelp(PathRef FileName,
|
||||
|
173
clang-tools-extra/clangd/FileDistance.cpp
Normal file
173
clang-tools-extra/clangd/FileDistance.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The FileDistance structure allows calculating the minimum distance to paths
|
||||
// in a single tree.
|
||||
// We simply walk up the path's ancestors until we find a node whose cost is
|
||||
// known, and add the cost of walking back down. Initialization ensures this
|
||||
// gives the correct path to the roots.
|
||||
// We cache the results, so that the runtime is O(|A|), where A is the set of
|
||||
// all distinct ancestors of visited paths.
|
||||
//
|
||||
// Example after initialization with /=2, /bar=0, DownCost = 1:
|
||||
// / = 2
|
||||
// /bar = 0
|
||||
//
|
||||
// After querying /foo/bar and /bar/foo:
|
||||
// / = 2
|
||||
// /bar = 0
|
||||
// /bar/foo = 1
|
||||
// /foo = 3
|
||||
// /foo/bar = 4
|
||||
//
|
||||
// URIDistance creates FileDistance lazily for each URI scheme encountered. In
|
||||
// practice this is a small constant factor.
|
||||
//
|
||||
//===-------------------------------------------------------------------------//
|
||||
|
||||
#include "FileDistance.h"
|
||||
#include "Logger.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include <queue>
|
||||
#define DEBUG_TYPE "FileDistance"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
using namespace llvm;
|
||||
|
||||
// Convert a path into the canonical form.
|
||||
// Canonical form is either "/", or "/segment" * N:
|
||||
// C:\foo\bar --> /c:/foo/bar
|
||||
// /foo/ --> /foo
|
||||
// a/b/c --> /a/b/c
|
||||
static SmallString<128> canonicalize(StringRef Path) {
|
||||
SmallString<128> Result = Path.rtrim('/');
|
||||
native(Result, sys::path::Style::posix);
|
||||
if (Result.empty() || Result.front() != '/')
|
||||
Result.insert(Result.begin(), '/');
|
||||
return Result;
|
||||
}
|
||||
|
||||
const unsigned FileDistance::kUnreachable;
|
||||
|
||||
FileDistance::FileDistance(StringMap<SourceParams> Sources,
|
||||
const FileDistanceOptions &Opts)
|
||||
: Opts(Opts) {
|
||||
llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
|
||||
// Compute the best distance following only up edges.
|
||||
// Keep track of down edges, in case we can use them to improve on this.
|
||||
for (const auto &S : Sources) {
|
||||
auto Canonical = canonicalize(S.getKey());
|
||||
LLVM_DEBUG(dbgs() << "Source " << Canonical << " = " << S.second.Cost
|
||||
<< ", MaxUp=" << S.second.MaxUpTraversals << "\n");
|
||||
// Walk up to ancestors of this source, assigning cost.
|
||||
StringRef Rest = Canonical;
|
||||
llvm::hash_code Hash = hash_value(Rest);
|
||||
for (unsigned I = 0; !Rest.empty(); ++I) {
|
||||
Rest = parent_path(Rest, sys::path::Style::posix);
|
||||
auto NextHash = hash_value(Rest);
|
||||
DownEdges[NextHash].push_back(Hash);
|
||||
// We can't just break after MaxUpTraversals, must still set DownEdges.
|
||||
if (I > S.getValue().MaxUpTraversals) {
|
||||
if (Cache.find(Hash) != Cache.end())
|
||||
break;
|
||||
} else {
|
||||
unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
|
||||
auto R = Cache.try_emplace(Hash, Cost);
|
||||
if (!R.second) {
|
||||
if (Cost < R.first->second) {
|
||||
R.first->second = Cost;
|
||||
} else {
|
||||
// If we're not the best way to get to this path, stop assigning.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Hash = NextHash;
|
||||
}
|
||||
}
|
||||
// Now propagate scores parent -> child if that's an improvement.
|
||||
// BFS ensures we propagate down chains (must visit parents before children).
|
||||
std::queue<hash_code> Next;
|
||||
for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef(""))))
|
||||
Next.push(Child);
|
||||
while (!Next.empty()) {
|
||||
auto ParentCost = Cache.lookup(Next.front());
|
||||
for (auto Child : DownEdges.lookup(Next.front())) {
|
||||
auto &ChildCost =
|
||||
Cache.try_emplace(Child, kUnreachable).first->getSecond();
|
||||
if (ParentCost + Opts.DownCost < ChildCost)
|
||||
ChildCost = ParentCost + Opts.DownCost;
|
||||
Next.push(Child);
|
||||
}
|
||||
Next.pop();
|
||||
}
|
||||
}
|
||||
|
||||
unsigned FileDistance::distance(StringRef Path) {
|
||||
auto Canonical = canonicalize(Path);
|
||||
unsigned Cost = kUnreachable;
|
||||
SmallVector<hash_code, 16> Ancestors;
|
||||
// Walk up ancestors until we find a path we know the distance for.
|
||||
for (StringRef Rest = Canonical; !Rest.empty();
|
||||
Rest = parent_path(Rest, sys::path::Style::posix)) {
|
||||
auto Hash = hash_value(Rest);
|
||||
auto It = Cache.find(Hash);
|
||||
if (It != Cache.end()) {
|
||||
Cost = It->second;
|
||||
break;
|
||||
}
|
||||
Ancestors.push_back(Hash);
|
||||
}
|
||||
// Now we know the costs for (known node, queried node].
|
||||
// Fill these in, walking down the directory tree.
|
||||
for (hash_code Hash : reverse(Ancestors)) {
|
||||
if (Cost != kUnreachable)
|
||||
Cost += Opts.DownCost;
|
||||
Cache.try_emplace(Hash, Cost);
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n");
|
||||
return Cost;
|
||||
}
|
||||
|
||||
unsigned URIDistance::distance(llvm::StringRef URI) {
|
||||
auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable);
|
||||
if (!R.second)
|
||||
return R.first->getSecond();
|
||||
if (auto U = clangd::URI::parse(URI)) {
|
||||
LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body()
|
||||
<< ")\n");
|
||||
R.first->second = forScheme(U->scheme()).distance(U->body());
|
||||
} else {
|
||||
log("URIDistance::distance() of unparseable " + URI + ": " +
|
||||
llvm::toString(U.takeError()));
|
||||
}
|
||||
return R.first->second;
|
||||
}
|
||||
|
||||
FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
|
||||
auto &Delegate = ByScheme[Scheme];
|
||||
if (!Delegate) {
|
||||
llvm::StringMap<SourceParams> SchemeSources;
|
||||
for (const auto &Source : Sources) {
|
||||
if (auto U = clangd::URI::create(Source.getKey(), Scheme))
|
||||
SchemeSources.try_emplace(U->body(), Source.getValue());
|
||||
else
|
||||
consumeError(U.takeError());
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": "
|
||||
<< SchemeSources.size() << "/" << Sources.size()
|
||||
<< " sources\n");
|
||||
Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
|
||||
}
|
||||
return *Delegate;
|
||||
}
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
109
clang-tools-extra/clangd/FileDistance.h
Normal file
109
clang-tools-extra/clangd/FileDistance.h
Normal file
@ -0,0 +1,109 @@
|
||||
//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This library measures the distance between file paths.
|
||||
// It's used for ranking symbols, e.g. in code completion.
|
||||
// |foo/bar.h -> foo/bar.h| = 0.
|
||||
// |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
|
||||
// This is an edit-distance, where edits go up or down the directory tree.
|
||||
// It's not symmetrical, the costs of going up and down may not match.
|
||||
//
|
||||
// Dealing with multiple sources:
|
||||
// In practice we care about the distance from a source file, but files near
|
||||
// its main-header and #included files are considered "close".
|
||||
// So we start with a set of (anchor, cost) pairs, and call the distance to a
|
||||
// path the minimum of `cost + |source -> path|`.
|
||||
//
|
||||
// We allow each source to limit the number of up-traversals paths may start
|
||||
// with. Up-traversals may reach things that are not "semantically near".
|
||||
//
|
||||
// Symbol URI schemes:
|
||||
// Symbol locations may be represented by URIs rather than file paths directly.
|
||||
// In this case we want to perform distance computations in URI space rather
|
||||
// than in file-space, without performing redundant conversions.
|
||||
// Therefore we have a lookup structure that accepts URIs, so that intermediate
|
||||
// calculations for the same scheme can be reused.
|
||||
//
|
||||
// Caveats:
|
||||
// Assuming up and down traversals each have uniform costs is simplistic.
|
||||
// Often there are "semantic roots" whose children are almost unrelated.
|
||||
// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "URI.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseMapInfo.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Allocator.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Support/StringSaver.h"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
|
||||
struct FileDistanceOptions {
|
||||
unsigned UpCost = 2; // |foo/bar.h -> foo|
|
||||
unsigned DownCost = 1; // |foo -> foo/bar.h|
|
||||
unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
|
||||
};
|
||||
|
||||
struct SourceParams {
|
||||
// Base cost for paths starting at this source.
|
||||
unsigned Cost = 0;
|
||||
// Limits the number of upwards traversals allowed from this source.
|
||||
unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
|
||||
};
|
||||
|
||||
// Supports lookups to find the minimum distance to a file from any source.
|
||||
// This object should be reused, it memoizes intermediate computations.
|
||||
class FileDistance {
|
||||
public:
|
||||
static constexpr unsigned kUnreachable = std::numeric_limits<unsigned>::max();
|
||||
|
||||
FileDistance(llvm::StringMap<SourceParams> Sources,
|
||||
const FileDistanceOptions &Opts = {});
|
||||
|
||||
// Computes the minimum distance from any source to the file path.
|
||||
unsigned distance(llvm::StringRef Path);
|
||||
|
||||
private:
|
||||
// Costs computed so far. Always contains sources and their ancestors.
|
||||
// We store hash codes only. Collisions are rare and consequences aren't dire.
|
||||
llvm::DenseMap<llvm::hash_code, unsigned> Cache;
|
||||
FileDistanceOptions Opts;
|
||||
};
|
||||
|
||||
// Supports lookups like FileDistance, but the lookup keys are URIs.
|
||||
// We convert each of the sources to the scheme of the URI and do a FileDistance
|
||||
// comparison on the bodies.
|
||||
class URIDistance {
|
||||
public:
|
||||
URIDistance(llvm::StringMap<SourceParams> Sources,
|
||||
const FileDistanceOptions &Opts = {})
|
||||
: Sources(Sources), Opts(Opts) {}
|
||||
|
||||
// Computes the minimum distance from any source to the URI.
|
||||
// Only sources that can be mapped into the URI's scheme are considered.
|
||||
unsigned distance(llvm::StringRef URI);
|
||||
|
||||
private:
|
||||
// Returns the FileDistance for a URI scheme, creating it if needed.
|
||||
FileDistance &forScheme(llvm::StringRef Scheme);
|
||||
|
||||
// We cache the results using the original strings so we can skip URI parsing.
|
||||
llvm::DenseMap<llvm::hash_code, unsigned> Cache;
|
||||
llvm::StringMap<SourceParams> Sources;
|
||||
llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
|
||||
FileDistanceOptions Opts;
|
||||
};
|
||||
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
@ -23,9 +23,8 @@ namespace {
|
||||
|
||||
class RecordHeaders : public PPCallbacks {
|
||||
public:
|
||||
RecordHeaders(const SourceManager &SM,
|
||||
std::function<void(Inclusion)> Callback)
|
||||
: SM(SM), Callback(std::move(Callback)) {}
|
||||
RecordHeaders(const SourceManager &SM, IncludeStructure *Out)
|
||||
: SM(SM), Out(Out) {}
|
||||
|
||||
// Record existing #includes - both written and resolved paths. Only #includes
|
||||
// in the main file are collected.
|
||||
@ -36,21 +35,28 @@ public:
|
||||
llvm::StringRef /*RelativePath*/,
|
||||
const Module * /*Imported*/,
|
||||
SrcMgr::CharacteristicKind /*FileType*/) override {
|
||||
// Only inclusion directives in the main file make sense. The user cannot
|
||||
// select directives not in the main file.
|
||||
if (HashLoc.isInvalid() || !SM.isInMainFile(HashLoc))
|
||||
return;
|
||||
std::string Written =
|
||||
(IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str();
|
||||
std::string Resolved = (!File || File->tryGetRealPathName().empty())
|
||||
? ""
|
||||
: File->tryGetRealPathName();
|
||||
Callback({halfOpenToRange(SM, FilenameRange), Written, Resolved});
|
||||
if (SM.isInMainFile(HashLoc))
|
||||
Out->MainFileIncludes.push_back({
|
||||
halfOpenToRange(SM, FilenameRange),
|
||||
(IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(),
|
||||
File ? File->tryGetRealPathName() : "",
|
||||
});
|
||||
if (File) {
|
||||
auto *IncludingFileEntry = SM.getFileEntryForID(SM.getFileID(HashLoc));
|
||||
if (!IncludingFileEntry) {
|
||||
assert(SM.getBufferName(HashLoc).startswith("<") &&
|
||||
"Expected #include location to be a file or <built-in>");
|
||||
// Treat as if included from the main file.
|
||||
IncludingFileEntry = SM.getFileEntryForID(SM.getMainFileID());
|
||||
}
|
||||
Out->recordInclude(IncludingFileEntry->getName(), File->getName(),
|
||||
File->tryGetRealPathName());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const SourceManager &SM;
|
||||
std::function<void(Inclusion)> Callback;
|
||||
IncludeStructure *Out;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
@ -65,9 +71,59 @@ bool HeaderFile::valid() const {
|
||||
}
|
||||
|
||||
std::unique_ptr<PPCallbacks>
|
||||
collectInclusionsInMainFileCallback(const SourceManager &SM,
|
||||
std::function<void(Inclusion)> Callback) {
|
||||
return llvm::make_unique<RecordHeaders>(SM, std::move(Callback));
|
||||
collectIncludeStructureCallback(const SourceManager &SM,
|
||||
IncludeStructure *Out) {
|
||||
return llvm::make_unique<RecordHeaders>(SM, Out);
|
||||
}
|
||||
|
||||
void IncludeStructure::recordInclude(llvm::StringRef IncludingName,
|
||||
llvm::StringRef IncludedName,
|
||||
llvm::StringRef IncludedRealName) {
|
||||
auto Child = fileIndex(IncludedName);
|
||||
if (!IncludedRealName.empty() && RealPathNames[Child].empty())
|
||||
RealPathNames[Child] = IncludedRealName;
|
||||
auto Parent = fileIndex(IncludingName);
|
||||
IncludeChildren[Parent].push_back(Child);
|
||||
}
|
||||
|
||||
unsigned IncludeStructure::fileIndex(llvm::StringRef Name) {
|
||||
auto R = NameToIndex.try_emplace(Name, RealPathNames.size());
|
||||
if (R.second)
|
||||
RealPathNames.emplace_back();
|
||||
return R.first->getValue();
|
||||
}
|
||||
|
||||
llvm::StringMap<unsigned>
|
||||
IncludeStructure::includeDepth(llvm::StringRef Root) const {
|
||||
// Include depth 0 is the main file only.
|
||||
llvm::StringMap<unsigned> Result;
|
||||
Result[Root] = 0;
|
||||
std::vector<unsigned> CurrentLevel;
|
||||
llvm::DenseSet<unsigned> Seen;
|
||||
auto It = NameToIndex.find(Root);
|
||||
if (It != NameToIndex.end()) {
|
||||
CurrentLevel.push_back(It->second);
|
||||
Seen.insert(It->second);
|
||||
}
|
||||
|
||||
// Each round of BFS traversal finds the next depth level.
|
||||
std::vector<unsigned> PreviousLevel;
|
||||
for (unsigned Level = 1; !CurrentLevel.empty(); ++Level) {
|
||||
PreviousLevel.clear();
|
||||
PreviousLevel.swap(CurrentLevel);
|
||||
for (const auto &Parent : PreviousLevel) {
|
||||
for (const auto &Child : IncludeChildren.lookup(Parent)) {
|
||||
if (Seen.insert(Child).second) {
|
||||
CurrentLevel.push_back(Child);
|
||||
const auto &Name = RealPathNames[Child];
|
||||
// Can't include files if we don't have their real path.
|
||||
if (!Name.empty())
|
||||
Result[Name] = Level;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// FIXME(ioeric): we might not want to insert an absolute include path if the
|
||||
|
@ -45,10 +45,47 @@ struct Inclusion {
|
||||
Path Resolved; // Resolved path of included file. Empty if not resolved.
|
||||
};
|
||||
|
||||
// Information captured about the inclusion graph in a translation unit.
|
||||
// This includes detailed information about the direct #includes, and summary
|
||||
// information about all transitive includes.
|
||||
//
|
||||
// It should be built incrementally with collectIncludeStructureCallback().
|
||||
// When we build the preamble, we capture and store its include structure along
|
||||
// with the preamble data. When we use the preamble, we can copy its
|
||||
// IncludeStructure and use another collectIncludeStructureCallback() to fill
|
||||
// in any non-preamble inclusions.
|
||||
class IncludeStructure {
|
||||
public:
|
||||
std::vector<Inclusion> MainFileIncludes;
|
||||
|
||||
// Return all transitively reachable files, and their minimum include depth.
|
||||
// All transitive includes (absolute paths), with their minimum include depth.
|
||||
// Root --> 0, #included file --> 1, etc.
|
||||
// Root is clang's name for a file, which may not be absolute.
|
||||
// Usually it should be SM.getFileEntryForID(SM.getMainFileID())->getName().
|
||||
llvm::StringMap<unsigned> includeDepth(llvm::StringRef Root) const;
|
||||
|
||||
// This updates IncludeDepth(), but not MainFileIncludes.
|
||||
void recordInclude(llvm::StringRef IncludingName,
|
||||
llvm::StringRef IncludedName,
|
||||
llvm::StringRef IncludedRealName);
|
||||
|
||||
private:
|
||||
// Identifying files in a way that persists from preamble build to subsequent
|
||||
// builds is surprisingly hard. FileID is unavailable in InclusionDirective(),
|
||||
// and RealPathName and UniqueID are not preseved in the preamble.
|
||||
// We use the FileEntry::Name, which is stable, interned into a "file index".
|
||||
// The paths we want to expose are the RealPathName, so store those too.
|
||||
std::vector<std::string> RealPathNames; // In file index order.
|
||||
unsigned fileIndex(llvm::StringRef Name);
|
||||
llvm::StringMap<unsigned> NameToIndex; // Values are file indexes.
|
||||
// Maps a file's index to that of the files it includes.
|
||||
llvm::DenseMap<unsigned, SmallVector<unsigned, 8>> IncludeChildren;
|
||||
};
|
||||
|
||||
/// Returns a PPCallback that visits all inclusions in the main file.
|
||||
std::unique_ptr<PPCallbacks>
|
||||
collectInclusionsInMainFileCallback(const SourceManager &SM,
|
||||
std::function<void(Inclusion)> Callback);
|
||||
collectIncludeStructureCallback(const SourceManager &SM, IncludeStructure *Out);
|
||||
|
||||
// Calculates insertion edit for including a new header in a file.
|
||||
class IncludeInserter {
|
||||
|
@ -7,17 +7,18 @@
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
#include "Quality.h"
|
||||
#include <cmath>
|
||||
#include "FileDistance.h"
|
||||
#include "URI.h"
|
||||
#include "index/Index.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/Basic/CharInfo.h"
|
||||
#include "clang/AST/DeclVisitor.h"
|
||||
#include "clang/Basic/CharInfo.h"
|
||||
#include "clang/Basic/SourceManager.h"
|
||||
#include "clang/Sema/CodeCompleteConsumer.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cmath>
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
@ -187,60 +188,6 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
|
||||
return OS;
|
||||
}
|
||||
|
||||
/// Calculates a proximity score from \p From and \p To, which are URI strings
|
||||
/// that have the same scheme. This does not parse URI. A URI (sans "<scheme>:")
|
||||
/// is split into chunks by '/' and each chunk is considered a file/directory.
|
||||
/// For example, "uri:///a/b/c" will be treated as /a/b/c
|
||||
static float uriProximity(StringRef From, StringRef To) {
|
||||
auto SchemeSplitFrom = From.split(':');
|
||||
auto SchemeSplitTo = To.split(':');
|
||||
assert((SchemeSplitFrom.first == SchemeSplitTo.first) &&
|
||||
"URIs must have the same scheme in order to compute proximity.");
|
||||
auto Split = [](StringRef URIWithoutScheme) {
|
||||
SmallVector<StringRef, 8> Split;
|
||||
URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
|
||||
return Split;
|
||||
};
|
||||
SmallVector<StringRef, 8> Fs = Split(SchemeSplitFrom.second);
|
||||
SmallVector<StringRef, 8> Ts = Split(SchemeSplitTo.second);
|
||||
auto F = Fs.begin(), T = Ts.begin(), FE = Fs.end(), TE = Ts.end();
|
||||
for (; F != FE && T != TE && *F == *T; ++F, ++T) {
|
||||
}
|
||||
// We penalize for traversing up and down from \p From to \p To but penalize
|
||||
// less for traversing down because subprojects are more closely related than
|
||||
// superprojects.
|
||||
int UpDist = FE - F;
|
||||
int DownDist = TE - T;
|
||||
return std::pow(0.7, UpDist + DownDist/2);
|
||||
}
|
||||
|
||||
FileProximityMatcher::FileProximityMatcher(ArrayRef<StringRef> ProximityPaths)
|
||||
: ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {}
|
||||
|
||||
float FileProximityMatcher::uriProximity(StringRef SymbolURI) const {
|
||||
float Score = 0;
|
||||
if (!ProximityPaths.empty() && !SymbolURI.empty()) {
|
||||
for (const auto &Path : ProximityPaths)
|
||||
// Only calculate proximity score for two URIs with the same scheme so
|
||||
// that the computation can be purely text-based and thus avoid expensive
|
||||
// URI encoding/decoding.
|
||||
if (auto U = URI::create(Path, SymbolURI.split(':').first)) {
|
||||
Score = std::max(Score, clangd::uriProximity(U->toString(), SymbolURI));
|
||||
} else {
|
||||
llvm::consumeError(U.takeError());
|
||||
}
|
||||
}
|
||||
return Score;
|
||||
}
|
||||
|
||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
|
||||
const FileProximityMatcher &M) {
|
||||
OS << formatv("File proximity matcher: ");
|
||||
OS << formatv("ProximityPaths[{0}]", llvm::join(M.ProximityPaths.begin(),
|
||||
M.ProximityPaths.end(), ","));
|
||||
return OS;
|
||||
}
|
||||
|
||||
static SymbolRelevanceSignals::AccessibleScope
|
||||
ComputeScope(const NamedDecl *D) {
|
||||
// Injected "Foo" within the class "Foo" has file scope, not class scope.
|
||||
@ -288,6 +235,15 @@ void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
|
||||
Scope = std::min(Scope, ComputeScope(SemaCCResult.Declaration));
|
||||
}
|
||||
|
||||
static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
|
||||
URIDistance *D) {
|
||||
if (!D || SymbolURI.empty())
|
||||
return {0.f, 0u};
|
||||
unsigned Distance = D->distance(SymbolURI);
|
||||
// Assume approximately default options are used for sensible scoring.
|
||||
return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
|
||||
}
|
||||
|
||||
float SymbolRelevanceSignals::evaluate() const {
|
||||
float Score = 1;
|
||||
|
||||
@ -296,11 +252,10 @@ float SymbolRelevanceSignals::evaluate() const {
|
||||
|
||||
Score *= NameMatch;
|
||||
|
||||
float IndexProximityScore =
|
||||
FileProximityMatch ? FileProximityMatch->uriProximity(SymbolURI) : 0;
|
||||
// Proximity scores are [0,1] and we translate them into a multiplier in the
|
||||
// range from 1 to 2.
|
||||
Score *= 1 + std::max(IndexProximityScore, SemaProximityScore);
|
||||
// range from 1 to 3.
|
||||
Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
|
||||
SemaProximityScore);
|
||||
|
||||
// Symbols like local variables may only be referenced within their scope.
|
||||
// Conversely if we're in that scope, it's likely we'll reference them.
|
||||
@ -331,9 +286,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
|
||||
OS << formatv("\tForbidden: {0}\n", S.Forbidden);
|
||||
OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
|
||||
if (S.FileProximityMatch) {
|
||||
OS << "\tIndex proximity: "
|
||||
<< S.FileProximityMatch->uriProximity(S.SymbolURI) << " ("
|
||||
<< *S.FileProximityMatch << ")\n";
|
||||
auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
|
||||
OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
|
||||
Score.second);
|
||||
}
|
||||
OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
|
||||
OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
|
||||
|
@ -38,6 +38,7 @@ namespace clang {
|
||||
class CodeCompletionResult;
|
||||
namespace clangd {
|
||||
struct Symbol;
|
||||
class URIDistance;
|
||||
|
||||
// Signals structs are designed to be aggregated from 0 or more sources.
|
||||
// A default instance has neutral signals, and sources are merged into it.
|
||||
@ -69,15 +70,13 @@ struct SymbolQualitySignals {
|
||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
|
||||
const SymbolQualitySignals &);
|
||||
|
||||
class FileProximityMatcher;
|
||||
|
||||
/// Attributes of a symbol-query pair that affect how much we like it.
|
||||
struct SymbolRelevanceSignals {
|
||||
/// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
|
||||
float NameMatch = 1;
|
||||
bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
|
||||
|
||||
const FileProximityMatcher *FileProximityMatch = nullptr;
|
||||
URIDistance *FileProximityMatch = nullptr;
|
||||
/// This is used to calculate proximity between the index symbol and the
|
||||
/// query.
|
||||
llvm::StringRef SymbolURI;
|
||||
@ -111,25 +110,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &,
|
||||
/// Combine symbol quality and relevance into a single score.
|
||||
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
|
||||
|
||||
class FileProximityMatcher {
|
||||
public:
|
||||
/// \p ProximityPaths are used to compute proximity scores from symbol's
|
||||
/// declaring file. The best score will be used.
|
||||
explicit FileProximityMatcher(
|
||||
llvm::ArrayRef<llvm::StringRef> ProximityPaths);
|
||||
|
||||
/// Calculates the best proximity score from proximity paths to the symbol's
|
||||
/// URI. Score is [0-1], 1 means \p SymbolURI exactly matches a proximity
|
||||
/// path. When a path cannot be encoded into the same scheme as \p
|
||||
/// SymbolURI, the proximity will be 0.
|
||||
float uriProximity(llvm::StringRef SymbolURI) const;
|
||||
|
||||
private:
|
||||
llvm::SmallVector<std::string, 2> ProximityPaths;
|
||||
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
|
||||
const FileProximityMatcher &);
|
||||
};
|
||||
|
||||
/// TopN<T> is a lossy container that preserves only the "best" N elements.
|
||||
template <typename T, typename Compare = std::greater<T>> class TopN {
|
||||
public:
|
||||
|
@ -230,13 +230,10 @@ llvm::Optional<SymbolID> getSymbolID(const Decl *D) {
|
||||
std::vector<Location> findDefinitions(ParsedAST &AST, Position Pos,
|
||||
const SymbolIndex *Index) {
|
||||
const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
|
||||
SourceLocation SourceLocationBeg =
|
||||
getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
|
||||
|
||||
std::vector<Location> Result;
|
||||
// Handle goto definition for #include.
|
||||
for (auto &Inc : AST.getInclusions()) {
|
||||
Position Pos = sourceLocToPosition(SourceMgr, SourceLocationBeg);
|
||||
for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) {
|
||||
if (!Inc.Resolved.empty() && Inc.R.contains(Pos))
|
||||
Result.push_back(Location{URIForFile{Inc.Resolved}, {}});
|
||||
}
|
||||
@ -244,6 +241,8 @@ std::vector<Location> findDefinitions(ParsedAST &AST, Position Pos,
|
||||
return Result;
|
||||
|
||||
// Identified symbols at a specific position.
|
||||
SourceLocation SourceLocationBeg =
|
||||
getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
|
||||
auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg);
|
||||
|
||||
for (auto Item : Symbols.Macros) {
|
||||
|
@ -17,6 +17,7 @@ add_extra_unittest(ClangdTests
|
||||
ContextTests.cpp
|
||||
DraftStoreTests.cpp
|
||||
FileIndexTests.cpp
|
||||
FileDistanceTests.cpp
|
||||
FindSymbolsTests.cpp
|
||||
FuzzyMatchTests.cpp
|
||||
GlobalCompilationDatabaseTests.cpp
|
||||
|
93
clang-tools-extra/unittests/clangd/FileDistanceTests.cpp
Normal file
93
clang-tools-extra/unittests/clangd/FileDistanceTests.cpp
Normal file
@ -0,0 +1,93 @@
|
||||
//===-- FileDistanceTests.cpp ------------------------*- C++ -*-----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "FileDistance.h"
|
||||
#include "TestFS.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace clang {
|
||||
namespace clangd {
|
||||
namespace {
|
||||
|
||||
TEST(FileDistanceTests, Distance) {
|
||||
FileDistanceOptions Opts;
|
||||
Opts.UpCost = 5;
|
||||
Opts.DownCost = 3;
|
||||
SourceParams CostTwo;
|
||||
CostTwo.Cost = 2;
|
||||
FileDistance D(
|
||||
{{"tools/clang/lib/Format/FormatToken.cpp", SourceParams()},
|
||||
{"tools/clang/include/clang/Format/FormatToken.h", SourceParams()},
|
||||
{"include/llvm/ADT/StringRef.h", CostTwo}},
|
||||
Opts);
|
||||
|
||||
// Source
|
||||
EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp"), 0u);
|
||||
EXPECT_EQ(D.distance("include/llvm/ADT/StringRef.h"), 2u);
|
||||
// Parent
|
||||
EXPECT_EQ(D.distance("tools/clang/lib/Format/"), 5u);
|
||||
// Child
|
||||
EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp/Oops"), 3u);
|
||||
// Ancestor (up+up+up+up)
|
||||
EXPECT_EQ(D.distance("/"), 22u);
|
||||
// Sibling (up+down)
|
||||
EXPECT_EQ(D.distance("tools/clang/lib/Format/AnotherFile.cpp"), 8u);
|
||||
// Cousin (up+up+down+down)
|
||||
EXPECT_EQ(D.distance("include/llvm/Support/Allocator.h"), 18u);
|
||||
// First cousin, once removed (up+up+up+down+down)
|
||||
EXPECT_EQ(D.distance("include/llvm-c/Core.h"), 23u);
|
||||
}
|
||||
|
||||
TEST(FileDistanceTests, BadSource) {
|
||||
// We mustn't assume that paths above sources are best reached via them.
|
||||
FileDistanceOptions Opts;
|
||||
Opts.UpCost = 5;
|
||||
Opts.DownCost = 3;
|
||||
SourceParams CostLots;
|
||||
CostLots.Cost = 100;
|
||||
FileDistance D({{"a", SourceParams()}, {"b/b/b", CostLots}}, Opts);
|
||||
EXPECT_EQ(D.distance("b"), 8u); // a+up+down, not b+up+up
|
||||
EXPECT_EQ(D.distance("b/b/b"), 14u); // a+up+down+down+down, not b
|
||||
EXPECT_EQ(D.distance("b/b/b/c"), 17u); // a+up+down+down+down+down, not b+down
|
||||
}
|
||||
|
||||
auto UseUnittestScheme = UnittestSchemeAnchorSource;
|
||||
|
||||
TEST(FileDistanceTests, URI) {
|
||||
FileDistanceOptions Opts;
|
||||
Opts.UpCost = 5;
|
||||
Opts.DownCost = 3;
|
||||
SourceParams CostLots;
|
||||
CostLots.Cost = 1000;
|
||||
|
||||
URIDistance D(
|
||||
{{testPath("foo"), CostLots}, {"/not/a/testpath", SourceParams()}}, Opts);
|
||||
EXPECT_EQ(D.distance("file:///not/a/testpath/either"), 3u);
|
||||
EXPECT_EQ(D.distance("unittest:foo"), 1000u);
|
||||
EXPECT_EQ(D.distance("unittest:bar"), 1008u);
|
||||
}
|
||||
|
||||
TEST(FileDistance, LimitUpTraversals) {
|
||||
FileDistanceOptions Opts;
|
||||
Opts.UpCost = Opts.DownCost = 1;
|
||||
SourceParams CheapButLimited, CostLots;
|
||||
CheapButLimited.MaxUpTraversals = 1;
|
||||
CostLots.Cost = 100;
|
||||
|
||||
FileDistance D({{"/", CostLots}, {"/a/b/c", CheapButLimited}}, Opts);
|
||||
EXPECT_EQ(D.distance("/a"), 101u);
|
||||
EXPECT_EQ(D.distance("/a/z"), 102u);
|
||||
EXPECT_EQ(D.distance("/a/b"), 1u);
|
||||
EXPECT_EQ(D.distance("/a/b/z"), 2u);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
@ -64,18 +64,17 @@ private:
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<Inclusion> collectIncludes() {
|
||||
IncludeStructure collectIncludes() {
|
||||
auto Clang = setupClang();
|
||||
PreprocessOnlyAction Action;
|
||||
EXPECT_TRUE(
|
||||
Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]));
|
||||
std::vector<Inclusion> Inclusions;
|
||||
Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
|
||||
Clang->getSourceManager(),
|
||||
[&](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }));
|
||||
IncludeStructure Includes;
|
||||
Clang->getPreprocessor().addPPCallbacks(
|
||||
collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
|
||||
EXPECT_TRUE(Action.Execute());
|
||||
Action.EndSourceFile();
|
||||
return Inclusions;
|
||||
return Includes;
|
||||
}
|
||||
|
||||
// Calculates the include path, or returns "" on error or header should not be
|
||||
@ -133,6 +132,14 @@ protected:
|
||||
MATCHER_P(Written, Name, "") { return arg.Written == Name; }
|
||||
MATCHER_P(Resolved, Name, "") { return arg.Resolved == Name; }
|
||||
|
||||
MATCHER_P2(Distance, File, D, "") {
|
||||
if (arg.getKey() != File)
|
||||
*result_listener << "file =" << arg.getKey().str();
|
||||
if (arg.getValue() != D)
|
||||
*result_listener << "distance =" << arg.getValue();
|
||||
return arg.getKey() == File && arg.getValue() == D;
|
||||
}
|
||||
|
||||
TEST_F(HeadersTest, CollectRewrittenAndResolved) {
|
||||
FS.Files[MainFile] = R"cpp(
|
||||
#include "sub/bar.h" // not shortest
|
||||
@ -140,9 +147,12 @@ TEST_F(HeadersTest, CollectRewrittenAndResolved) {
|
||||
std::string BarHeader = testPath("sub/bar.h");
|
||||
FS.Files[BarHeader] = "";
|
||||
|
||||
EXPECT_THAT(collectIncludes(),
|
||||
EXPECT_THAT(collectIncludes().MainFileIncludes,
|
||||
UnorderedElementsAre(
|
||||
AllOf(Written("\"sub/bar.h\""), Resolved(BarHeader))));
|
||||
EXPECT_THAT(collectIncludes().includeDepth(MainFile),
|
||||
UnorderedElementsAre(Distance(MainFile, 0u),
|
||||
Distance(testPath("sub/bar.h"), 1u)));
|
||||
}
|
||||
|
||||
TEST_F(HeadersTest, OnlyCollectInclusionsInMain) {
|
||||
@ -156,8 +166,16 @@ TEST_F(HeadersTest, OnlyCollectInclusionsInMain) {
|
||||
#include "bar.h"
|
||||
)cpp";
|
||||
EXPECT_THAT(
|
||||
collectIncludes(),
|
||||
collectIncludes().MainFileIncludes,
|
||||
UnorderedElementsAre(AllOf(Written("\"bar.h\""), Resolved(BarHeader))));
|
||||
EXPECT_THAT(collectIncludes().includeDepth(MainFile),
|
||||
UnorderedElementsAre(Distance(MainFile, 0u),
|
||||
Distance(testPath("sub/bar.h"), 1u),
|
||||
Distance(testPath("sub/baz.h"), 2u)));
|
||||
// includeDepth() also works for non-main files.
|
||||
EXPECT_THAT(collectIncludes().includeDepth(testPath("sub/bar.h")),
|
||||
UnorderedElementsAre(Distance(testPath("sub/bar.h"), 0u),
|
||||
Distance(testPath("sub/baz.h"), 1u)));
|
||||
}
|
||||
|
||||
TEST_F(HeadersTest, UnResolvedInclusion) {
|
||||
@ -165,8 +183,10 @@ TEST_F(HeadersTest, UnResolvedInclusion) {
|
||||
#include "foo.h"
|
||||
)cpp";
|
||||
|
||||
EXPECT_THAT(collectIncludes(),
|
||||
EXPECT_THAT(collectIncludes().MainFileIncludes,
|
||||
UnorderedElementsAre(AllOf(Written("\"foo.h\""), Resolved(""))));
|
||||
EXPECT_THAT(collectIncludes().includeDepth(MainFile),
|
||||
UnorderedElementsAre(Distance(MainFile, 0u)));
|
||||
}
|
||||
|
||||
TEST_F(HeadersTest, InsertInclude) {
|
||||
|
@ -17,6 +17,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "FileDistance.h"
|
||||
#include "Quality.h"
|
||||
#include "TestFS.h"
|
||||
#include "TestTU.h"
|
||||
@ -162,9 +163,22 @@ TEST(QualityTests, SymbolRelevanceSignalsSanity) {
|
||||
PoorNameMatch.NameMatch = 0.2f;
|
||||
EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate());
|
||||
|
||||
SymbolRelevanceSignals WithProximity;
|
||||
WithProximity.SemaProximityScore = 0.2f;
|
||||
EXPECT_GT(WithProximity.evaluate(), Default.evaluate());
|
||||
SymbolRelevanceSignals WithSemaProximity;
|
||||
WithSemaProximity.SemaProximityScore = 0.2f;
|
||||
EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate());
|
||||
|
||||
SymbolRelevanceSignals IndexProximate;
|
||||
IndexProximate.SymbolURI = "unittest:/foo/bar.h";
|
||||
llvm::StringMap<SourceParams> ProxSources;
|
||||
ProxSources.try_emplace(testPath("foo/baz.h"));
|
||||
URIDistance Distance(ProxSources);
|
||||
IndexProximate.FileProximityMatch = &Distance;
|
||||
EXPECT_GT(IndexProximate.evaluate(), Default.evaluate());
|
||||
SymbolRelevanceSignals IndexDistant = IndexProximate;
|
||||
IndexDistant.SymbolURI = "unittest:/elsewhere/path.h";
|
||||
EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate())
|
||||
<< IndexProximate << IndexDistant;
|
||||
EXPECT_GT(IndexDistant.evaluate(), Default.evaluate());
|
||||
|
||||
SymbolRelevanceSignals Scoped;
|
||||
Scoped.Scope = SymbolRelevanceSignals::FileScope;
|
||||
@ -185,59 +199,6 @@ TEST(QualityTests, SortText) {
|
||||
EXPECT_LT(sortText(0, "a"), sortText(0, "z"));
|
||||
}
|
||||
|
||||
// {a,b,c} becomes /clangd-test/a/b/c
|
||||
std::string joinPaths(llvm::ArrayRef<StringRef> Parts) {
|
||||
return testPath(
|
||||
llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator()));
|
||||
}
|
||||
|
||||
static constexpr float ProximityBase = 0.7f;
|
||||
|
||||
// Calculates a proximity score for an index symbol with declaration file
|
||||
// SymPath with the given URI scheme.
|
||||
float URIProximity(const FileProximityMatcher &Matcher, StringRef SymPath,
|
||||
StringRef Scheme = "file") {
|
||||
auto U = URI::create(SymPath, Scheme);
|
||||
EXPECT_TRUE(static_cast<bool>(U)) << llvm::toString(U.takeError());
|
||||
return Matcher.uriProximity(U->toString());
|
||||
}
|
||||
|
||||
TEST(QualityTests, URIProximityScores) {
|
||||
FileProximityMatcher Matcher(
|
||||
/*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})});
|
||||
|
||||
EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})),
|
||||
1);
|
||||
EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})),
|
||||
ProximityBase);
|
||||
EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})),
|
||||
std::pow(ProximityBase, 5));
|
||||
EXPECT_FLOAT_EQ(
|
||||
URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})),
|
||||
std::pow(ProximityBase, 2));
|
||||
EXPECT_FLOAT_EQ(
|
||||
URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})),
|
||||
std::pow(ProximityBase, 5));
|
||||
EXPECT_FLOAT_EQ(
|
||||
URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})),
|
||||
std::pow(ProximityBase, 6));
|
||||
// Note the common directory is /clang-test/
|
||||
EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})),
|
||||
std::pow(ProximityBase, 7));
|
||||
}
|
||||
|
||||
TEST(QualityTests, URIProximityScoresWithTestURI) {
|
||||
FileProximityMatcher Matcher(
|
||||
/*ProximityPaths=*/{joinPaths({"b", "c", "x"})});
|
||||
EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"),
|
||||
1);
|
||||
EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"),
|
||||
std::pow(ProximityBase, 2));
|
||||
// unittest:///b/c/x vs unittest:///m/n/y. No common directory.
|
||||
EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"),
|
||||
std::pow(ProximityBase, 4));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace clangd
|
||||
} // namespace clang
|
||||
|
Loading…
Reference in New Issue
Block a user