[clangd] Extract scoring/ranking logic, and shave yaks.

Summary:
Code completion scoring was embedded in CodeComplete.cpp, which is bad:
 - awkward to test. The mechanisms (extracting info from index/sema) can be
   unit-tested well, the policy (scoring) should be quantitatively measured.
   Neither was easily possible, and debugging was hard.
   The intermediate signal struct makes this easier.
 - hard to reuse. This is a bug in workspaceSymbols: it just presents the
   results in the index order, which is not sorted in practice, it needs to rank
   them!
   Also, index implementations care about scoring (both query-dependent and
   independent) in order to truncate result lists appropriately.

The main yak shaved here is the build() function that had 3 variants across
unit tests is unified in TestTU.h (rather than adding a 4th variant).

Reviewers: ilya-biryukov

Subscribers: klimek, mgorny, ioeric, MaskRay, jkorous, mgrang, cfe-commits

Differential Revision: https://reviews.llvm.org/D46524

llvm-svn: 332378
This commit is contained in:
Sam McCall 2018-05-15 17:43:27 +00:00
parent 603a43fcd5
commit c5707b6c36
12 changed files with 589 additions and 224 deletions

View File

@ -28,6 +28,7 @@ add_clang_library(clangDaemon
Logger.cpp
Protocol.cpp
ProtocolHandlers.cpp
Quality.cpp
SourceCode.cpp
Threading.cpp
Trace.cpp

View File

@ -20,6 +20,7 @@
#include "FuzzyMatch.h"
#include "Headers.h"
#include "Logger.h"
#include "Quality.h"
#include "SourceCode.h"
#include "Trace.h"
#include "URI.h"
@ -34,6 +35,9 @@
#include "llvm/Support/Format.h"
#include <queue>
// We log detailed candidate here if you run with -debug-only=codecomplete.
#define DEBUG_TYPE "codecomplete"
namespace clang {
namespace clangd {
namespace {
@ -192,35 +196,6 @@ getOptionalParameters(const CodeCompletionString &CCS,
return Result;
}
// Produces an integer that sorts in the same order as F.
// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
uint32_t encodeFloat(float F) {
static_assert(std::numeric_limits<float>::is_iec559, "");
static_assert(sizeof(float) == sizeof(uint32_t), "");
constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
// Get the bits of the float. Endianness is the same as for integers.
uint32_t U;
memcpy(&U, &F, sizeof(float));
// IEEE 754 floats compare like sign-magnitude integers.
if (U & TopBit) // Negative float.
return 0 - U; // Map onto the low half of integers, order reversed.
return U + TopBit; // Positive floats map onto the high half of integers.
}
// Returns a string that sorts in the same order as (-Score, Name), for LSP.
std::string sortText(float Score, llvm::StringRef Name) {
// We convert -Score to an integer, and hex-encode for readability.
// Example: [0.5, "foo"] -> "41000000foo"
std::string S;
llvm::raw_string_ostream OS(S);
write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
/*Width=*/2 * sizeof(Score));
OS << Name;
OS.flush();
return S;
}
/// Creates a `HeaderFile` from \p Header which can be either a URI or a literal
/// include.
static llvm::Expected<HeaderFile> toHeaderFile(StringRef Header,
@ -251,33 +226,6 @@ struct CompletionCandidate {
const CodeCompletionResult *SemaResult = nullptr;
const Symbol *IndexResult = nullptr;
// Computes the "symbol quality" score for this completion. Higher is better.
float score() const {
float Score = 1;
if (IndexResult)
Score *= quality(*IndexResult);
if (SemaResult) {
// For now we just use the Sema priority, mapping it onto a 0-2 interval.
// That makes 1 neutral-ish, so we don't reward/penalize non-Sema results.
// Priority 80 is a really bad score.
Score *= 2 - std::min<float>(80, SemaResult->Priority) / 40;
switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
case CXAvailability_Available:
// No penalty.
break;
case CXAvailability_Deprecated:
Score *= 0.1f;
break;
case CXAvailability_NotAccessible:
case CXAvailability_NotAvailable:
Score = 0;
break;
}
}
return Score;
}
// Builds an LSP completion item.
CompletionItem build(StringRef FileName, const CompletionItemScores &Scores,
const CodeCompleteOptions &Opts,
@ -346,6 +294,7 @@ struct CompletionCandidate {
return I;
}
};
using ScoredCandidate = std::pair<CompletionCandidate, CompletionItemScores>;
// Determine the symbol ID for a Sema code completion result, if possible.
llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R) {
@ -552,50 +501,12 @@ private:
UniqueFunction<void()> ResultsCallback;
};
// Tracks a bounded number of candidates with the best scores.
class TopN {
public:
using value_type = std::pair<CompletionCandidate, CompletionItemScores>;
static constexpr size_t Unbounded = std::numeric_limits<size_t>::max();
TopN(size_t N) : N(N) {}
// Adds a candidate to the set.
// Returns true if a candidate was dropped to get back under N.
bool push(value_type &&V) {
bool Dropped = false;
if (Heap.size() >= N) {
Dropped = true;
if (N > 0 && greater(V, Heap.front())) {
std::pop_heap(Heap.begin(), Heap.end(), greater);
Heap.back() = std::move(V);
std::push_heap(Heap.begin(), Heap.end(), greater);
}
} else {
Heap.push_back(std::move(V));
std::push_heap(Heap.begin(), Heap.end(), greater);
}
assert(Heap.size() <= N);
assert(std::is_heap(Heap.begin(), Heap.end(), greater));
return Dropped;
}
// Returns candidates from best to worst.
std::vector<value_type> items() && {
std::sort_heap(Heap.begin(), Heap.end(), greater);
assert(Heap.size() <= N);
return std::move(Heap);
}
private:
static bool greater(const value_type &L, const value_type &R) {
struct ScoredCandidateGreater {
bool operator()(const ScoredCandidate &L, const ScoredCandidate &R) {
if (L.second.finalScore != R.second.finalScore)
return L.second.finalScore > R.second.finalScore;
return L.first.Name < R.first.Name; // Earlier name is better.
}
const size_t N;
std::vector<value_type> Heap; // Min-heap, comparator is greater().
};
class SignatureHelpCollector final : public CodeCompleteConsumer {
@ -1035,7 +946,8 @@ private:
const SymbolSlab &IndexResults) {
trace::Span Tracer("Merge and score results");
// We only keep the best N results at any time, in "native" format.
TopN Top(Opts.Limit == 0 ? TopN::Unbounded : Opts.Limit);
TopN<ScoredCandidate, ScoredCandidateGreater> Top(
Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
llvm::DenseSet<const Symbol *> UsedIndexResults;
auto CorrespondingIndexResult =
[&](const CodeCompletionResult &SemaResult) -> const Symbol * {
@ -1061,23 +973,42 @@ private:
}
// Scores a candidate and adds it to the TopN structure.
void addCandidate(TopN &Candidates, const CodeCompletionResult *SemaResult,
void addCandidate(TopN<ScoredCandidate, ScoredCandidateGreater> &Candidates,
const CodeCompletionResult *SemaResult,
const Symbol *IndexResult) {
CompletionCandidate C;
C.SemaResult = SemaResult;
C.IndexResult = IndexResult;
C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult);
CompletionItemScores Scores;
SymbolQualitySignals Quality;
SymbolRelevanceSignals Relevance;
if (auto FuzzyScore = Filter->match(C.Name))
Scores.filterScore = *FuzzyScore;
Relevance.NameMatch = *FuzzyScore;
else
return;
Scores.symbolScore = C.score();
// We score candidates by multiplying symbolScore ("quality" of the result)
// with filterScore (how well it matched the query).
// This is sensitive to the distribution of both component scores!
Scores.finalScore = Scores.filterScore * Scores.symbolScore;
if (IndexResult)
Quality.merge(*IndexResult);
if (SemaResult) {
Quality.merge(*SemaResult);
Relevance.merge(*SemaResult);
}
float QualScore = Quality.evaluate(), RelScore = Relevance.evaluate();
CompletionItemScores Scores;
Scores.finalScore = evaluateSymbolAndRelevance(QualScore, RelScore);
// The purpose of exporting component scores is to allow NameMatch to be
// replaced on the client-side. So we export (NameMatch, final/NameMatch)
// rather than (RelScore, QualScore).
Scores.filterScore = Relevance.NameMatch;
Scores.symbolScore =
Scores.filterScore ? Scores.finalScore / Scores.filterScore : QualScore;
DEBUG(llvm::dbgs() << "CodeComplete: " << C.Name
<< (IndexResult ? " (index)" : "")
<< (SemaResult ? " (sema)" : "") << " = "
<< Scores.finalScore << "\n"
<< Quality << Relevance << "\n");
NSema += bool(SemaResult);
NIndex += bool(IndexResult);

View File

@ -0,0 +1,108 @@
//===--- Quality.cpp --------------------------------------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
#include "Quality.h"
#include "index/Index.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace clangd {
using namespace llvm;
void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
SemaCCPriority = SemaCCResult.Priority;
if (SemaCCResult.Availability == CXAvailability_Deprecated)
Deprecated = true;
}
void SymbolQualitySignals::merge(const Symbol &IndexResult) {
References = std::max(IndexResult.References, References);
}
float SymbolQualitySignals::evaluate() const {
float Score = 1;
// This avoids a sharp gradient for tail symbols, and also neatly avoids the
// question of whether 0 references means a bad symbol or missing data.
if (References >= 3)
Score *= std::log(References);
if (SemaCCPriority)
// Map onto a 0-2 interval, so we don't reward/penalize non-Sema results.
// Priority 80 is a really bad score.
Score *= 2 - std::min<float>(80, SemaCCPriority) / 40;
if (Deprecated)
Score *= 0.1;
return Score;
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
OS << formatv("=== Symbol quality: {0}\n", S.evaluate());
if (S.SemaCCPriority)
OS << formatv("\tSemaCCPriority: {0}\n", S.SemaCCPriority);
OS << formatv("\tReferences: {0}\n", S.References);
OS << formatv("\tDeprecated: {0}\n", S.Deprecated);
return OS;
}
void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
SemaCCResult.Availability == CXAvailability_NotAccessible)
Forbidden = true;
}
float SymbolRelevanceSignals::evaluate() const {
if (Forbidden)
return 0;
return NameMatch;
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
OS << formatv("\tName match: {0}\n", S.NameMatch);
OS << formatv("\tForbidden: {0}\n", S.Forbidden);
return OS;
}
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
return SymbolQuality * SymbolRelevance;
}
// Produces an integer that sorts in the same order as F.
// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
static uint32_t encodeFloat(float F) {
static_assert(std::numeric_limits<float>::is_iec559, "");
constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
// Get the bits of the float. Endianness is the same as for integers.
uint32_t U = FloatToBits(F);
// IEEE 754 floats compare like sign-magnitude integers.
if (U & TopBit) // Negative float.
return 0 - U; // Map onto the low half of integers, order reversed.
return U + TopBit; // Positive floats map onto the high half of integers.
}
std::string sortText(float Score, llvm::StringRef Name) {
// We convert -Score to an integer, and hex-encode for readability.
// Example: [0.5, "foo"] -> "41000000foo"
std::string S;
llvm::raw_string_ostream OS(S);
write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
/*Width=*/2 * sizeof(Score));
OS << Name;
OS.flush();
return S;
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,126 @@
//===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
///
/// Some operations such as code completion produce a set of candidates.
/// Usually the user can choose between them, but we should put the best options
/// at the top (they're easier to select, and more likely to be seen).
///
/// This file defines building blocks for ranking candidates.
/// It's used by the features directly and also in the implementation of
/// indexes, as indexes also need to heuristically limit their results.
///
/// The facilities here are:
/// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
/// These are structured in a way that they can be debugged, and are fairly
/// consistent regardless of the source.
/// - compute scores from scoring signals. These are suitable for sorting.
/// - sorting utilities like the TopN container.
/// These could be split up further to isolate dependencies if we care.
///
//===---------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
#include "llvm/ADT/StringRef.h"
#include <algorithm>
#include <functional>
#include <vector>
namespace llvm {
class raw_ostream;
}
namespace clang {
class CodeCompletionResult;
namespace clangd {
struct Symbol;
// Signals structs are designed to be aggregated from 0 or more sources.
// A default instance has neutral signals, and sources are merged into it.
// They can be dumped for debugging, and evaluate()d into a score.
/// Attributes of a symbol that affect how much we like it.
struct SymbolQualitySignals {
unsigned SemaCCPriority = 0; // 1-80, 1 is best. 0 means absent.
// FIXME: this is actually a mix of symbol
// quality and relevance. Untangle this.
bool Deprecated = false;
unsigned References = 0;
void merge(const CodeCompletionResult &SemaCCResult);
void merge(const Symbol &IndexResult);
// Condense these signals down to a single number, higher is better.
float evaluate() const;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const SymbolQualitySignals &);
/// Attributes of a symbol-query pair that affect how much we like it.
struct SymbolRelevanceSignals {
// 0-1 fuzzy-match score for unqualified name. Must be explicitly assigned.
float NameMatch = 1;
bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
void merge(const CodeCompletionResult &SemaResult);
// Condense these signals down to a single number, higher is better.
float evaluate() const;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &,
const SymbolRelevanceSignals &);
/// Combine symbol quality and relevance into a single score.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
/// TopN<T> is a lossy container that preserves only the "best" N elements.
template <typename T, typename Compare = std::greater<T>> class TopN {
public:
using value_type = T;
TopN(size_t N, Compare Greater = Compare())
: N(N), Greater(std::move(Greater)) {}
// Adds a candidate to the set.
// Returns true if a candidate was dropped to get back under N.
bool push(value_type &&V) {
bool Dropped = false;
if (Heap.size() >= N) {
Dropped = true;
if (N > 0 && Greater(V, Heap.front())) {
std::pop_heap(Heap.begin(), Heap.end(), Greater);
Heap.back() = std::move(V);
std::push_heap(Heap.begin(), Heap.end(), Greater);
}
} else {
Heap.push_back(std::move(V));
std::push_heap(Heap.begin(), Heap.end(), Greater);
}
assert(Heap.size() <= N);
assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
return Dropped;
}
// Returns candidates from best to worst.
std::vector<value_type> items() && {
std::sort_heap(Heap.begin(), Heap.end(), Greater);
assert(Heap.size() <= N);
return std::move(Heap);
}
private:
const size_t N;
std::vector<value_type> Heap; // Min-heap, comparator is Greater.
Compare Greater;
};
/// Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP.
/// (The highest score compares smallest so it sorts at the top).
std::string sortText(float Score, llvm::StringRef Tiebreak = "");
} // namespace clangd
} // namespace clang
#endif

View File

@ -23,10 +23,12 @@ add_extra_unittest(ClangdTests
HeadersTests.cpp
IndexTests.cpp
JSONExprTests.cpp
QualityTests.cpp
SourceCodeTests.cpp
SymbolCollectorTests.cpp
SyncAPI.cpp
TestFS.cpp
TestTU.cpp
ThreadingTests.cpp
TraceTests.cpp
TUSchedulerTests.cpp

View File

@ -10,10 +10,7 @@
#include "Annotations.h"
#include "ClangdUnit.h"
#include "SourceCode.h"
#include "TestFS.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Frontend/Utils.h"
#include "TestTU.h"
#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@ -36,24 +33,6 @@ testing::Matcher<const Diag &> WithNote(testing::Matcher<Note> NoteMatcher) {
return Field(&Diag::Notes, ElementsAre(NoteMatcher));
}
// FIXME: this is duplicated with FileIndexTests. Share it.
ParsedAST build(StringRef Code, std::vector<const char *> Flags = {}) {
std::vector<const char *> Cmd = {"clang", "main.cpp"};
Cmd.insert(Cmd.begin() + 1, Flags.begin(), Flags.end());
auto CI = createInvocationFromCommandLine(Cmd);
auto Buf = MemoryBuffer::getMemBuffer(Code);
auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf),
std::make_shared<PCHContainerOperations>(),
vfs::getRealFileSystem());
assert(AST.hasValue());
return std::move(*AST);
}
std::vector<Diag> buildDiags(llvm::StringRef Code,
std::vector<const char *> Flags = {}) {
return build(Code, std::move(Flags)).getDiagnostics();
}
MATCHER_P2(Diag, Range, Message,
"Diag at " + llvm::to_string(Range) + " = [" + Message + "]") {
return arg.Range == Range && arg.Message == Message;
@ -105,7 +84,7 @@ o]]();
}
)cpp");
EXPECT_THAT(
buildDiags(Test.code()),
TestTU::withCode(Test.code()).build().getDiagnostics(),
ElementsAre(
// This range spans lines.
AllOf(Diag(Test.range("typo"),
@ -123,13 +102,15 @@ o]]();
TEST(DiagnosticsTest, FlagsMatter) {
Annotations Test("[[void]] main() {}");
EXPECT_THAT(buildDiags(Test.code()),
auto TU = TestTU::withCode(Test.code());
EXPECT_THAT(TU.build().getDiagnostics(),
ElementsAre(AllOf(Diag(Test.range(), "'main' must return 'int'"),
WithFix(Fix(Test.range(), "int",
"change 'void' to 'int'")))));
// Same code built as C gets different diagnostics.
TU.Filename = "Plain.c";
EXPECT_THAT(
buildDiags(Test.code(), {"-x", "c"}),
TU.build().getDiagnostics(),
ElementsAre(AllOf(
Diag(Test.range(), "return type of 'main' is not 'int'"),
WithFix(Fix(Test.range(), "int", "change return type to 'int'")))));
@ -150,7 +131,7 @@ TEST(DiagnosticsTest, Preprocessor) {
#endif
)cpp");
EXPECT_THAT(
buildDiags(Test.code()),
TestTU::withCode(Test.code()).build().getDiagnostics(),
ElementsAre(Diag(Test.range(), "use of undeclared identifier 'b'")));
}
@ -229,7 +210,7 @@ TEST(ClangdUnitTest, GetBeginningOfIdentifier) {
"int ^λλ^λ();", // UTF-8 handled properly when backing up
}) {
Annotations TestCase(Text);
auto AST = build(TestCase.code());
auto AST = TestTU::withCode(TestCase.code()).build();
const auto &SourceMgr = AST.getASTContext().getSourceManager();
SourceLocation Actual = getBeginningOfIdentifier(
AST, TestCase.points().back(), SourceMgr.getMainFileID());

View File

@ -7,11 +7,8 @@
//
//===----------------------------------------------------------------------===//
#include "TestFS.h"
#include "TestTU.h"
#include "index/FileIndex.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Frontend/Utils.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@ -83,38 +80,19 @@ std::vector<std::string> match(const SymbolIndex &I,
return Matches;
}
/// Create an ParsedAST for \p Code. Returns None if \p Code is empty.
/// \p Code is put into <Path>.h which is included by \p <BasePath>.cpp.
llvm::Optional<ParsedAST> build(llvm::StringRef BasePath,
llvm::StringRef Code) {
if (Code.empty())
return llvm::None;
assert(llvm::sys::path::extension(BasePath).empty() &&
"BasePath must be a base file path without extension.");
llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS(
new vfs::InMemoryFileSystem);
std::string Path = testPath((BasePath + ".cpp").str());
std::string Header = testPath((BasePath + ".h").str());
VFS->addFile(Path, 0, llvm::MemoryBuffer::getMemBuffer(""));
VFS->addFile(Header, 0, llvm::MemoryBuffer::getMemBuffer(Code));
const char *Args[] = {"clang", "-xc++", "-include", Header.c_str(),
Path.c_str()};
auto CI = createInvocationFromCommandLine(Args);
auto Buf = llvm::MemoryBuffer::getMemBuffer(Code);
auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf),
std::make_shared<PCHContainerOperations>(), VFS);
assert(AST.hasValue());
return std::move(*AST);
// Adds Basename.cpp, which includes Basename.h, which contains Code.
void update(FileIndex &M, llvm::StringRef Basename, llvm::StringRef Code) {
TestTU File;
File.Filename = (Basename + ".cpp").str();
File.HeaderFilename = (Basename + ".h").str();
File.HeaderCode = Code;
auto AST = File.build();
M.update(File.Filename, &AST);
}
TEST(FileIndexTest, IndexAST) {
FileIndex M;
M.update(
"f1",
build("f1", "namespace ns { void f() {} class X {}; }").getPointer());
update(M, "f1", "namespace ns { void f() {} class X {}; }");
FuzzyFindRequest Req;
Req.Query = "";
@ -124,10 +102,7 @@ TEST(FileIndexTest, IndexAST) {
TEST(FileIndexTest, NoLocal) {
FileIndex M;
M.update(
"f1",
build("f1", "namespace ns { void f() { int local = 0; } class X {}; }")
.getPointer());
update(M, "f1", "namespace ns { void f() { int local = 0; } class X {}; }");
FuzzyFindRequest Req;
Req.Query = "";
@ -136,12 +111,8 @@ TEST(FileIndexTest, NoLocal) {
TEST(FileIndexTest, IndexMultiASTAndDeduplicate) {
FileIndex M;
M.update(
"f1",
build("f1", "namespace ns { void f() {} class X {}; }").getPointer());
M.update(
"f2",
build("f2", "namespace ns { void ff() {} class X {}; }").getPointer());
update(M, "f1", "namespace ns { void f() {} class X {}; }");
update(M, "f2", "namespace ns { void ff() {} class X {}; }");
FuzzyFindRequest Req;
Req.Query = "";
@ -151,30 +122,26 @@ TEST(FileIndexTest, IndexMultiASTAndDeduplicate) {
TEST(FileIndexTest, RemoveAST) {
FileIndex M;
M.update(
"f1",
build("f1", "namespace ns { void f() {} class X {}; }").getPointer());
update(M, "f1", "namespace ns { void f() {} class X {}; }");
FuzzyFindRequest Req;
Req.Query = "";
Req.Scopes = {"ns::"};
EXPECT_THAT(match(M, Req), UnorderedElementsAre("ns::f", "ns::X"));
M.update("f1", nullptr);
M.update("f1.cpp", nullptr);
EXPECT_THAT(match(M, Req), UnorderedElementsAre());
}
TEST(FileIndexTest, RemoveNonExisting) {
FileIndex M;
M.update("no", nullptr);
M.update("no.cpp", nullptr);
EXPECT_THAT(match(M, FuzzyFindRequest()), UnorderedElementsAre());
}
TEST(FileIndexTest, IgnoreClassMembers) {
FileIndex M;
M.update("f1",
build("f1", "class X { static int m1; int m2; static void f(); };")
.getPointer());
update(M, "f1", "class X { static int m1; int m2; static void f(); };");
FuzzyFindRequest Req;
Req.Query = "";
@ -183,7 +150,7 @@ TEST(FileIndexTest, IgnoreClassMembers) {
TEST(FileIndexTest, NoIncludeCollected) {
FileIndex M;
M.update("f", build("f", "class string {};").getPointer());
update(M, "f", "class string {};");
FuzzyFindRequest Req;
Req.Query = "";
@ -206,7 +173,7 @@ vector<Ty> make_vector(Arg A) {}
)cpp";
FileIndex M;
M.update("f", build("f", Source).getPointer());
update(M, "f", Source);
FuzzyFindRequest Req;
Req.Query = "";

View File

@ -0,0 +1,123 @@
//===-- SourceCodeTests.cpp ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Evaluating scoring functions isn't a great fit for assert-based tests.
// For interesting cases, both exact scores and "X beats Y" are too brittle to
// make good hard assertions.
//
// Here we test the signal extraction and sanity-check that signals point in
// the right direction. This should be supplemented by quality metrics which
// we can compute from a corpus of queries and preferred rankings.
//
//===----------------------------------------------------------------------===//
#include "Quality.h"
#include "TestTU.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang {
namespace clangd {
namespace {
TEST(QualityTests, SymbolQualitySignalExtraction) {
auto Header = TestTU::withHeaderCode(R"cpp(
int x;
[[deprecated]]
int f() { return x; }
)cpp");
auto Symbols = Header.headerSymbols();
auto AST = Header.build();
SymbolQualitySignals Quality;
Quality.merge(findSymbol(Symbols, "x"));
EXPECT_FALSE(Quality.Deprecated);
EXPECT_EQ(Quality.SemaCCPriority, SymbolQualitySignals().SemaCCPriority);
EXPECT_EQ(Quality.References, SymbolQualitySignals().References);
Symbol F = findSymbol(Symbols, "f");
F.References = 24; // TestTU doesn't count references, so fake it.
Quality = {};
Quality.merge(F);
EXPECT_FALSE(Quality.Deprecated); // FIXME: Include deprecated bit in index.
EXPECT_EQ(Quality.SemaCCPriority, SymbolQualitySignals().SemaCCPriority);
EXPECT_EQ(Quality.References, 24u);
Quality = {};
Quality.merge(CodeCompletionResult(&findDecl(AST, "f"), /*Priority=*/42));
EXPECT_TRUE(Quality.Deprecated);
EXPECT_EQ(Quality.SemaCCPriority, 42u);
EXPECT_EQ(Quality.References, SymbolQualitySignals().References);
}
TEST(QualityTests, SymbolRelevanceSignalExtraction) {
auto AST = TestTU::withHeaderCode(R"cpp(
[[deprecated]]
int f() { return 0; }
)cpp")
.build();
SymbolRelevanceSignals Relevance;
Relevance.merge(CodeCompletionResult(&findDecl(AST, "f"), /*Priority=*/42,
nullptr, false, /*Accessible=*/false));
EXPECT_EQ(Relevance.NameMatch, SymbolRelevanceSignals().NameMatch);
EXPECT_TRUE(Relevance.Forbidden);
}
// Do the signals move the scores in the direction we expect?
TEST(QualityTests, SymbolQualitySignalsSanity) {
SymbolQualitySignals Default;
EXPECT_EQ(Default.evaluate(), 1);
SymbolQualitySignals Deprecated;
Deprecated.Deprecated = true;
EXPECT_LT(Deprecated.evaluate(), Default.evaluate());
SymbolQualitySignals WithReferences, ManyReferences;
WithReferences.References = 10;
ManyReferences.References = 1000;
EXPECT_GT(WithReferences.evaluate(), Default.evaluate());
EXPECT_GT(ManyReferences.evaluate(), WithReferences.evaluate());
SymbolQualitySignals LowPriority, HighPriority;
LowPriority.SemaCCPriority = 60;
HighPriority.SemaCCPriority = 20;
EXPECT_GT(HighPriority.evaluate(), Default.evaluate());
EXPECT_LT(LowPriority.evaluate(), Default.evaluate());
}
TEST(QualityTests, SymbolRelevanceSignalsSanity) {
SymbolRelevanceSignals Default;
EXPECT_EQ(Default.evaluate(), 1);
SymbolRelevanceSignals Forbidden;
Forbidden.Forbidden = true;
EXPECT_LT(Forbidden.evaluate(), Default.evaluate());
SymbolRelevanceSignals PoorNameMatch;
PoorNameMatch.NameMatch = 0.2;
EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate());
}
TEST(QualityTests, SortText) {
EXPECT_LT(sortText(std::numeric_limits<float>::infinity()), sortText(1000.2));
EXPECT_LT(sortText(1000.2), sortText(1));
EXPECT_LT(sortText(1), sortText(0.3));
EXPECT_LT(sortText(0.3), sortText(0));
EXPECT_LT(sortText(0), sortText(-10));
EXPECT_LT(sortText(-10), sortText(-std::numeric_limits<float>::infinity()));
EXPECT_LT(sortText(1, "z"), sortText(0, "a"));
EXPECT_LT(sortText(0, "a"), sortText(0, "z"));
}
} // namespace
} // namespace clangd
} // namespace clang

View File

@ -20,8 +20,8 @@ buildTestFS(StringMap<std::string> const &Files) {
new vfs::InMemoryFileSystem);
for (auto &FileAndContents : Files) {
MemFS->addFile(FileAndContents.first(), time_t(),
MemoryBuffer::getMemBuffer(FileAndContents.second,
FileAndContents.first()));
MemoryBuffer::getMemBufferCopy(FileAndContents.second,
FileAndContents.first()));
}
return MemFS;
}

View File

@ -0,0 +1,95 @@
//===--- TestTU.cpp - Scratch source files for testing ------------*-
//C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
#include "TestTU.h"
#include "TestFS.h"
#include "index/FileIndex.h"
#include "index/MemIndex.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Frontend/Utils.h"
namespace clang {
namespace clangd {
using namespace llvm;
ParsedAST TestTU::build() const {
std::string FullFilename = testPath(Filename),
FullHeaderName = testPath(HeaderFilename);
std::vector<const char *> Cmd = {"clang", FullFilename.c_str()};
// FIXME: this shouldn't need to be conditional, but it breaks a
// GoToDefinition test for some reason (getMacroArgExpandedLocation fails).
if (!HeaderCode.empty()) {
Cmd.push_back("-include");
Cmd.push_back(FullHeaderName.c_str());
}
auto AST = ParsedAST::Build(
createInvocationFromCommandLine(Cmd), nullptr,
MemoryBuffer::getMemBufferCopy(Code),
std::make_shared<PCHContainerOperations>(),
buildTestFS({{FullFilename, Code}, {FullHeaderName, HeaderCode}}));
if (!AST.hasValue()) {
ADD_FAILURE() << "Failed to build code:\n" << Code;
llvm_unreachable("Failed to build TestTU!");
}
return std::move(*AST);
}
SymbolSlab TestTU::headerSymbols() const {
auto AST = build();
return indexAST(&AST);
}
std::unique_ptr<SymbolIndex> TestTU::index() const {
return MemIndex::build(headerSymbols());
}
// Look up a symbol by qualified name, which must be unique.
const Symbol &findSymbol(const SymbolSlab &Slab, llvm::StringRef QName) {
const Symbol *Result = nullptr;
for (const Symbol &S : Slab) {
if (QName != (S.Scope + S.Name).str())
continue;
if (Result) {
ADD_FAILURE() << "Multiple symbols named " << QName << ":\n"
<< *Result << "\n---\n"
<< S;
assert(false && "QName is not unique");
}
Result = &S;
}
if (!Result) {
ADD_FAILURE() << "No symbol named " << QName << " in "
<< ::testing::PrintToString(Slab);
assert(false && "No symbol with QName");
}
return *Result;
}
const NamedDecl &findDecl(ParsedAST &AST, llvm::StringRef QName) {
const NamedDecl *Result = nullptr;
for (const Decl *D : AST.getTopLevelDecls()) {
auto *ND = dyn_cast<NamedDecl>(D);
if (!ND || ND->getNameAsString() != QName)
continue;
if (Result) {
ADD_FAILURE() << "Multiple Decls named " << QName;
assert(false && "QName is not unique");
}
Result = ND;
}
if (!Result) {
ADD_FAILURE() << "No Decl named " << QName << " in AST";
assert(false && "No Decl with QName");
}
return *Result;
}
} // namespace clangd
} // namespace clang

View File

@ -0,0 +1,59 @@
//===--- TestTU.h - Scratch source files for testing ------------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// Many tests for indexing, code completion etc are most naturally expressed
// using code examples.
// TestTU lets test define these examples in a common way without dealing with
// the mechanics of VFS and compiler interactions, and then easily grab the
// AST, particular symbols, etc.
//
//===---------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H
#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H
#include "ClangdUnit.h"
#include "index/Index.h"
#include "gtest/gtest.h"
namespace clang {
namespace clangd {
struct TestTU {
static TestTU withCode(llvm::StringRef Code) {
TestTU TU;
TU.Code = Code;
return TU;
}
static TestTU withHeaderCode(llvm::StringRef HeaderCode) {
TestTU TU;
TU.HeaderCode = HeaderCode;
return TU;
}
// The code to be compiled.
std::string Code;
std::string Filename = "TestTU.cpp";
// Define contents of a header to be included by TestTU.cpp.
std::string HeaderCode;
std::string HeaderFilename = "TestTU.h";
ParsedAST build() const;
SymbolSlab headerSymbols() const;
std::unique_ptr<SymbolIndex> index() const;
};
// Look up an index symbol by qualified name, which must be unique.
const Symbol &findSymbol(const SymbolSlab &, llvm::StringRef QName);
// Look up an AST symbol by qualified name, which must be unique and top-level.
const NamedDecl &findDecl(ParsedAST &AST, llvm::StringRef QName);
} // namespace clangd
} // namespace clang
#endif

View File

@ -12,15 +12,13 @@
#include "Matchers.h"
#include "SyncAPI.h"
#include "TestFS.h"
#include "TestTU.h"
#include "XRefs.h"
#include "gmock/gmock.h"
#include "index/FileIndex.h"
#include "index/SymbolCollector.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Frontend/Utils.h"
#include "clang/Index/IndexingAction.h"
#include "llvm/Support/Path.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang {
@ -39,34 +37,6 @@ class IgnoreDiagnostics : public DiagnosticsConsumer {
std::vector<Diag> Diagnostics) override {}
};
// FIXME: this is duplicated with FileIndexTests. Share it.
ParsedAST build(StringRef MainCode, StringRef HeaderCode = "") {
auto HeaderPath = testPath("foo.h");
auto MainPath = testPath("foo.cpp");
llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS(
new vfs::InMemoryFileSystem());
VFS->addFile(MainPath, 0, llvm::MemoryBuffer::getMemBuffer(MainCode));
VFS->addFile(HeaderPath, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode));
std::vector<const char *> Cmd = {"clang", "-xc++", MainPath.c_str()};
if (!HeaderCode.empty()) {
std::vector<const char *> args = {"-include", HeaderPath.c_str()};
Cmd.insert(Cmd.begin() + 1, args.begin(), args.end());
}
auto CI = createInvocationFromCommandLine(Cmd);
auto Buf = MemoryBuffer::getMemBuffer(MainCode);
auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf),
std::make_shared<PCHContainerOperations>(), VFS);
assert(AST.hasValue());
return std::move(*AST);
}
std::unique_ptr<SymbolIndex> buildIndex(StringRef MainCode,
StringRef HeaderCode) {
auto AST = build(MainCode, HeaderCode);
return MemIndex::build(indexAST(&AST));
}
// Extracts ranges from an annotated example, and constructs a matcher for a
// highlight set. Ranges should be named $read/$write as appropriate.
Matcher<const std::vector<DocumentHighlight> &>
@ -117,7 +87,7 @@ TEST(HighlightsTest, All) {
};
for (const char *Test : Tests) {
Annotations T(Test);
auto AST = build(T.code());
auto AST = TestTU::withCode(T.code()).build();
EXPECT_THAT(findDocumentHighlights(AST, T.point()), HighlightsFrom(T))
<< Test;
}
@ -139,10 +109,12 @@ TEST(GoToDefinition, WithIndex) {
void $f1[[f1]]() {}
)cpp");
auto Index = buildIndex(SymbolCpp.code(), SymbolHeader.code());
TestTU TU;
TU.Code = SymbolCpp.code();
TU.HeaderCode = SymbolHeader.code();
auto Index = TU.index();
auto runFindDefinitionsWithIndex = [&Index](const Annotations &Main) {
auto AST = build(/*MainCode=*/Main.code(),
/*HeaderCode=*/"");
auto AST = TestTU::withCode(Main.code()).build();
return clangd::findDefinitions(AST, Main.point(), Index.get());
};
@ -329,7 +301,7 @@ TEST(GoToDefinition, All) {
};
for (const char *Test : Tests) {
Annotations T(Test);
auto AST = build(T.code());
auto AST = TestTU::withCode(T.code()).build();
std::vector<Matcher<Location>> ExpectedLocations;
for (const auto &R : T.ranges())
ExpectedLocations.push_back(RangeIs(R));
@ -661,7 +633,7 @@ TEST(Hover, All) {
for (const OneTest &Test : Tests) {
Annotations T(Test.Input);
auto AST = build(T.code());
auto AST = TestTU::withCode(T.code()).build();
Hover H = getHover(AST, T.point());
EXPECT_EQ(H.contents.value, Test.ExpectedHover) << Test.Input;