[clangd] Use tokenize instead of raw lexer in SourceCode/lex

Reviewers: hokein, sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D75249
This commit is contained in:
Kadir Cetinkaya 2020-02-27 15:10:54 +01:00
parent 7a9d00f261
commit 98bb094c1e
No known key found for this signature in database
GPG Key ID: E39E36B8D2057ED6

View File

@ -23,6 +23,7 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
@ -612,31 +613,26 @@ cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
// Lexes Code using LangOpts and calls Action for each token, passing along
// the SourceManager that owns the in-memory buffer so the callback can resolve
// token locations and text.
//
// NOTE(review): this listing appears to interleave the pre-change (raw Lexer,
// clang::Token) and post-change (syntax::tokenize, syntax::Token) lines of the
// commit without +/- markers; only one of each pair is expected to exist in
// the real file -- confirm against the actual revision.
static void
lex(llvm::StringRef Code, const LangOptions &LangOpts,
// Callback type for the raw-lexer variant (clang::Token).
llvm::function_ref<void(const clang::Token &, const SourceManager &SM)>
// Callback type for the syntax::tokenize variant (syntax::Token).
llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
Action) {
// FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
// Copy Code into a std::string, which is what gets handed to the file below.
std::string NullTerminatedCode = Code.str();
// Wrap the copy in a SourceManager backed by a file named "dummy.cpp".
SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
auto &SM = FileSM.get();
auto FID = SM.getMainFileID();
// Create a raw lexer (with no associated preprocessor object).
Lexer Lex(FID, SM.getBuffer(FID), SM, LangOpts);
Token Tok;
// Raw-lexer variant: report tokens until LexFromRawLexer returns true.
while (!Lex.LexFromRawLexer(Tok))
// Tokenize variant: report every token produced for the main file.
for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
Action(Tok, SM);
// LexFromRawLexer returns true after it lexes last token, so we still have
// one more token to report.
Action(Tok, SM);
}
// Lexes Content with the language options derived from Style and returns a
// map from token spelling to the number of times it was seen.
//
// NOTE(review): this listing appears to contain both the pre-change lambda
// (clang::Token / tok::raw_identifier) and the post-change lambda
// (syntax::Token / tok::identifier) of the commit without +/- markers --
// confirm against the actual revision.
llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
const format::FormatStyle &Style) {
llvm::StringMap<unsigned> Identifiers;
auto LangOpt = format::getFormattingLangOpts(Style);
// Raw-lexer variant: count every raw_identifier token by its raw spelling.
lex(Content, LangOpt, [&](const clang::Token &Tok, const SourceManager &) {
if (Tok.getKind() == tok::raw_identifier)
++Identifiers[Tok.getRawIdentifier()];
// Tokenize variant: count identifier tokens by their source text, and count
// any token kind that has a keyword spelling under that spelling.
lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
if (Tok.kind() == tok::identifier)
++Identifiers[Tok.text(SM)];
// FIXME: Should this function really return keywords too ?
// getKeywordSpelling yields a non-null spelling only for keyword kinds, so
// other tokens (punctuation, literals, ...) are presumably skipped here.
else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
++Identifiers[Keyword];
});
return Identifiers;
}
@ -645,16 +641,13 @@ std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
llvm::StringRef Content,
const LangOptions &LangOpts) {
std::vector<Range> Ranges;
lex(Content, LangOpts, [&](const clang::Token &Tok, const SourceManager &SM) {
if (Tok.getKind() != tok::raw_identifier)
return;
if (Tok.getRawIdentifier() != Identifier)
return;
auto Range = getTokenRange(SM, LangOpts, Tok.getLocation());
if (!Range)
return;
Ranges.push_back(*Range);
});
lex(Content, LangOpts,
[&](const syntax::Token &Tok, const SourceManager &SM) {
if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
return;
if (auto Range = getTokenRange(SM, LangOpts, Tok.location()))
Ranges.push_back(*Range);
});
return Ranges;
}
@ -691,97 +684,113 @@ void parseNamespaceEvents(llvm::StringRef Code,
NamespaceEvent Event;
lex(Code, format::getFormattingLangOpts(Style),
[&](const clang::Token &Tok,const SourceManager &SM) {
Event.Pos = sourceLocToPosition(SM, Tok.getLocation());
switch (Tok.getKind()) {
case tok::raw_identifier:
// In raw mode, this could be a keyword or a name.
switch (State) {
case UsingNamespace:
case UsingNamespaceName:
NSName.append(std::string(Tok.getRawIdentifier()));
State = UsingNamespaceName;
break;
case Namespace:
case NamespaceName:
NSName.append(std::string(Tok.getRawIdentifier()));
State = NamespaceName;
break;
case Using:
State =
(Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default;
break;
case Default:
NSName.clear();
if (Tok.getRawIdentifier() == "namespace")
State = Namespace;
else if (Tok.getRawIdentifier() == "using")
State = Using;
break;
}
break;
case tok::coloncolon:
// This can come at the beginning or in the middle of a namespace name.
switch (State) {
case UsingNamespace:
case UsingNamespaceName:
NSName.append("::");
State = UsingNamespaceName;
break;
case NamespaceName:
NSName.append("::");
State = NamespaceName;
break;
case Namespace: // Not legal here.
case Using:
case Default:
State = Default;
break;
}
break;
case tok::l_brace:
// Record which { started a namespace, so we know when } ends one.
if (State == NamespaceName) {
// Parsed: namespace <name> {
BraceStack.push_back(true);
Enclosing.push_back(NSName);
Event.Trigger = NamespaceEvent::BeginNamespace;
Event.Payload = llvm::join(Enclosing, "::");
Callback(Event);
} else {
// This case includes anonymous namespaces (State = Namespace).
// For our purposes, they're not namespaces and we ignore them.
BraceStack.push_back(false);
}
State = Default;
break;
case tok::r_brace:
// If braces are unmatched, we're going to be confused, but don't crash.
if (!BraceStack.empty()) {
if (BraceStack.back()) {
// Parsed: } // namespace
Enclosing.pop_back();
Event.Trigger = NamespaceEvent::EndNamespace;
Event.Payload = llvm::join(Enclosing, "::");
Callback(Event);
[&](const syntax::Token &Tok, const SourceManager &SM) {
Event.Pos = sourceLocToPosition(SM, Tok.location());
switch (Tok.kind()) {
case tok::kw_using:
State = State == Default ? Using : Default;
break;
case tok::kw_namespace:
switch (State) {
case Using:
State = UsingNamespace;
break;
case Default:
State = Namespace;
break;
default:
State = Default;
break;
}
break;
case tok::identifier:
switch (State) {
case UsingNamespace:
NSName.clear();
LLVM_FALLTHROUGH;
case UsingNamespaceName:
NSName.append(Tok.text(SM).str());
State = UsingNamespaceName;
break;
case Namespace:
NSName.clear();
LLVM_FALLTHROUGH;
case NamespaceName:
NSName.append(Tok.text(SM).str());
State = NamespaceName;
break;
case Using:
case Default:
State = Default;
break;
}
break;
case tok::coloncolon:
// This can come at the beginning or in the middle of a namespace
// name.
switch (State) {
case UsingNamespace:
NSName.clear();
LLVM_FALLTHROUGH;
case UsingNamespaceName:
NSName.append("::");
State = UsingNamespaceName;
break;
case NamespaceName:
NSName.append("::");
State = NamespaceName;
break;
case Namespace: // Not legal here.
case Using:
case Default:
State = Default;
break;
}
break;
case tok::l_brace:
// Record which { started a namespace, so we know when } ends one.
if (State == NamespaceName) {
// Parsed: namespace <name> {
BraceStack.push_back(true);
Enclosing.push_back(NSName);
Event.Trigger = NamespaceEvent::BeginNamespace;
Event.Payload = llvm::join(Enclosing, "::");
Callback(Event);
} else {
// This case includes anonymous namespaces (State = Namespace).
// For our purposes, they're not namespaces and we ignore them.
BraceStack.push_back(false);
}
State = Default;
break;
case tok::r_brace:
// If braces are unmatched, we're going to be confused, but don't
// crash.
if (!BraceStack.empty()) {
if (BraceStack.back()) {
// Parsed: } // namespace
Enclosing.pop_back();
Event.Trigger = NamespaceEvent::EndNamespace;
Event.Payload = llvm::join(Enclosing, "::");
Callback(Event);
}
BraceStack.pop_back();
}
break;
case tok::semi:
if (State == UsingNamespaceName) {
// Parsed: using namespace <name> ;
Event.Trigger = NamespaceEvent::UsingDirective;
Event.Payload = std::move(NSName);
Callback(Event);
}
State = Default;
break;
default:
State = Default;
break;
}
BraceStack.pop_back();
}
break;
case tok::semi:
if (State == UsingNamespaceName) {
// Parsed: using namespace <name> ;
Event.Trigger = NamespaceEvent::UsingDirective;
Event.Payload = std::move(NSName);
Callback(Event);
}
State = Default;
break;
default:
State = Default;
break;
}
});
});
}
// Returns the prefix namespaces of NS: {"" ... NS}.