[clangd] Use tokenize instead of raw lexer in SourceCode/lex

Reviewers: hokein, sammccall
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D75249
2024-12-23 07:52:06 +00:00 · 2020-02-27 15:10:54 +01:00 · 2020-02-27 15:10:54 +01:00 · 98bb094c1e
commit 98bb094c1e
parent 7a9d00f261
1 changed file with 122 additions and 113 deletions
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -23,6 +23,7 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/Token.h"
 #include "clang/Tooling/Core/Replacement.h"
+#include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
@@ -612,31 +613,26 @@ cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,

 static void
 lex(llvm::StringRef Code, const LangOptions &LangOpts,
-    llvm::function_ref<void(const clang::Token &, const SourceManager &SM)>
+    llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
        Action) {
  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
  std::string NullTerminatedCode = Code.str();
  SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
  auto &SM = FileSM.get();
-  auto FID = SM.getMainFileID();
-  // Create a raw lexer (with no associated preprocessor object).
-  Lexer Lex(FID, SM.getBuffer(FID), SM, LangOpts);
-  Token Tok;
-
-  while (!Lex.LexFromRawLexer(Tok))
+  for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
    Action(Tok, SM);
-  // LexFromRawLexer returns true after it lexes last token, so we still have
-  // one more token to report.
-  Action(Tok, SM);
 }

 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
                                             const format::FormatStyle &Style) {
  llvm::StringMap<unsigned> Identifiers;
  auto LangOpt = format::getFormattingLangOpts(Style);
-  lex(Content, LangOpt, [&](const clang::Token &Tok, const SourceManager &) {
-    if (Tok.getKind() == tok::raw_identifier)
-      ++Identifiers[Tok.getRawIdentifier()];
+  lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
+    if (Tok.kind() == tok::identifier)
+      ++Identifiers[Tok.text(SM)];
+    // FIXME: Should this function really return keywords too ?
+    else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
+      ++Identifiers[Keyword];
  });
  return Identifiers;
 }
@@ -645,16 +641,13 @@ std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
                                           llvm::StringRef Content,
                                           const LangOptions &LangOpts) {
  std::vector<Range> Ranges;
-  lex(Content, LangOpts, [&](const clang::Token &Tok, const SourceManager &SM) {
-    if (Tok.getKind() != tok::raw_identifier)
-      return;
-    if (Tok.getRawIdentifier() != Identifier)
-      return;
-    auto Range = getTokenRange(SM, LangOpts, Tok.getLocation());
-    if (!Range)
-      return;
-    Ranges.push_back(*Range);
-  });
+  lex(Content, LangOpts,
+      [&](const syntax::Token &Tok, const SourceManager &SM) {
+        if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
+          return;
+        if (auto Range = getTokenRange(SM, LangOpts, Tok.location()))
+          Ranges.push_back(*Range);
+      });
  return Ranges;
 }

@@ -691,97 +684,113 @@ void parseNamespaceEvents(llvm::StringRef Code,

  NamespaceEvent Event;
  lex(Code, format::getFormattingLangOpts(Style),
-      [&](const clang::Token &Tok,const SourceManager &SM) {
-    Event.Pos = sourceLocToPosition(SM, Tok.getLocation());
-    switch (Tok.getKind()) {
-    case tok::raw_identifier:
-      // In raw mode, this could be a keyword or a name.
-      switch (State) {
-      case UsingNamespace:
-      case UsingNamespaceName:
-        NSName.append(std::string(Tok.getRawIdentifier()));
-        State = UsingNamespaceName;
-        break;
-      case Namespace:
-      case NamespaceName:
-        NSName.append(std::string(Tok.getRawIdentifier()));
-        State = NamespaceName;
-        break;
-      case Using:
-        State =
-            (Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default;
-        break;
-      case Default:
-        NSName.clear();
-        if (Tok.getRawIdentifier() == "namespace")
-          State = Namespace;
-        else if (Tok.getRawIdentifier() == "using")
-          State = Using;
-        break;
-      }
-      break;
-    case tok::coloncolon:
-      // This can come at the beginning or in the middle of a namespace name.
-      switch (State) {
-      case UsingNamespace:
-      case UsingNamespaceName:
-        NSName.append("::");
-        State = UsingNamespaceName;
-        break;
-      case NamespaceName:
-        NSName.append("::");
-        State = NamespaceName;
-        break;
-      case Namespace: // Not legal here.
-      case Using:
-      case Default:
-        State = Default;
-        break;
-      }
-      break;
-    case tok::l_brace:
-      // Record which { started a namespace, so we know when } ends one.
-      if (State == NamespaceName) {
-        // Parsed: namespace <name> {
-        BraceStack.push_back(true);
-        Enclosing.push_back(NSName);
-        Event.Trigger = NamespaceEvent::BeginNamespace;
-        Event.Payload = llvm::join(Enclosing, "::");
-        Callback(Event);
-      } else {
-        // This case includes anonymous namespaces (State = Namespace).
-        // For our purposes, they're not namespaces and we ignore them.
-        BraceStack.push_back(false);
-      }
-      State = Default;
-      break;
-    case tok::r_brace:
-      // If braces are unmatched, we're going to be confused, but don't crash.
-      if (!BraceStack.empty()) {
-        if (BraceStack.back()) {
-          // Parsed: } // namespace
-          Enclosing.pop_back();
-          Event.Trigger = NamespaceEvent::EndNamespace;
-          Event.Payload = llvm::join(Enclosing, "::");
-          Callback(Event);
+      [&](const syntax::Token &Tok, const SourceManager &SM) {
+        Event.Pos = sourceLocToPosition(SM, Tok.location());
+        switch (Tok.kind()) {
+        case tok::kw_using:
+          State = State == Default ? Using : Default;
+          break;
+        case tok::kw_namespace:
+          switch (State) {
+          case Using:
+            State = UsingNamespace;
+            break;
+          case Default:
+            State = Namespace;
+            break;
+          default:
+            State = Default;
+            break;
+          }
+          break;
+        case tok::identifier:
+          switch (State) {
+          case UsingNamespace:
+            NSName.clear();
+            LLVM_FALLTHROUGH;
+          case UsingNamespaceName:
+            NSName.append(Tok.text(SM).str());
+            State = UsingNamespaceName;
+            break;
+          case Namespace:
+            NSName.clear();
+            LLVM_FALLTHROUGH;
+          case NamespaceName:
+            NSName.append(Tok.text(SM).str());
+            State = NamespaceName;
+            break;
+          case Using:
+          case Default:
+            State = Default;
+            break;
+          }
+          break;
+        case tok::coloncolon:
+          // This can come at the beginning or in the middle of a namespace
+          // name.
+          switch (State) {
+          case UsingNamespace:
+            NSName.clear();
+            LLVM_FALLTHROUGH;
+          case UsingNamespaceName:
+            NSName.append("::");
+            State = UsingNamespaceName;
+            break;
+          case NamespaceName:
+            NSName.append("::");
+            State = NamespaceName;
+            break;
+          case Namespace: // Not legal here.
+          case Using:
+          case Default:
+            State = Default;
+            break;
+          }
+          break;
+        case tok::l_brace:
+          // Record which { started a namespace, so we know when } ends one.
+          if (State == NamespaceName) {
+            // Parsed: namespace <name> {
+            BraceStack.push_back(true);
+            Enclosing.push_back(NSName);
+            Event.Trigger = NamespaceEvent::BeginNamespace;
+            Event.Payload = llvm::join(Enclosing, "::");
+            Callback(Event);
+          } else {
+            // This case includes anonymous namespaces (State = Namespace).
+            // For our purposes, they're not namespaces and we ignore them.
+            BraceStack.push_back(false);
+          }
+          State = Default;
+          break;
+        case tok::r_brace:
+          // If braces are unmatched, we're going to be confused, but don't
+          // crash.
+          if (!BraceStack.empty()) {
+            if (BraceStack.back()) {
+              // Parsed: } // namespace
+              Enclosing.pop_back();
+              Event.Trigger = NamespaceEvent::EndNamespace;
+              Event.Payload = llvm::join(Enclosing, "::");
+              Callback(Event);
+            }
+            BraceStack.pop_back();
+          }
+          break;
+        case tok::semi:
+          if (State == UsingNamespaceName) {
+            // Parsed: using namespace <name> ;
+            Event.Trigger = NamespaceEvent::UsingDirective;
+            Event.Payload = std::move(NSName);
+            Callback(Event);
+          }
+          State = Default;
+          break;
+        default:
+          State = Default;
+          break;
        }
-        BraceStack.pop_back();
-      }
-      break;
-    case tok::semi:
-      if (State == UsingNamespaceName) {
-        // Parsed: using namespace <name> ;
-        Event.Trigger = NamespaceEvent::UsingDirective;
-        Event.Payload = std::move(NSName);
-        Callback(Event);
-      }
-      State = Default;
-      break;
-    default:
-      State = Default;
-      break;
-    }
-  });
+      });
 }

 // Returns the prefix namespaces of NS: {"" ... NS}.