From dd46a08008f76d6ac9fcc6a9e748b113bea3c758 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 7 Nov 2022 13:30:47 +0100 Subject: [PATCH] Move the isSelfContainedHeader function from clangd to libtooling. We plan to reuse it in the include-cleaner library, this patch moves this functionality from clangd to libtooling, so that this piece of code can be shared among all clang tools. Differential Revision: https://reviews.llvm.org/D137697 --- clang-tools-extra/clangd/Headers.cpp | 9 ++- clang-tools-extra/clangd/SourceCode.cpp | 53 --------------- clang-tools-extra/clangd/SourceCode.h | 5 -- .../clangd/index/SymbolCollector.cpp | 5 +- .../clang/Tooling/Inclusions/HeaderAnalysis.h | 33 +++++++++ clang/lib/Tooling/Inclusions/CMakeLists.txt | 1 + .../lib/Tooling/Inclusions/HeaderAnalysis.cpp | 67 +++++++++++++++++++ clang/unittests/Tooling/CMakeLists.txt | 1 + .../unittests/Tooling/HeaderAnalysisTest.cpp | 66 ++++++++++++++++++ 9 files changed, 175 insertions(+), 65 deletions(-) create mode 100644 clang/include/clang/Tooling/Inclusions/HeaderAnalysis.h create mode 100644 clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp create mode 100644 clang/unittests/Tooling/HeaderAnalysisTest.cpp diff --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp index cbfeb637e2fc..f276f5bd8c6c 100644 --- a/clang-tools-extra/clangd/Headers.cpp +++ b/clang-tools-extra/clangd/Headers.cpp @@ -15,6 +15,7 @@ #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Tooling/Inclusions/HeaderAnalysis.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Path.h" #include @@ -121,12 +122,10 @@ public: // isSelfContainedHeader only returns true once the full header-guard // structure has been seen, i.e. when exiting the *outer* copy of the // file. So last result wins. - if (isSelfContainedHeader(FE, PrevFID, SM, HeaderInfo)) - Out->NonSelfContained.erase( - *Out->getID(SM.getFileEntryForID(PrevFID))); + if (tooling::isSelfContainedHeader(FE, SM, HeaderInfo)) + Out->NonSelfContained.erase(*Out->getID(FE)); else - Out->NonSelfContained.insert( - *Out->getID(SM.getFileEntryForID(PrevFID))); + Out->NonSelfContained.insert(*Out->getID(FE)); } break; } diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 5928541635e6..5913db5405a3 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -1183,58 +1183,5 @@ bool isProtoFile(SourceLocation Loc, const SourceManager &SM) { return SM.getBufferData(FID).startswith(ProtoHeaderComment); } -namespace { - -// Is Line an #if or #ifdef directive? -// FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non -// self-contained and is probably not what we want. -bool isIf(llvm::StringRef Line) { - Line = Line.ltrim(); - if (!Line.consume_front("#")) - return false; - Line = Line.ltrim(); - return Line.startswith("if"); -} - -// Is Line an #error directive mentioning includes? -bool isErrorAboutInclude(llvm::StringRef Line) { - Line = Line.ltrim(); - if (!Line.consume_front("#")) - return false; - Line = Line.ltrim(); - if (!Line.startswith("error")) - return false; - return Line.contains_insensitive( - "includ"); // Matches "include" or "including". -} - -// Heuristically headers that only want to be included via an umbrella. -bool isDontIncludeMeHeader(llvm::StringRef Content) { - llvm::StringRef Line; - // Only sniff up to 100 lines or 10KB. - Content = Content.take_front(100 * 100); - for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { - std::tie(Line, Content) = Content.split('\n'); - if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) - return true; - } - return false; -} - -} // namespace - -bool isSelfContainedHeader(const FileEntry *FE, FileID FID, - const SourceManager &SM, HeaderSearch &HeaderInfo) { - // FIXME: Should files that have been #import'd be considered - // self-contained? That's really a property of the includer, - // not of the file. - if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) && - !HeaderInfo.hasFileBeenImported(FE)) - return false; - // This pattern indicates that a header can't be used without - // particular preprocessor state, usually set up by another header. - return !isDontIncludeMeHeader(SM.getBufferData(FID)); -} - } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index faed27d7c8c4..70d1ebe9b099 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -325,11 +325,6 @@ bool isHeaderFile(llvm::StringRef FileName, /// Returns true if the given location is in a generated protobuf file. bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr); -/// This scans source code, and should not be called when using a preamble. -/// Prefer to access the cache in IncludeStructure::isSelfContained if you can. -bool isSelfContainedHeader(const FileEntry *FE, FileID ID, - const SourceManager &SM, HeaderSearch &HeaderInfo); - /// Returns true if Name is reserved, like _Foo or __Vector_base. inline bool isReservedName(llvm::StringRef Name) { // This doesn't catch all cases, but the most common. diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index ee948f80e7a6..a943746fab5d 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -28,6 +28,7 @@ #include "clang/Index/IndexSymbol.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" +#include "clang/Tooling/Inclusions/HeaderAnalysis.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FileSystem.h" @@ -419,8 +420,8 @@ private: getFrameworkHeaderIncludeSpelling(FE, HFI->Framework, HS)) return *Spelling; - if (!isSelfContainedHeader(FE, FID, PP->getSourceManager(), - PP->getHeaderSearchInfo())) { + if (!tooling::isSelfContainedHeader(FE, PP->getSourceManager(), + PP->getHeaderSearchInfo())) { // A .inc or .def file is often included into a real header to define // symbols (e.g. LLVM tablegen files). if (Filename.endswith(".inc") || Filename.endswith(".def")) diff --git a/clang/include/clang/Tooling/Inclusions/HeaderAnalysis.h b/clang/include/clang/Tooling/Inclusions/HeaderAnalysis.h new file mode 100644 index 000000000000..b0b4b1455c5d --- /dev/null +++ b/clang/include/clang/Tooling/Inclusions/HeaderAnalysis.h @@ -0,0 +1,33 @@ +//===--- HeaderAnalysis.h -----------------------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H +#define LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H + +namespace clang { +class FileEntry; +class SourceManager; +class HeaderSearch; + +namespace tooling { + +/// Returns true if the given physical file is a self-contained header. +/// +/// A header is considered self-contained if +// - it has a proper header guard or has been #imported +// - *and* it doesn't have a dont-include-me pattern. +/// +/// This function can be expensive as it may scan the source code to find out +/// dont-include-me pattern heuristically. +bool isSelfContainedHeader(const FileEntry *FE, const SourceManager &SM, + HeaderSearch &HeaderInfo); + +} // namespace tooling +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H diff --git a/clang/lib/Tooling/Inclusions/CMakeLists.txt b/clang/lib/Tooling/Inclusions/CMakeLists.txt index 1954d16a1b23..78b25f4a3486 100644 --- a/clang/lib/Tooling/Inclusions/CMakeLists.txt +++ b/clang/lib/Tooling/Inclusions/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangToolingInclusions + HeaderAnalysis.cpp HeaderIncludes.cpp IncludeStyle.cpp diff --git a/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp b/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp new file mode 100644 index 000000000000..78b866eae096 --- /dev/null +++ b/clang/lib/Tooling/Inclusions/HeaderAnalysis.cpp @@ -0,0 +1,67 @@ +//===--- HeaderAnalysis.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Inclusions/HeaderAnalysis.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/HeaderSearch.h" + +namespace clang::tooling { +namespace { + +// Is Line an #if or #ifdef directive? +// FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non +// self-contained and is probably not what we want. +bool isIf(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + return Line.startswith("if"); +} + +// Is Line an #error directive mentioning includes? +bool isErrorAboutInclude(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + if (!Line.startswith("error")) + return false; + return Line.contains_insensitive( + "includ"); // Matches "include" or "including". +} + +// Heuristically headers that only want to be included via an umbrella. +bool isDontIncludeMeHeader(llvm::MemoryBufferRef Buffer) { + StringRef Content = Buffer.getBuffer(); + llvm::StringRef Line; + // Only sniff up to 100 lines or 10KB. + Content = Content.take_front(100 * 100); + for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { + std::tie(Line, Content) = Content.split('\n'); + if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) + return true; + } + return false; +} + +} // namespace + +bool isSelfContainedHeader(const FileEntry *FE, const SourceManager &SM, + HeaderSearch &HeaderInfo) { + assert(FE); + if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) && + !HeaderInfo.hasFileBeenImported(FE)) + return false; + // This pattern indicates that a header can't be used without + // particular preprocessor state, usually set up by another header. + return !isDontIncludeMeHeader( + const_cast(SM).getMemoryBufferForFileOrNone(FE).value_or( + llvm::MemoryBufferRef())); +} +} // namespace clang::tooling diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt index 424932e52951..ce9f556c857b 100644 --- a/clang/unittests/Tooling/CMakeLists.txt +++ b/clang/unittests/Tooling/CMakeLists.txt @@ -16,6 +16,7 @@ add_clang_unittest(ToolingTests DiagnosticsYamlTest.cpp ExecutionTest.cpp FixItTest.cpp + HeaderAnalysisTest.cpp HeaderIncludesTest.cpp StandardLibraryTest.cpp LexicallyOrderedRecursiveASTVisitorTest.cpp diff --git a/clang/unittests/Tooling/HeaderAnalysisTest.cpp b/clang/unittests/Tooling/HeaderAnalysisTest.cpp new file mode 100644 index 000000000000..1a121e7bf0a4 --- /dev/null +++ b/clang/unittests/Tooling/HeaderAnalysisTest.cpp @@ -0,0 +1,66 @@ +//===- unittest/Tooling/HeaderAnalysisTest.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Inclusions/HeaderAnalysis.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Testing/TestAST.h" +#include "gtest/gtest.h" + +namespace clang { +namespace tooling { +namespace { + +TEST(HeaderAnalysisTest, IsSelfContained) { + TestInputs Inputs; + Inputs.Code = R"cpp( + #include "headerguard.h" + #include "pragmaonce.h" + #import "imported.h" + + #include "bad.h" + #include "unguarded.h" + )cpp"; + + Inputs.ExtraFiles["headerguard.h"] = R"cpp( + #ifndef HEADER_H + #define HEADER_H + + #endif HEADER_H + )cpp"; + Inputs.ExtraFiles["pragmaonce.h"] = R"cpp( + #pragma once + )cpp"; + Inputs.ExtraFiles["imported.h"] = ""; + + Inputs.ExtraFiles["unguarded.h"] = ""; + Inputs.ExtraFiles["bad.h"] = R"cpp( + #pragma once + + #if defined(INSIDE_H) + #error "Only ... can be included directly" + #endif + )cpp"; + + TestAST AST(Inputs); + const auto &SM = AST.sourceManager(); + auto &FM = SM.getFileManager(); + auto &HI = AST.preprocessor().getHeaderSearchInfo(); + auto getFileID = [&](llvm::StringRef FileName) { + return SM.translateFile(FM.getFile(FileName).get()); + }; + EXPECT_TRUE(isSelfContainedHeader(getFileID("headerguard.h"), SM, HI)); + EXPECT_TRUE(isSelfContainedHeader(getFileID("pragmaonce.h"), SM, HI)); + EXPECT_TRUE(isSelfContainedHeader(getFileID("imported.h"), SM, HI)); + + EXPECT_FALSE(isSelfContainedHeader(getFileID("unguarded.h"), SM, HI)); + EXPECT_FALSE(isSelfContainedHeader(getFileID("bad.h"), SM, HI)); +} + +} // namespace +} // namespace tooling +} // namespace clang