Move the isSelfContainedHeader function from clangd to libtooling.

We plan to reuse it in the include-cleaner library. This patch moves the
functionality from clangd to libtooling so that the code can be shared
among all clang tools.

Differential Revision: https://reviews.llvm.org/D137697
This commit is contained in:
Haojian Wu 2022-11-07 13:30:47 +01:00
parent 285da1c8cd
commit dd46a08008
9 changed files with 175 additions and 65 deletions

View File

@ -15,6 +15,7 @@
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <cstring>
@ -121,12 +122,10 @@ public:
// isSelfContainedHeader only returns true once the full header-guard
// structure has been seen, i.e. when exiting the *outer* copy of the
// file. So last result wins.
if (isSelfContainedHeader(FE, PrevFID, SM, HeaderInfo))
Out->NonSelfContained.erase(
*Out->getID(SM.getFileEntryForID(PrevFID)));
if (tooling::isSelfContainedHeader(FE, SM, HeaderInfo))
Out->NonSelfContained.erase(*Out->getID(FE));
else
Out->NonSelfContained.insert(
*Out->getID(SM.getFileEntryForID(PrevFID)));
Out->NonSelfContained.insert(*Out->getID(FE));
}
break;
}

View File

@ -1183,58 +1183,5 @@ bool isProtoFile(SourceLocation Loc, const SourceManager &SM) {
return SM.getBufferData(FID).startswith(ProtoHeaderComment);
}
namespace {
// Reports whether Line is a preprocessor directive whose name starts with
// "if" (so #if, #ifdef and #ifndef all match). Whitespace is tolerated both
// before and after the '#'.
// FIXME: Headers guarded by platform checks such as #ifdef LINUX/WINDOWS/MACOS
// end up marked as non self-contained because of this, which is probably not
// what we want.
bool isIf(llvm::StringRef Line) {
  llvm::StringRef Rest = Line.ltrim();
  // consume_front strips the '#' from Rest when it is present.
  return Rest.consume_front("#") && Rest.ltrim().startswith("if");
}
// Reports whether Line is an #error directive whose text talks about
// including something.
bool isErrorAboutInclude(llvm::StringRef Line) {
  llvm::StringRef Rest = Line.ltrim();
  if (Rest.consume_front("#")) {
    Rest = Rest.ltrim();
    // The case-insensitive needle "includ" covers both "include" and
    // "including".
    return Rest.startswith("error") && Rest.contains_insensitive("includ");
  }
  return false;
}
// Heuristic for headers that only want to be included through an umbrella
// header: looks for an #if-like line immediately followed by an #error line
// that mentions including.
bool isDontIncludeMeHeader(llvm::StringRef Content) {
  // Inspect at most the first 100 * 100 bytes (about 10KB) and at most 100
  // lines of that prefix.
  llvm::StringRef Remaining = Content.take_front(100 * 100);
  unsigned LinesSeen = 0;
  while (LinesSeen < 100 && !Remaining.empty()) {
    const auto Parts = Remaining.split('\n');
    const llvm::StringRef Current = Parts.first;
    Remaining = Parts.second;
    ++LinesSeen;
    if (!isIf(Current))
      continue;
    // Peek at the line after the conditional without consuming it.
    if (isErrorAboutInclude(Remaining.split('\n').first))
      return true;
  }
  return false;
}
} // namespace
// Decides whether the header FE (whose contents are read from FID's buffer)
// is self-contained: it must be include-guarded or have been #import'ed, and
// it must not carry a dont-include-me pattern.
bool isSelfContainedHeader(const FileEntry *FE, FileID FID,
                           const SourceManager &SM, HeaderSearch &HeaderInfo) {
  // FIXME: Should files that have been #import'd be considered
  // self-contained? That's really a property of the includer,
  // not of the file.
  if (HeaderInfo.isFileMultipleIncludeGuarded(FE) ||
      HeaderInfo.hasFileBeenImported(FE)) {
    // A dont-include-me pattern means the header cannot be used without
    // particular preprocessor state, usually set up by another header.
    const bool WantsUmbrella = isDontIncludeMeHeader(SM.getBufferData(FID));
    return !WantsUmbrella;
  }
  return false;
}
} // namespace clangd
} // namespace clang

View File

@ -325,11 +325,6 @@ bool isHeaderFile(llvm::StringRef FileName,
/// Returns true if the given location is in a generated protobuf file.
bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr);
/// This scans source code, and should not be called when using a preamble.
/// Prefer to access the cache in IncludeStructure::isSelfContained if you can.
bool isSelfContainedHeader(const FileEntry *FE, FileID ID,
const SourceManager &SM, HeaderSearch &HeaderInfo);
/// Returns true if Name is reserved, like _Foo or __Vector_base.
inline bool isReservedName(llvm::StringRef Name) {
// This doesn't catch all cases, but the most common.

View File

@ -28,6 +28,7 @@
#include "clang/Index/IndexSymbol.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FileSystem.h"
@ -419,8 +420,8 @@ private:
getFrameworkHeaderIncludeSpelling(FE, HFI->Framework, HS))
return *Spelling;
if (!isSelfContainedHeader(FE, FID, PP->getSourceManager(),
PP->getHeaderSearchInfo())) {
if (!tooling::isSelfContainedHeader(FE, PP->getSourceManager(),
PP->getHeaderSearchInfo())) {
// A .inc or .def file is often included into a real header to define
// symbols (e.g. LLVM tablegen files).
if (Filename.endswith(".inc") || Filename.endswith(".def"))

View File

@ -0,0 +1,33 @@
//===--- HeaderAnalysis.h -----------------------------------------*-C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H
#define LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H
namespace clang {
class FileEntry;
class SourceManager;
class HeaderSearch;
namespace tooling {
/// Returns true if the given physical file is a self-contained header.
///
/// A header is considered self-contained if
///   - it has a proper header guard or has been #imported
///   - *and* it doesn't have a dont-include-me pattern.
///
/// \p FE must be non-null (the implementation asserts this).
///
/// This function can be expensive as it may scan the source code to detect
/// the dont-include-me pattern heuristically.
bool isSelfContainedHeader(const FileEntry *FE, const SourceManager &SM,
HeaderSearch &HeaderInfo);
} // namespace tooling
} // namespace clang
#endif // LLVM_CLANG_TOOLING_INCLUSIONS_HEADER_ANALYSIS_H

View File

@ -1,6 +1,7 @@
set(LLVM_LINK_COMPONENTS support)
add_clang_library(clangToolingInclusions
HeaderAnalysis.cpp
HeaderIncludes.cpp
IncludeStyle.cpp

View File

@ -0,0 +1,67 @@
//===--- HeaderAnalysis.cpp -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/HeaderSearch.h"
namespace clang::tooling {
namespace {
// Tells whether Line holds a conditional-style directive, i.e. a '#' followed
// by a directive name beginning with "if" (#if, #ifdef, #ifndef). Leading
// whitespace and whitespace between '#' and the name are skipped.
// FIXME: As a consequence, headers containing #ifdef LINUX/WINDOWS/MACOS get
// marked as non self-contained, which is probably not what we want.
bool isIf(llvm::StringRef Line) {
  llvm::StringRef Directive = Line.ltrim();
  const bool HasHash = Directive.consume_front("#");
  if (HasHash)
    return Directive.ltrim().startswith("if");
  return false;
}
// Tells whether Line is an #error directive that mentions includes.
bool isErrorAboutInclude(llvm::StringRef Line) {
  llvm::StringRef Directive = Line.ltrim();
  const bool HasHash = Directive.consume_front("#");
  if (!HasHash)
    return false;
  Directive = Directive.ltrim();
  const bool IsError = Directive.startswith("error");
  // "includ" is a common prefix of "include" and "including"; the match is
  // case-insensitive.
  return IsError ? Directive.contains_insensitive("includ") : false;
}
// Heuristically detects headers that only want to be included via an
// umbrella: an #if-like line directly followed by an #error line that
// mentions including.
bool isDontIncludeMeHeader(llvm::MemoryBufferRef Buffer) {
  // Only the first 100 * 100 bytes (about 10KB) are sniffed, and within that
  // prefix at most MaxLines lines.
  constexpr unsigned MaxLines = 100;
  llvm::StringRef Unscanned = Buffer.getBuffer().take_front(100 * 100);
  for (unsigned Seen = 0; Seen != MaxLines && !Unscanned.empty(); ++Seen) {
    const auto [Head, Tail] = Unscanned.split('\n');
    Unscanned = Tail;
    // The candidate #error line is only peeked at, so it is still examined
    // as a potential #if line on the next iteration.
    if (isIf(Head) && isErrorAboutInclude(Unscanned.split('\n').first))
      return true;
  }
  return false;
}
} // namespace
bool isSelfContainedHeader(const FileEntry *FE, const SourceManager &SM,
HeaderSearch &HeaderInfo) {
assert(FE);
if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) &&
!HeaderInfo.hasFileBeenImported(FE))
return false;
// This pattern indicates that a header can't be used without
// particular preprocessor state, usually set up by another header.
return !isDontIncludeMeHeader(
const_cast<SourceManager &>(SM).getMemoryBufferForFileOrNone(FE).value_or(
llvm::MemoryBufferRef()));
}
} // namespace clang::tooling

View File

@ -16,6 +16,7 @@ add_clang_unittest(ToolingTests
DiagnosticsYamlTest.cpp
ExecutionTest.cpp
FixItTest.cpp
HeaderAnalysisTest.cpp
HeaderIncludesTest.cpp
StandardLibraryTest.cpp
LexicallyOrderedRecursiveASTVisitorTest.cpp

View File

@ -0,0 +1,66 @@
//===- unittest/Tooling/HeaderAnalysisTest.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Testing/TestAST.h"
#include "gtest/gtest.h"
namespace clang {
namespace tooling {
namespace {
// Exercises tooling::isSelfContainedHeader() on five headers included by the
// main file: one with a classic include guard, one with #pragma once, one that
// is #import'ed, one without any guard, and one that is guarded but carries a
// "don't include me directly" #if/#error pair.
TEST(HeaderAnalysisTest, IsSelfContained) {
  TestInputs Inputs;
  Inputs.Code = R"cpp(
#include "headerguard.h"
#include "pragmaonce.h"
#import "imported.h"
#include "bad.h"
#include "unguarded.h"
)cpp";
  // The trailing guard name must be a comment; bare tokens after #endif are
  // not valid preprocessor syntax.
  Inputs.ExtraFiles["headerguard.h"] = R"cpp(
#ifndef HEADER_H
#define HEADER_H
#endif // HEADER_H
)cpp";
  Inputs.ExtraFiles["pragmaonce.h"] = R"cpp(
#pragma once
)cpp";
  Inputs.ExtraFiles["imported.h"] = "";
  Inputs.ExtraFiles["unguarded.h"] = "";
  Inputs.ExtraFiles["bad.h"] = R"cpp(
#pragma once
#if defined(INSIDE_H)
#error "Only ... can be included directly"
#endif
)cpp";

  TestAST AST(Inputs);
  const auto &SM = AST.sourceManager();
  auto &FM = SM.getFileManager();
  auto &HI = AST.preprocessor().getHeaderSearchInfo();
  // isSelfContainedHeader() takes a FileEntry, not a FileID, so hand it the
  // file entry looked up by name.
  auto getFile = [&](llvm::StringRef FileName) -> const FileEntry * {
    return FM.getFile(FileName).get();
  };

  EXPECT_TRUE(isSelfContainedHeader(getFile("headerguard.h"), SM, HI));
  EXPECT_TRUE(isSelfContainedHeader(getFile("pragmaonce.h"), SM, HI));
  EXPECT_TRUE(isSelfContainedHeader(getFile("imported.h"), SM, HI));

  EXPECT_FALSE(isSelfContainedHeader(getFile("unguarded.h"), SM, HI));
  EXPECT_FALSE(isSelfContainedHeader(getFile("bad.h"), SM, HI));
}
} // namespace
} // namespace tooling
} // namespace clang