diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h index 02269cea4bdf..cd3fd8600060 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h @@ -31,7 +31,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include +#include #include #include #include @@ -167,6 +169,20 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Include &); /// Supports efficiently hit-testing Headers against Includes. class Includes { public: + /// Registers a directory on the include path (-I etc) from HeaderSearch. + /// This allows reasoning about equivalence of e.g. "path/a/b.h" and "a/b.h". + /// This must be called before calling add() in order to take effect. + /// + /// The paths may be relative or absolute, but the paths passed to + /// addSearchDirectory() and add() (that is: Include.Resolved->getName()) + /// should be consistent, as they are compared lexically. + /// Generally, this is satisfied if you obtain paths through HeaderSearch + /// and FileEntries through PPCallbacks::IncludeDirective(). + void addSearchDirectory(llvm::StringRef); + + /// Registers an include directive seen in the main file. + /// + /// This should only be called after all search directories are added. void add(const Include &); /// All #includes seen, in the order they appear. @@ -183,9 +199,13 @@ public: const Include *atLine(unsigned OneBasedIndex) const; private: + llvm::StringSet<> SearchPath; + std::vector All; // Lookup structures for match(), values are index into All. llvm::StringMap> BySpelling; + // Heuristic spellings that likely resolve to the given file. + llvm::StringMap> BySpellingAlternate; llvm::DenseMap> ByFile; llvm::DenseMap ByLine; }; diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp index 50a15229cbe5..78f21bc2262a 100644 --- a/clang-tools-extra/include-cleaner/lib/Record.cpp +++ b/clang-tools-extra/include-cleaner/lib/Record.cpp @@ -14,13 +14,16 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Lex/DirectoryLookup.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/Inclusions/HeaderAnalysis.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" +#include "llvm/ADT/StringRef.h" #include #include +#include namespace clang::include_cleaner { namespace { @@ -28,7 +31,11 @@ namespace { class PPRecorder : public PPCallbacks { public: PPRecorder(RecordedPP &Recorded, const Preprocessor &PP) - : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {} + : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) { + for (const auto &Dir : PP.getHeaderSearchInfo().search_dir_range()) + if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir) + Recorded.Includes.addSearchDirectory(Dir.getDirRef()->getName()); + } void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp index bcd15920797b..2061fdc44388 100644 --- a/clang-tools-extra/include-cleaner/lib/Types.cpp +++ b/clang-tools-extra/include-cleaner/lib/Types.cpp @@ -10,8 +10,15 @@ #include "TypesInternal.h" #include "clang/AST/Decl.h" #include "clang/Basic/FileEntry.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang::include_cleaner { @@ -94,16 +101,51 @@ std::string Include::quote() const { .str(); } +static llvm::SmallString<128> normalizePath(llvm::StringRef Path) { + namespace path = llvm::sys::path; + + llvm::SmallString<128> P = Path; + path::remove_dots(P, /*remove_dot_dot=*/true); + path::native(P, path::Style::posix); + while (!P.empty() && P.back() == '/') + P.pop_back(); + return P; +} + +void Includes::addSearchDirectory(llvm::StringRef Path) { + SearchPath.try_emplace(normalizePath(Path)); +} + void Includes::add(const Include &I) { + namespace path = llvm::sys::path; + unsigned Index = All.size(); All.push_back(I); auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first; All.back().Spelled = BySpellingIt->first(); // Now we own the backing string. BySpellingIt->second.push_back(Index); - if (I.Resolved) - ByFile[&I.Resolved->getFileEntry()].push_back(Index); ByLine[I.Line] = Index; + + if (!I.Resolved) + return; + ByFile[&I.Resolved->getFileEntry()].push_back(Index); + + // While verbatim headers ideally should match #include spelling exactly, + // we want to be tolerant of different spellings of the same file. + // + // If the search path includes "/a/b" and "/a/b/c/d", + // verbatim "e/f" should match (spelled=c/d/e/f, resolved=/a/b/c/d/e/f). + // We assume entry's (normalized) name will match the search dirs. + auto Path = normalizePath(I.Resolved->getName()); + for (llvm::StringRef Parent = path::parent_path(Path); !Parent.empty(); + Parent = path::parent_path(Parent)) { + if (!SearchPath.contains(Parent)) + continue; + llvm::StringRef Rel = + llvm::StringRef(Path).drop_front(Parent.size()).ltrim('/'); + BySpellingAlternate[Rel].push_back(Index); + } } const Include *Includes::atLine(unsigned OneBasedIndex) const { @@ -122,11 +164,16 @@ llvm::SmallVector Includes::match(Header H) const { for (unsigned I : BySpelling.lookup(H.standard().name().trim("<>"))) Result.push_back(&All[I]); break; - case Header::Verbatim: - for (unsigned I : BySpelling.lookup(H.verbatim().trim("\"<>"))) + case Header::Verbatim: { + llvm::StringRef Spelling = H.verbatim().trim("\"<>"); + for (unsigned I : BySpelling.lookup(Spelling)) Result.push_back(&All[I]); + for (unsigned I : BySpellingAlternate.lookup(Spelling)) + if (!llvm::is_contained(Result, &All[I])) + Result.push_back(&All[I]); break; } + } return Result; } diff --git a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp index a2aec2eaf316..56b5fbe603ad 100644 --- a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp @@ -8,6 +8,7 @@ #include "clang-include-cleaner/Types.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/FileSystemOptions.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/Support/VirtualFileSystem.h" @@ -17,6 +18,8 @@ namespace clang::include_cleaner { namespace { using testing::ElementsAre; +using testing::IsEmpty; +using testing::UnorderedElementsAre; // Matches an Include* on the specified line; MATCHER_P(line, N, "") { return arg->Line == (unsigned)N; } @@ -44,5 +47,57 @@ TEST(RecordedIncludesTest, Match) { ElementsAre(line(4), line(5))); } +TEST(RecordedIncludesTest, MatchVerbatim) { + auto FS = llvm::makeIntrusiveRefCnt(); + FileManager FM(FileSystemOptions{}); + Includes Inc; + + // By default, a verbatim header only matches includes with the same spelling. + auto Foo = + FM.getVirtualFileRef("repo/lib/include/rel/foo.h", /*Size=*/0, time_t{}); + Inc.add(Include{"lib/include/rel/foo.h", Foo, SourceLocation(), 1}); + Inc.add(Include{"rel/foo.h", Foo, SourceLocation(), 2}); + EXPECT_THAT(Inc.match(Header("")), ElementsAre(line(2))); + + // A verbatim header can match another spelling if the search path + // suggests it's equivalent. + auto Bar = + FM.getVirtualFileRef("repo/lib/include/rel/bar.h", /*Size=*/0, time_t{}); + Inc.addSearchDirectory("repo/"); + Inc.addSearchDirectory("repo/lib/include"); + Inc.add(Include{"lib/include/rel/bar.h", Bar, SourceLocation(), 3}); + Inc.add(Include{"rel/bar.h", Bar, SourceLocation(), 4}); + EXPECT_THAT(Inc.match(Header("")), + UnorderedElementsAre(line(3), line(4))); + + // We don't apply this logic to system headers, though. + auto Vector = + FM.getVirtualFileRef("repo/lib/include/vector", /*Size=*/0, time_t{}); + Inc.add(Include{"lib/include/vector", Vector, SourceLocation(), 5}); + EXPECT_THAT(Inc.match(Header(*tooling::stdlib::Header::named(""))), + IsEmpty()); +} + +TEST(RecordedIncludesTest, MatchVerbatimMixedAbsoluteRelative) { + auto FS = llvm::makeIntrusiveRefCnt(); + FS->setCurrentWorkingDirectory("/working"); + FileManager FM(FileSystemOptions{}); + Includes Inc; + + auto Foo = + FM.getVirtualFileRef("/working/rel1/rel2/foo.h", /*Size=*/0, time_t{}); + Inc.addSearchDirectory("rel1"); + Inc.addSearchDirectory("rel1/rel2"); + Inc.add(Include{"rel2/foo.h", Foo, SourceLocation(), 1}); + EXPECT_THAT(Inc.match(Header("")), IsEmpty()); + + Inc = Includes{}; + auto Bar = FM.getVirtualFileRef("rel1/rel2/bar.h", /*Size=*/0, time_t{}); + Inc.addSearchDirectory("/working/rel1"); + Inc.addSearchDirectory("/working/rel1/rel2"); + Inc.add(Include{"rel2/bar.h", Bar, SourceLocation(), 1}); + EXPECT_THAT(Inc.match(Header("")), IsEmpty()); +} + } // namespace } // namespace clang::include_cleaner