From 80787ca12d529b9f61ade17c680b2517c2d1ce33 Mon Sep 17 00:00:00 2001 From: Easwaran Raman <eraman@google.com> Date: Tue, 10 May 2016 22:03:23 +0000 Subject: [PATCH] Reapply r266477 and r266488 llvm-svn: 269131 --- include/llvm/ProfileData/ProfileCommon.h | 37 ++++++++++- lib/Analysis/InlineCost.cpp | 6 +- lib/Analysis/LLVMBuild.txt | 2 +- lib/ProfileData/ProfileSummary.cpp | 44 +++++++++++++ test/Transforms/Inline/inline-cold-callee.ll | 25 +++++--- test/Transforms/Inline/inline-hot-callee.ll | 25 +++++--- unittests/ProfileData/CMakeLists.txt | 1 + unittests/ProfileData/ProfileSummaryTest.cpp | 66 ++++++++++++++++++++ 8 files changed, 187 insertions(+), 19 deletions(-) create mode 100644 unittests/ProfileData/ProfileSummaryTest.cpp diff --git a/include/llvm/ProfileData/ProfileCommon.h b/include/llvm/ProfileData/ProfileCommon.h index ae196aa5d00..878c7c73449 100644 --- a/include/llvm/ProfileData/ProfileCommon.h +++ b/include/llvm/ProfileData/ProfileCommon.h @@ -21,6 +21,8 @@ #include <vector> #include "llvm/Support/Casting.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" namespace llvm { class Function; @@ -35,6 +37,7 @@ class LLVMContext; class Metadata; class MDTuple; class MDNode; +class Module; inline const char *getHotSectionPrefix() { return ".hot"; } inline const char *getUnlikelySectionPrefix() { return ".unlikely"; } @@ -67,6 +70,14 @@ private: // appears in the profile. The map is kept sorted in the descending order of // counts. std::map<uint64_t, uint32_t, std::greater<uint64_t>> CountFrequencies; + // Compute profile summary for a module. + static ProfileSummary *computeProfileSummary(Module *M); + // Cache of last seen module and its profile summary. 
+ static ManagedStatic<std::pair<Module *, std::unique_ptr<ProfileSummary>>> + CachedSummary; + // Mutex to access summary cache + static ManagedStatic<sys::SmartMutex<true>> CacheMutex; + protected: SummaryEntryVector DetailedSummary; std::vector<uint32_t> DetailedSummaryCutoffs; @@ -85,14 +96,12 @@ protected: : PSK(K), DetailedSummary(DetailedSummary), TotalCount(TotalCount), MaxCount(MaxCount), MaxFunctionCount(MaxFunctionCount), NumCounts(NumCounts), NumFunctions(NumFunctions) {} - ~ProfileSummary() = default; inline void addCount(uint64_t Count); /// \brief Return metadata specific to the profile format. /// Derived classes implement this method to return a vector of Metadata. virtual std::vector<Metadata *> getFormatSpecificMD(LLVMContext &Context) = 0; /// \brief Return detailed summary as metadata. Metadata *getDetailedSummaryMD(LLVMContext &Context); - public: static const int Scale = 1000000; Kind getKind() const { return PSK; } @@ -111,6 +120,10 @@ public: static ProfileSummary *getFromMD(Metadata *MD); uint32_t getNumFunctions() { return NumFunctions; } uint64_t getMaxFunctionCount() { return MaxFunctionCount; } + /// \brief Get profile summary associated with module \p M + static inline ProfileSummary *getProfileSummary(Module *M); + virtual ~ProfileSummary() = default; + virtual bool operator==(ProfileSummary &Other); }; class InstrProfSummary final : public ProfileSummary { @@ -140,6 +153,7 @@ public: uint64_t getTotalCount() { return TotalCount; } uint64_t getMaxBlockCount() { return MaxCount; } uint64_t getMaxInternalBlockCount() { return MaxInternalBlockCount; } + bool operator==(ProfileSummary &Other) override; }; class SampleProfileSummary final : public ProfileSummary { @@ -180,5 +194,24 @@ SummaryEntryVector &ProfileSummary::getDetailedSummary() { return DetailedSummary; } +ProfileSummary *ProfileSummary::getProfileSummary(Module *M) { + if (!M) + return nullptr; + sys::SmartScopedLock<true> Lock(*CacheMutex); + // Computing profile summary 
for a module involves parsing a fairly large + // metadata and could be expensive. We use a simple cache of the last seen + // module and its profile summary. + if (CachedSummary->first != M) { + auto *Summary = computeProfileSummary(M); + // Do not cache if the summary is empty. This is because a later pass + // (sample profile loader, for example) could attach the summary metadata on + // the module. + if (!Summary) + return nullptr; + CachedSummary->first = M; + CachedSummary->second.reset(Summary); + } + return CachedSummary->second.get(); +} } // end namespace llvm #endif diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index d3b2930d7ea..87dac729367 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -630,10 +631,11 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // a well-tuned heuristic based on *callsite* hotness and not callee hotness. 
uint64_t FunctionCount = 0, MaxFunctionCount = 0; bool HasPGOCounts = false; - if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) { + ProfileSummary *PS = ProfileSummary::getProfileSummary(Callee.getParent()); + if (Callee.getEntryCount() && PS) { HasPGOCounts = true; FunctionCount = Callee.getEntryCount().getValue(); - MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue(); + MaxFunctionCount = PS->getMaxFunctionCount(); } // Listen to the inlinehint attribute or profile based hotness information diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index bddf1a3ac20..4678cba28dc 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Analysis parent = Libraries -required_libraries = Core Support +required_libraries = Core ProfileData Support diff --git a/lib/ProfileData/ProfileSummary.cpp b/lib/ProfileData/ProfileSummary.cpp index dfe44e32bbe..4e94138f0bf 100644 --- a/lib/ProfileData/ProfileSummary.cpp +++ b/lib/ProfileData/ProfileSummary.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/ProfileCommon.h" @@ -32,6 +33,10 @@ const std::vector<uint32_t> ProfileSummary::DefaultCutoffs( 900000, 950000, 990000, 999000, 999900, 999990, 999999}); const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"}; +ManagedStatic<std::pair<Module *, std::unique_ptr<ProfileSummary>>> + ProfileSummary::CachedSummary; +ManagedStatic<sys::SmartMutex<true>> ProfileSummary::CacheMutex; + void InstrProfSummary::addRecord(const InstrProfRecord &R) { // The first counter is not necessarily an entry count for IR // instrumentation profiles. 
@@ -86,6 +91,39 @@ void ProfileSummary::computeDetailedSummary() {
   }
 }
 
+// Two summaries compare equal when they have the same kind, the same scalar
+// totals, and element-wise identical detailed summary entries.
+bool ProfileSummary::operator==(ProfileSummary &Other) {
+  if (getKind() != Other.getKind() || TotalCount != Other.TotalCount ||
+      MaxCount != Other.MaxCount || NumCounts != Other.NumCounts ||
+      MaxFunctionCount != Other.MaxFunctionCount ||
+      NumFunctions != Other.NumFunctions)
+    return false;
+  // Entries are only read here, so bind by reference instead of copying.
+  const std::vector<ProfileSummaryEntry> &DS1 = getDetailedSummary();
+  const std::vector<ProfileSummaryEntry> &DS2 = Other.getDetailedSummary();
+  // The three-iterator std::equal walks DS1.size() elements of DS2, so a
+  // shorter DS2 would be read past its end; require equal lengths first.
+  if (DS1.size() != DS2.size())
+    return false;
+  auto SameEntry = [](const ProfileSummaryEntry &E1,
+                      const ProfileSummaryEntry &E2) {
+    return E1.Cutoff == E2.Cutoff && E1.MinCount == E2.MinCount &&
+           E1.NumCounts == E2.NumCounts;
+  };
+  return std::equal(DS1.begin(), DS1.end(), DS2.begin(), SameEntry);
+}
+
+// Equal only when Other is also an InstrProfSummary with the same
+// MaxInternalBlockCount and the shared ProfileSummary fields match.
+bool InstrProfSummary::operator==(ProfileSummary &Other) {
+  InstrProfSummary *OtherIPS = dyn_cast<InstrProfSummary>(&Other);
+  if (!OtherIPS)
+    return false;
+  return MaxInternalBlockCount == OtherIPS->MaxInternalBlockCount &&
+         ProfileSummary::operator==(Other);
+}
+
 // Returns true if the function is a hot function.
 bool ProfileSummary::isFunctionHot(const Function *F) {
   // FIXME: update when summary data is stored in module's metadata.
@@ -366,3 +404,9 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) { else return nullptr; } + +ProfileSummary *ProfileSummary::computeProfileSummary(Module *M) { + if (Metadata *MD = M->getProfileSummary()) + return getFromMD(MD); + return nullptr; +} diff --git a/test/Transforms/Inline/inline-cold-callee.ll b/test/Transforms/Inline/inline-cold-callee.ll index 1fd9f105db5..0543a5a9cd6 100644 --- a/test/Transforms/Inline/inline-cold-callee.ll +++ b/test/Transforms/Inline/inline-cold-callee.ll @@ -5,7 +5,7 @@ ; A callee with identical body does gets inlined because cost fits within the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !21 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add i32 %x2, 1 @@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 { ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !22 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 { ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !22 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,19 @@ define i32 @caller2(i32 %y1) !prof !2 { ret i32 %y3 } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 1000} -!1 = !{!"function_entry_count", i64 100} -!2 = !{!"function_entry_count", i64 1} +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 100} +!22 = !{!"function_entry_count", i64 1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxBlockCount", i64 1000} +!6 = !{!"MaxInternalBlockCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumBlocks", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12} +!12 = !{i32 10000, i64 0, i32 0} diff --git 
a/test/Transforms/Inline/inline-hot-callee.ll b/test/Transforms/Inline/inline-hot-callee.ll index 93ea9d43c78..905625e286d 100644 --- a/test/Transforms/Inline/inline-hot-callee.ll +++ b/test/Transforms/Inline/inline-hot-callee.ll @@ -5,7 +5,7 @@ ; A cold callee with identical body does not get inlined because cost exceeds the ; inline-threshold -define i32 @callee1(i32 %x) !prof !1 { +define i32 @callee1(i32 %x) !prof !20 { %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 %x3 = add i32 %x2, 1 @@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 { ret i32 %x3 } -define i32 @callee2(i32 %x) !prof !2 { +define i32 @callee2(i32 %x) !prof !21 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 %x2 = add i32 %x1, 1 @@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 { ret i32 %x3 } -define i32 @caller2(i32 %y1) !prof !2 { +define i32 @caller2(i32 %y1) !prof !21 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 @@ -32,8 +32,19 @@ define i32 @caller2(i32 %y1) !prof !2 { ret i32 %y3 } -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"MaxFunctionCount", i32 10} -!1 = !{!"function_entry_count", i64 10} -!2 = !{!"function_entry_count", i64 1} +!llvm.module.flags = !{!1} +!20 = !{!"function_entry_count", i64 10} +!21 = !{!"function_entry_count", i64 1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxBlockCount", i64 10} +!6 = !{!"MaxInternalBlockCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 10} +!8 = !{!"NumBlocks", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12} +!12 = !{i32 10000, i64 0, i32 0} diff --git a/unittests/ProfileData/CMakeLists.txt b/unittests/ProfileData/CMakeLists.txt index dd39ca7da3a..820c0185508 100644 --- a/unittests/ProfileData/CMakeLists.txt +++ b/unittests/ProfileData/CMakeLists.txt @@ -8,5 +8,6 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(ProfileDataTests 
CoverageMappingTest.cpp InstrProfTest.cpp + ProfileSummaryTest.cpp SampleProfTest.cpp ) diff --git a/unittests/ProfileData/ProfileSummaryTest.cpp b/unittests/ProfileData/ProfileSummaryTest.cpp new file mode 100644 index 00000000000..46d7162f743 --- /dev/null +++ b/unittests/ProfileData/ProfileSummaryTest.cpp @@ -0,0 +1,66 @@ +//===- unittest/ProfileData/ProfileSummaryTest.cpp --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Module.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/ProfileData/SampleProf.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace sampleprof; + +struct ProfileSummaryTest : ::testing::Test { + InstrProfSummary IPS; + SampleProfileSummary SPS; + + ProfileSummaryTest() + : IPS({100000, 900000, 999999}), SPS({100000, 900000, 999999}) {} + void SetUp() { + InstrProfRecord Record1("func1", 0x1234, {97531, 5, 99999}); + InstrProfRecord Record2("func2", 0x1234, {57341, 10000, 10, 1}); + IPS.addRecord(Record1); + IPS.addRecord(Record2); + + IPS.computeDetailedSummary(); + + FunctionSamples FooSamples; + FooSamples.addTotalSamples(7711); + FooSamples.addHeadSamples(610); + FooSamples.addBodySamples(1, 0, 610); + FooSamples.addBodySamples(2, 0, 600); + FooSamples.addBodySamples(4, 0, 60000); + FooSamples.addBodySamples(8, 0, 60351); + FooSamples.addBodySamples(10, 0, 605); + + FunctionSamples BarSamples; + BarSamples.addTotalSamples(20301); + BarSamples.addHeadSamples(1437); + BarSamples.addBodySamples(1, 0, 1437); + + SPS.addRecord(FooSamples); + SPS.addRecord(BarSamples); + + SPS.computeDetailedSummary(); + } + +}; + +TEST_F(ProfileSummaryTest, summary_from_module) { + LLVMContext Context; + Module M1("M1", Context); + 
EXPECT_FALSE(ProfileSummary::getProfileSummary(&M1)); + M1.setProfileSummary(IPS.getMD(Context)); + EXPECT_TRUE(IPS == *ProfileSummary::getProfileSummary(&M1)); + + Module M2("M2", Context); + EXPECT_FALSE(ProfileSummary::getProfileSummary(&M2)); + M2.setProfileSummary(SPS.getMD(Context)); + EXPECT_TRUE(SPS == *ProfileSummary::getProfileSummary(&M2)); +}