From 80787ca12d529b9f61ade17c680b2517c2d1ce33 Mon Sep 17 00:00:00 2001
From: Easwaran Raman <eraman@google.com>
Date: Tue, 10 May 2016 22:03:23 +0000
Subject: [PATCH] Reapply r266477 and r266488

llvm-svn: 269131
---
 include/llvm/ProfileData/ProfileCommon.h     | 37 ++++++++++-
 lib/Analysis/InlineCost.cpp                  |  6 +-
 lib/Analysis/LLVMBuild.txt                   |  2 +-
 lib/ProfileData/ProfileSummary.cpp           | 44 +++++++++++++
 test/Transforms/Inline/inline-cold-callee.ll | 25 +++++---
 test/Transforms/Inline/inline-hot-callee.ll  | 25 +++++---
 unittests/ProfileData/CMakeLists.txt         |  1 +
 unittests/ProfileData/ProfileSummaryTest.cpp | 66 ++++++++++++++++++++
 8 files changed, 187 insertions(+), 19 deletions(-)
 create mode 100644 unittests/ProfileData/ProfileSummaryTest.cpp

diff --git a/include/llvm/ProfileData/ProfileCommon.h b/include/llvm/ProfileData/ProfileCommon.h
index ae196aa5d00..878c7c73449 100644
--- a/include/llvm/ProfileData/ProfileCommon.h
+++ b/include/llvm/ProfileData/ProfileCommon.h
@@ -21,6 +21,8 @@
 #include <vector>
 
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
 
 namespace llvm {
 class Function;
@@ -35,6 +37,7 @@ class LLVMContext;
 class Metadata;
 class MDTuple;
 class MDNode;
+class Module;
 
 inline const char *getHotSectionPrefix() { return ".hot"; }
 inline const char *getUnlikelySectionPrefix() { return ".unlikely"; }
@@ -67,6 +70,14 @@ private:
   // appears in the profile. The map is kept sorted in the descending order of
   // counts.
   std::map<uint64_t, uint32_t, std::greater<uint64_t>> CountFrequencies;
+  // Compute profile summary for a module.
+  static ProfileSummary *computeProfileSummary(Module *M);
+  // Cache of last seen module and its profile summary.
+  static ManagedStatic<std::pair<Module *, std::unique_ptr<ProfileSummary>>>
+      CachedSummary;
+  // Mutex guarding access to the summary cache.
+  static ManagedStatic<sys::SmartMutex<true>> CacheMutex;
+
 protected:
   SummaryEntryVector DetailedSummary;
   std::vector<uint32_t> DetailedSummaryCutoffs;
@@ -85,14 +96,12 @@ protected:
       : PSK(K), DetailedSummary(DetailedSummary), TotalCount(TotalCount),
         MaxCount(MaxCount), MaxFunctionCount(MaxFunctionCount),
         NumCounts(NumCounts), NumFunctions(NumFunctions) {}
-  ~ProfileSummary() = default;
   inline void addCount(uint64_t Count);
   /// \brief Return metadata specific to the profile format.
   /// Derived classes implement this method to return a vector of Metadata.
   virtual std::vector<Metadata *> getFormatSpecificMD(LLVMContext &Context) = 0;
   /// \brief Return detailed summary as metadata.
   Metadata *getDetailedSummaryMD(LLVMContext &Context);
-
 public:
   static const int Scale = 1000000;
   Kind getKind() const { return PSK; }
@@ -111,6 +120,10 @@ public:
   static ProfileSummary *getFromMD(Metadata *MD);
   uint32_t getNumFunctions() { return NumFunctions; }
   uint64_t getMaxFunctionCount() { return MaxFunctionCount; }
+  /// \brief Get profile summary associated with module \p M
+  static inline ProfileSummary *getProfileSummary(Module *M);
+  virtual ~ProfileSummary() = default;
+  virtual bool operator==(ProfileSummary &Other);
 };
 
 class InstrProfSummary final : public ProfileSummary {
@@ -140,6 +153,7 @@ public:
   uint64_t getTotalCount() { return TotalCount; }
   uint64_t getMaxBlockCount() { return MaxCount; }
   uint64_t getMaxInternalBlockCount() { return MaxInternalBlockCount; }
+  bool operator==(ProfileSummary &Other) override;
 };
 
 class SampleProfileSummary final : public ProfileSummary {
@@ -180,5 +194,24 @@ SummaryEntryVector &ProfileSummary::getDetailedSummary() {
   return DetailedSummary;
 }
 
+ProfileSummary *ProfileSummary::getProfileSummary(Module *M) {
+  if (!M)
+    return nullptr;
+  sys::SmartScopedLock<true> Lock(*CacheMutex);
+  // Parsing the summary metadata of a module can be expensive, so the summary
+  // of the most recently queried module is kept in a one-entry cache. A hit is
+  // decided by module address alone; the metadata is not consulted again.
+  auto &Cache = *CachedSummary;
+  if (Cache.first == M)
+    return Cache.second.get();
+  ProfileSummary *Fresh = computeProfileSummary(M);
+  // An absent summary is deliberately not cached: a later pass (the sample
+  // profile loader, for example) may still attach summary metadata to M.
+  if (!Fresh)
+    return nullptr;
+  Cache.first = M;
+  Cache.second.reset(Fresh);
+  return Fresh;
+}
 } // end namespace llvm
 #endif
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index d3b2930d7ea..87dac729367 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -630,10 +631,11 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
   // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
   uint64_t FunctionCount = 0, MaxFunctionCount = 0;
   bool HasPGOCounts = false;
-  if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
+  ProfileSummary *PS = ProfileSummary::getProfileSummary(Callee.getParent());
+  if (Callee.getEntryCount() && PS) {
     HasPGOCounts = true;
     FunctionCount = Callee.getEntryCount().getValue();
-    MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
+    MaxFunctionCount = PS->getMaxFunctionCount();
   }
 
   // Listen to the inlinehint attribute or profile based hotness information
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
index bddf1a3ac20..4678cba28dc 100644
--- a/lib/Analysis/LLVMBuild.txt
+++ b/lib/Analysis/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = Analysis
 parent = Libraries
-required_libraries = Core Support
+required_libraries = Core ProfileData Support
diff --git a/lib/ProfileData/ProfileSummary.cpp b/lib/ProfileData/ProfileSummary.cpp
index dfe44e32bbe..4e94138f0bf 100644
--- a/lib/ProfileData/ProfileSummary.cpp
+++ b/lib/ProfileData/ProfileSummary.cpp
@@ -15,6 +15,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/ProfileCommon.h"
@@ -32,6 +33,10 @@ const std::vector<uint32_t> ProfileSummary::DefaultCutoffs(
      900000, 950000, 990000, 999000, 999900, 999990, 999999});
 const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"};
 
+ManagedStatic<std::pair<Module *, std::unique_ptr<ProfileSummary>>>
+    ProfileSummary::CachedSummary;
+ManagedStatic<sys::SmartMutex<true>> ProfileSummary::CacheMutex;
+
 void InstrProfSummary::addRecord(const InstrProfRecord &R) {
   // The first counter is not necessarily an entry count for IR
   // instrumentation profiles.
@@ -86,6 +91,39 @@ void ProfileSummary::computeDetailedSummary() {
   }
 }
 
+// Compares the data shared by every profile format: kind, the scalar totals,
+// and each detailed-summary entry. Derived classes that carry extra fields
+// (see InstrProfSummary) check those and then defer to this implementation.
+bool ProfileSummary::operator==(ProfileSummary &Other) {
+  // Scalar fields first; any mismatch settles the answer immediately.
+  if (getKind() != Other.getKind() || TotalCount != Other.TotalCount ||
+      MaxCount != Other.MaxCount ||
+      MaxFunctionCount != Other.MaxFunctionCount ||
+      NumFunctions != Other.NumFunctions || NumCounts != Other.NumCounts)
+    return false;
+  // Bind by reference: the accessor returns a reference, so no copy is needed.
+  const auto &DS1 = getDetailedSummary();
+  const auto &DS2 = Other.getDetailedSummary();
+  // Three-iterator std::equal reads DS1.size() elements from DS2, so a shorter
+  // DS2 would be read past its end unless the lengths are compared first.
+  if (DS1.size() != DS2.size())
+    return false;
+  auto CompareSummaryEntry = [](const ProfileSummaryEntry &E1,
+                                const ProfileSummaryEntry &E2) {
+    return E1.Cutoff == E2.Cutoff && E1.MinCount == E2.MinCount &&
+           E1.NumCounts == E2.NumCounts;
+  };
+  return std::equal(DS1.begin(), DS1.end(), DS2.begin(), CompareSummaryEntry);
+}
+
+bool InstrProfSummary::operator==(ProfileSummary &Other) {
+  // dyn_cast yields null when Other is not an InstrProfSummary: never equal.
+  if (auto *RHS = dyn_cast<InstrProfSummary>(&Other))
+    return MaxInternalBlockCount == RHS->MaxInternalBlockCount &&
+           ProfileSummary::operator==(Other);
+  return false;
+}
+
 // Returns true if the function is a hot function.
 bool ProfileSummary::isFunctionHot(const Function *F) {
   // FIXME: update when summary data is stored in module's metadata.
@@ -366,3 +404,9 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
   else
     return nullptr;
 }
+
+ProfileSummary *ProfileSummary::computeProfileSummary(Module *M) {
+  // Without summary metadata on the module there is nothing to build from.
+  Metadata *SummaryMD = M->getProfileSummary();
+  return SummaryMD ? getFromMD(SummaryMD) : nullptr;
+}
diff --git a/test/Transforms/Inline/inline-cold-callee.ll b/test/Transforms/Inline/inline-cold-callee.ll
index 1fd9f105db5..0543a5a9cd6 100644
--- a/test/Transforms/Inline/inline-cold-callee.ll
+++ b/test/Transforms/Inline/inline-cold-callee.ll
@@ -5,7 +5,7 @@
 ; A callee with identical body does gets inlined because cost fits within the
 ; inline-threshold
 
-define i32 @callee1(i32 %x) !prof !1 {
+define i32 @callee1(i32 %x) !prof !21 {
   %x1 = add i32 %x, 1
   %x2 = add i32 %x1, 1
   %x3 = add i32 %x2, 1
@@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
   ret i32 %x3
 }
 
-define i32 @callee2(i32 %x) !prof !2 {
+define i32 @callee2(i32 %x) !prof !22 {
 ; CHECK-LABEL: @callee2(
   %x1 = add i32 %x, 1
   %x2 = add i32 %x1, 1
@@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
   ret i32 %x3
 }
 
-define i32 @caller2(i32 %y1) !prof !2 {
+define i32 @caller2(i32 %y1) !prof !22 {
 ; CHECK-LABEL: @caller2(
 ; CHECK: call i32 @callee2
 ; CHECK-NOT: call i32 @callee1
@@ -32,8 +32,19 @@ define i32 @caller2(i32 %y1) !prof !2 {
   ret i32 %y3
 }
 
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
-!1 = !{!"function_entry_count", i64 100}
-!2 = !{!"function_entry_count", i64 1}
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 100}
+!22 = !{!"function_entry_count", i64 1}
 
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxBlockCount", i64 1000}
+!6 = !{!"MaxInternalBlockCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumBlocks", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12}
+!12 = !{i32 10000, i64 0, i32 0}
diff --git a/test/Transforms/Inline/inline-hot-callee.ll b/test/Transforms/Inline/inline-hot-callee.ll
index 93ea9d43c78..905625e286d 100644
--- a/test/Transforms/Inline/inline-hot-callee.ll
+++ b/test/Transforms/Inline/inline-hot-callee.ll
@@ -5,7 +5,7 @@
 ; A cold callee with identical body does not get inlined because cost exceeds the
 ; inline-threshold
 
-define i32 @callee1(i32 %x) !prof !1 {
+define i32 @callee1(i32 %x) !prof !20 {
   %x1 = add i32 %x, 1
   %x2 = add i32 %x1, 1
   %x3 = add i32 %x2, 1
@@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
   ret i32 %x3
 }
 
-define i32 @callee2(i32 %x) !prof !2 {
+define i32 @callee2(i32 %x) !prof !21 {
 ; CHECK-LABEL: @callee2(
   %x1 = add i32 %x, 1
   %x2 = add i32 %x1, 1
@@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
   ret i32 %x3
 }
 
-define i32 @caller2(i32 %y1) !prof !2 {
+define i32 @caller2(i32 %y1) !prof !21 {
 ; CHECK-LABEL: @caller2(
 ; CHECK: call i32 @callee2
 ; CHECK-NOT: call i32 @callee1
@@ -32,8 +32,19 @@ define i32 @caller2(i32 %y1) !prof !2 {
   ret i32 %y3
 }
 
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"MaxFunctionCount", i32 10}
-!1 = !{!"function_entry_count", i64 10}
-!2 = !{!"function_entry_count", i64 1}
+!llvm.module.flags = !{!1}
+!20 = !{!"function_entry_count", i64 10}
+!21 = !{!"function_entry_count", i64 1}
 
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxBlockCount", i64 10}
+!6 = !{!"MaxInternalBlockCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 10}
+!8 = !{!"NumBlocks", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12}
+!12 = !{i32 10000, i64 0, i32 0}
diff --git a/unittests/ProfileData/CMakeLists.txt b/unittests/ProfileData/CMakeLists.txt
index dd39ca7da3a..820c0185508 100644
--- a/unittests/ProfileData/CMakeLists.txt
+++ b/unittests/ProfileData/CMakeLists.txt
@@ -8,5 +8,6 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_unittest(ProfileDataTests
   CoverageMappingTest.cpp
   InstrProfTest.cpp
+  ProfileSummaryTest.cpp
   SampleProfTest.cpp
   )
diff --git a/unittests/ProfileData/ProfileSummaryTest.cpp b/unittests/ProfileData/ProfileSummaryTest.cpp
new file mode 100644
index 00000000000..46d7162f743
--- /dev/null
+++ b/unittests/ProfileData/ProfileSummaryTest.cpp
@@ -0,0 +1,66 @@
+//===- unittest/ProfileData/ProfileSummaryTest.cpp --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace sampleprof;
+
+// Fixture that builds one instrumentation and one sample profile summary.
+struct ProfileSummaryTest : ::testing::Test {
+  InstrProfSummary IPS;
+  SampleProfileSummary SPS;
+
+  ProfileSummaryTest()
+      : IPS({100000, 900000, 999999}), SPS({100000, 900000, 999999}) {}
+  void SetUp() override {
+    // Instrumentation summary: fed two records, then finalized.
+    InstrProfRecord Record1("func1", 0x1234, {97531, 5, 99999});
+    InstrProfRecord Record2("func2", 0x1234, {57341, 10000, 10, 1});
+    IPS.addRecord(Record1);
+    IPS.addRecord(Record2);
+    IPS.computeDetailedSummary();
+
+    // Sample summary: fed the samples of two functions, then finalized.
+    FunctionSamples FooSamples;
+    FooSamples.addTotalSamples(7711);
+    FooSamples.addHeadSamples(610);
+    FooSamples.addBodySamples(1, 0, 610);
+    FooSamples.addBodySamples(2, 0, 600);
+    FooSamples.addBodySamples(4, 0, 60000);
+    FooSamples.addBodySamples(8, 0, 60351);
+    FooSamples.addBodySamples(10, 0, 605);
+
+    FunctionSamples BarSamples;
+    BarSamples.addTotalSamples(20301);
+    BarSamples.addHeadSamples(1437);
+    BarSamples.addBodySamples(1, 0, 1437);
+
+    SPS.addRecord(FooSamples);
+    SPS.addRecord(BarSamples);
+    SPS.computeDetailedSummary();
+  }
+};
+
+TEST_F(ProfileSummaryTest, summary_from_module) {
+  LLVMContext Context;
+  Module M1("M1", Context);
+  EXPECT_FALSE(ProfileSummary::getProfileSummary(&M1));
+  M1.setProfileSummary(IPS.getMD(Context));
+  EXPECT_TRUE(IPS == *ProfileSummary::getProfileSummary(&M1));
+  // Same check with the sample profile summary on a second, distinct module.
+  Module M2("M2", Context);
+  EXPECT_FALSE(ProfileSummary::getProfileSummary(&M2));
+  M2.setProfileSummary(SPS.getMD(Context));
+  EXPECT_TRUE(SPS == *ProfileSummary::getProfileSummary(&M2));
+}