mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-03 17:02:03 +00:00
[PGO] Improve the working set size heuristics under the partial sample PGO.
Summary: The working set size heuristics (ProfileSummaryInfo::hasHugeWorkingSetSize) under the partial sample PGO may not be accurate because the profile is partial and the number of hot profile counters in the ProfileSummary may not reflect the actual working set size of the program being compiled. To improve this, the (approximated) ratio of the number of profile counters of the program being compiled to the number of profile counters in the partial sample profile is computed (which is called the partial profile ratio) and the working set size of the profile is scaled by this ratio to reflect the working set size of the program being compiled and used for the working set size heuristics. The partial profile ratio is approximated based on the number of the basic blocks in the program and the NumCounts field in the ProfileSummary and computed through the thin LTO indexing. This means that there is the limitation that the scaled working set size is available to the thin LTO post link passes only. Reviewers: davidxl Subscribers: mgorny, eraman, hiraditya, steven_wu, dexonsmith, arphaman, dang, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D79831
This commit is contained in:
parent
3fc69c930e
commit
c65f25e192
@ -46,6 +46,7 @@ class FunctionType;
|
||||
class GVMaterializer;
|
||||
class LLVMContext;
|
||||
class MemoryBuffer;
|
||||
class ModuleSummaryIndex;
|
||||
class Pass;
|
||||
class RandomNumberGenerator;
|
||||
template <class PtrType> class SmallPtrSetImpl;
|
||||
@ -882,6 +883,10 @@ public:
|
||||
|
||||
/// Take ownership of the given memory buffer.
|
||||
void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
|
||||
|
||||
/// Set the partial sample profile ratio in the profile summary module flag,
|
||||
/// if applicable.
|
||||
void setPartialSampleProfileRatio(const ModuleSummaryIndex &Index);
|
||||
};
|
||||
|
||||
/// Given "llvm.used" or "llvm.compiler.used" as a global name, collect
|
||||
|
@ -59,7 +59,8 @@ private:
|
||||
bool Partial = false;
|
||||
/// This approximately represents the ratio of the number of profile counters
|
||||
/// of the program being built to the number of profile counters in the
|
||||
/// partial sample profile. When 'Partial' is false, it is undefined.
|
||||
/// partial sample profile. When 'Partial' is false, it is undefined. This is
|
||||
/// currently only available under thin LTO mode.
|
||||
double PartialProfileRatio = 0;
|
||||
/// Return detailed summary as metadata.
|
||||
Metadata *getDetailedSummaryMD(LLVMContext &Context);
|
||||
|
@ -70,6 +70,23 @@ static cl::opt<bool> PartialProfile(
|
||||
"partial-profile", cl::Hidden, cl::init(false),
|
||||
cl::desc("Specify the current profile is used as a partial profile."));
|
||||
|
||||
// Controls whether computeThresholds() scales HotEntry.NumCounts by the
// partial profile ratio before comparing it against the working-set-size
// thresholds. Hidden and off by default. Declared without `static`: the
// ProfileSummaryInfo unit test refers to it through an `extern` declaration.
cl::opt<bool> ScalePartialSampleProfileWorkingSetSize(
|
||||
"scale-partial-sample-profile-working-set-size", cl::Hidden,
|
||||
cl::init(false),
|
||||
cl::desc(
|
||||
"If true, scale the working set size of the partial sample profile "
|
||||
"by the partial profile ratio to reflect the size of the program "
|
||||
"being compiled."));
|
||||
|
||||
// Extra multiplier applied, together with the partial profile ratio, to
// HotEntry.NumCounts in computeThresholds() when working-set-size scaling is
// enabled. Hidden; defaults to 0.008.
static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor(
|
||||
"partial-sample-profile-working-set-size-scale-factor", cl::Hidden,
|
||||
cl::init(0.008),
|
||||
cl::desc("The scale factor used to scale the working set size of the "
|
||||
"partial sample profile along with the partial profile ratio. "
|
||||
"This includes the factor of the profile counter per block "
|
||||
"and the factor to scale the working set size to use the same "
|
||||
"shared thresholds as PGO."));
|
||||
|
||||
// Find the summary entry for a desired percentile of counts.
|
||||
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
|
||||
uint64_t Percentile) {
|
||||
@ -280,10 +297,23 @@ void ProfileSummaryInfo::computeThresholds() {
|
||||
ColdCountThreshold = ProfileSummaryColdCount;
|
||||
assert(ColdCountThreshold <= HotCountThreshold &&
|
||||
"Cold count threshold cannot exceed hot count threshold!");
|
||||
HasHugeWorkingSetSize =
|
||||
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
|
||||
HasLargeWorkingSetSize =
|
||||
HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
|
||||
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
|
||||
HasHugeWorkingSetSize =
|
||||
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
|
||||
HasLargeWorkingSetSize =
|
||||
HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
|
||||
} else {
|
||||
// Scale the working set size of the partial sample profile to reflect the
|
||||
// size of the program being compiled.
|
||||
double PartialProfileRatio = Summary->getPartialProfileRatio();
|
||||
uint64_t ScaledHotEntryNumCounts =
|
||||
static_cast<uint64_t>(HotEntry.NumCounts * PartialProfileRatio *
|
||||
PartialSampleProfileWorkingSetSizeScaleFactor);
|
||||
HasHugeWorkingSetSize =
|
||||
ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
|
||||
HasLargeWorkingSetSize =
|
||||
ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
|
||||
}
|
||||
}
|
||||
|
||||
Optional<uint64_t>
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/IR/ModuleSummaryIndex.h"
|
||||
#include "llvm/IR/SymbolTableListTraits.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/TypeFinder.h"
|
||||
@ -673,3 +674,23 @@ GlobalVariable *llvm::collectUsedGlobalVariables(
|
||||
}
|
||||
return GV;
|
||||
}
|
||||
|
||||
void Module::setPartialSampleProfileRatio(const ModuleSummaryIndex &Index) {
|
||||
if (auto *SummaryMD = getProfileSummary(/*IsCS*/ false)) {
|
||||
std::unique_ptr<ProfileSummary> ProfileSummary(
|
||||
ProfileSummary::getFromMD(SummaryMD));
|
||||
if (ProfileSummary) {
|
||||
if (ProfileSummary->getKind() != ProfileSummary::PSK_Sample ||
|
||||
!ProfileSummary->isPartialProfile())
|
||||
return;
|
||||
uint64_t BlockCount = Index.getBlockCount();
|
||||
uint32_t NumCounts = ProfileSummary->getNumCounts();
|
||||
if (!NumCounts)
|
||||
return;
|
||||
double Ratio = (double)BlockCount / NumCounts;
|
||||
ProfileSummary->setPartialProfileRatio(Ratio);
|
||||
setProfileSummary(ProfileSummary->getMD(getContext()),
|
||||
ProfileSummary::PSK_Sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -541,6 +541,10 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
|
||||
return DiagFileOrErr.takeError();
|
||||
auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
|
||||
|
||||
// Set the partial sample profile ratio in the profile summary module flag of
|
||||
// the module, if applicable.
|
||||
Mod.setPartialSampleProfileRatio(CombinedIndex);
|
||||
|
||||
if (Conf.CodeGenOnly) {
|
||||
codegen(Conf, TM.get(), AddStream, Task, Mod);
|
||||
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
|
||||
|
@ -1232,6 +1232,12 @@ Expected<bool> FunctionImporter::importFunctions(
|
||||
// have loaded all the required metadata!
|
||||
UpgradeDebugInfo(*SrcModule);
|
||||
|
||||
// Set the partial sample profile ratio in the profile summary module flag
|
||||
// of the imported source module, if applicable, so that the profile summary
|
||||
// module flag will match with that of the destination module when it's
|
||||
// imported.
|
||||
SrcModule->setPartialSampleProfileRatio(Index);
|
||||
|
||||
// Link in the specified functions.
|
||||
if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
|
||||
&GlobalsToImport))
|
||||
|
@ -23,6 +23,8 @@
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
extern llvm::cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
|
||||
|
||||
namespace llvm {
|
||||
namespace {
|
||||
|
||||
@ -42,7 +44,12 @@ protected:
|
||||
BPI.reset(new BranchProbabilityInfo(F, *LI));
|
||||
return BlockFrequencyInfo(F, *BPI, *LI);
|
||||
}
|
||||
std::unique_ptr<Module> makeLLVMModule(const char *ProfKind = nullptr) {
|
||||
std::unique_ptr<Module> makeLLVMModule(const char *ProfKind = nullptr,
|
||||
uint64_t NumCounts = 3,
|
||||
uint64_t IsPartialProfile = 0,
|
||||
double PartialProfileRatio = 0.0,
|
||||
uint64_t HotNumCounts = 3,
|
||||
uint64_t ColdNumCounts = 10) {
|
||||
const char *ModuleString =
|
||||
"define i32 @g(i32 %x) !prof !21 {{\n"
|
||||
" ret i32 0\n"
|
||||
@ -83,27 +90,32 @@ protected:
|
||||
"!22 = !{{!\"function_entry_count\", i64 100}\n"
|
||||
"!23 = !{{!\"branch_weights\", i32 64, i32 4}\n"
|
||||
"{0}";
|
||||
const char *SummaryString = "!llvm.module.flags = !{{!1}"
|
||||
"!1 = !{{i32 1, !\"ProfileSummary\", !2}"
|
||||
"!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10}"
|
||||
"!3 = !{{!\"ProfileFormat\", !\"{0}\"}"
|
||||
"!4 = !{{!\"TotalCount\", i64 10000}"
|
||||
"!5 = !{{!\"MaxCount\", i64 10}"
|
||||
"!6 = !{{!\"MaxInternalCount\", i64 1}"
|
||||
"!7 = !{{!\"MaxFunctionCount\", i64 1000}"
|
||||
"!8 = !{{!\"NumCounts\", i64 3}"
|
||||
"!9 = !{{!\"NumFunctions\", i64 3}"
|
||||
"!10 = !{{!\"DetailedSummary\", !11}"
|
||||
"!11 = !{{!12, !13, !14}"
|
||||
"!12 = !{{i32 10000, i64 1000, i32 1}"
|
||||
"!13 = !{{i32 999000, i64 300, i32 3}"
|
||||
"!14 = !{{i32 999999, i64 5, i32 10}";
|
||||
const char *SummaryString =
|
||||
"!llvm.module.flags = !{{!1}\n"
|
||||
"!1 = !{{i32 1, !\"ProfileSummary\", !2}\n"
|
||||
"!2 = !{{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}\n"
|
||||
"!3 = !{{!\"ProfileFormat\", !\"{0}\"}\n"
|
||||
"!4 = !{{!\"TotalCount\", i64 10000}\n"
|
||||
"!5 = !{{!\"MaxCount\", i64 10}\n"
|
||||
"!6 = !{{!\"MaxInternalCount\", i64 1}\n"
|
||||
"!7 = !{{!\"MaxFunctionCount\", i64 1000}\n"
|
||||
"!8 = !{{!\"NumCounts\", i64 {1}}\n"
|
||||
"!9 = !{{!\"NumFunctions\", i64 3}\n"
|
||||
"!10 = !{{!\"IsPartialProfile\", i64 {2}}\n"
|
||||
"!11 = !{{!\"PartialProfileRatio\", double {3}}\n"
|
||||
"!12 = !{{!\"DetailedSummary\", !13}\n"
|
||||
"!13 = !{{!14, !15, !16}\n"
|
||||
"!14 = !{{i32 10000, i64 1000, i32 1}\n"
|
||||
"!15 = !{{i32 990000, i64 300, i32 {4}}\n"
|
||||
"!16 = !{{i32 999999, i64 5, i32 {5}}\n";
|
||||
SMDiagnostic Err;
|
||||
if (ProfKind)
|
||||
return parseAssemblyString(
|
||||
formatv(ModuleString, formatv(SummaryString, ProfKind).str()).str(),
|
||||
Err, C);
|
||||
else
|
||||
if (ProfKind) {
|
||||
auto Summary =
|
||||
formatv(SummaryString, ProfKind, NumCounts, IsPartialProfile,
|
||||
PartialProfileRatio, HotNumCounts, ColdNumCounts)
|
||||
.str();
|
||||
return parseAssemblyString(formatv(ModuleString, Summary).str(), Err, C);
|
||||
} else
|
||||
return parseAssemblyString(formatv(ModuleString, "").str(), Err, C);
|
||||
}
|
||||
};
|
||||
@ -280,6 +292,7 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) {
|
||||
ProfileSummaryInfo PSI = buildPSI(M.get());
|
||||
EXPECT_TRUE(PSI.hasProfileSummary());
|
||||
EXPECT_TRUE(PSI.hasSampleProfile());
|
||||
EXPECT_FALSE(PSI.hasPartialSampleProfile());
|
||||
|
||||
BasicBlock &BB0 = F->getEntryBlock();
|
||||
BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0);
|
||||
@ -373,5 +386,47 @@ TEST_F(ProfileSummaryInfoTest, SampleProfNoFuncEntryCount) {
|
||||
EXPECT_FALSE(PSI.isFunctionColdInCallGraphNthPercentile(990000, F, BFI));
|
||||
}
|
||||
|
||||
// Checks that, with working-set-size scaling switched on, the huge/large
// working set predicates of a partial sample profile follow the scaled
// hot-entry counter number instead of the raw one.
TEST_F(ProfileSummaryInfoTest, PartialSampleProfWorkingSetSize) {
  // The option is process-global. Remember its current value and put it back
  // when the test ends so that later tests in the same binary are unaffected.
  struct ScaleFlagRestorer {
    bool Saved;
    ~ScaleFlagRestorer() {
      ScalePartialSampleProfileWorkingSetSize.setValue(Saved);
    }
  } RestoreScaleFlag{ScalePartialSampleProfileWorkingSetSize.getValue()};
  ScalePartialSampleProfileWorkingSetSize.setValue(true);

  // With PartialProfileRatio unset (zero).
  auto M1 = makeLLVMModule("SampleProfile", /*NumCounts*/ 3,
                           /*IsPartialProfile*/ 1,
                           /*PartialProfileRatio*/ 0.0,
                           /*HotNumCounts*/ 3, /*ColdNumCounts*/ 10);
  ProfileSummaryInfo PSI1 = buildPSI(M1.get());
  EXPECT_TRUE(PSI1.hasProfileSummary());
  EXPECT_TRUE(PSI1.hasSampleProfile());
  EXPECT_TRUE(PSI1.hasPartialSampleProfile());
  EXPECT_FALSE(PSI1.hasHugeWorkingSetSize());
  EXPECT_FALSE(PSI1.hasLargeWorkingSetSize());

  // With PartialProfileRatio set (non-zero) and a small working set size.
  auto M2 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235,
                           /*IsPartialProfile*/ 1,
                           /*PartialProfileRatio*/ 0.00000012,
                           /*HotNumCounts*/ 3102082,
                           /*ColdNumCounts*/ 18306149);
  ProfileSummaryInfo PSI2 = buildPSI(M2.get());
  EXPECT_TRUE(PSI2.hasProfileSummary());
  EXPECT_TRUE(PSI2.hasSampleProfile());
  EXPECT_TRUE(PSI2.hasPartialSampleProfile());
  EXPECT_FALSE(PSI2.hasHugeWorkingSetSize());
  EXPECT_FALSE(PSI2.hasLargeWorkingSetSize());

  // With PartialProfileRatio set (non-zero) and a large working set size.
  auto M3 = makeLLVMModule("SampleProfile", /*NumCounts*/ 27493235,
                           /*IsPartialProfile*/ 1,
                           /*PartialProfileRatio*/ 0.9,
                           /*HotNumCounts*/ 3102082,
                           /*ColdNumCounts*/ 18306149);
  ProfileSummaryInfo PSI3 = buildPSI(M3.get());
  EXPECT_TRUE(PSI3.hasProfileSummary());
  EXPECT_TRUE(PSI3.hasSampleProfile());
  EXPECT_TRUE(PSI3.hasPartialSampleProfile());
  EXPECT_TRUE(PSI3.hasHugeWorkingSetSize());
  EXPECT_TRUE(PSI3.hasLargeWorkingSetSize());
}
|
||||
|
||||
} // end anonymous namespace
|
||||
} // end namespace llvm
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/AsmParser/Parser.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/ModuleSummaryIndex.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/RandomNumberGenerator.h"
|
||||
#include "gtest/gtest.h"
|
||||
@ -121,4 +122,40 @@ TEST(ModuleTest, setProfileSummary) {
|
||||
delete PS;
|
||||
}
|
||||
|
||||
// Verifies that Module::setPartialSampleProfileRatio() writes
// BlockCount / NumCounts into the PartialProfileRatio field of a partial
// sample profile summary.
TEST(ModuleTest, setPartialSampleProfileRatio) {
  const char *IRString = R"IR(
  !llvm.module.flags = !{!0}

  !0 = !{i32 1, !"ProfileSummary", !1}
  !1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11}
  !2 = !{!"ProfileFormat", !"SampleProfile"}
  !3 = !{!"TotalCount", i64 10000}
  !4 = !{!"MaxCount", i64 10}
  !5 = !{!"MaxInternalCount", i64 1}
  !6 = !{!"MaxFunctionCount", i64 1000}
  !7 = !{!"NumCounts", i64 200}
  !8 = !{!"NumFunctions", i64 3}
  !9 = !{!"IsPartialProfile", i64 1}
  !10 = !{!"PartialProfileRatio", double 0.0}
  !11 = !{!"DetailedSummary", !12}
  !12 = !{!13, !14, !15}
  !13 = !{i32 10000, i64 1000, i32 1}
  !14 = !{i32 990000, i64 300, i32 10}
  !15 = !{i32 999999, i64 5, i32 100}
  )IR";

  SMDiagnostic Err;
  LLVMContext Context;
  std::unique_ptr<Module> M = parseAssemblyString(IRString, Err, Context);
  // Fail the test cleanly instead of dereferencing a null module if the IR
  // above does not parse.
  ASSERT_TRUE(M != nullptr);

  ModuleSummaryIndex Index(/*HaveGVs*/ false);
  const unsigned BlockCount = 100;
  // Must match the NumCounts value in the IR above.
  const unsigned NumCounts = 200;
  Index.setBlockCount(BlockCount);
  M->setPartialSampleProfileRatio(Index);

  const double ExpectedRatio = static_cast<double>(BlockCount) / NumCounts;
  std::unique_ptr<ProfileSummary> PS(
      ProfileSummary::getFromMD(M->getProfileSummary(/*IsCS*/ false)));
  // The summary must still be readable after it has been rewritten.
  ASSERT_TRUE(PS != nullptr);
  EXPECT_EQ(ExpectedRatio, PS->getPartialProfileRatio());
}
|
||||
|
||||
} // end namespace
|
||||
|
Loading…
Reference in New Issue
Block a user