BlockFrequency: Saturate at 1 instead of 0 when multiplying a frequency with a branch probability.

Zero is used by BlockFrequencyInfo as a special "don't know" value. It also
causes a sink for frequencies as you can't ever get off a zero frequency with
more multiplies.

This recovers a 10% regression on MultiSource/Benchmarks/7zip. A zero frequency
was propagated into an inner loop causing excessive spilling.

PR16402.

llvm-svn: 184584
This commit is contained in:
Benjamin Kramer 2013-06-21 19:30:05 +00:00
parent 8f3eacc90e
commit 3315e168ee
4 changed files with 95 additions and 3 deletions

View File

@ -30,12 +30,20 @@ class BlockFrequency {
public:
BlockFrequency(uint64_t Freq = 0) : Frequency(Freq) { }
/// \brief Returns the frequency of the entry block of the function.
static uint64_t getEntryFrequency() { return ENTRY_FREQ; }
/// \brief Returns the frequency as a fixpoint number scaled by the entry
/// frequency.
uint64_t getFrequency() const { return Frequency; }
/// \brief Multiplies with a branch probability. The computation will never
/// overflow. If the result is equal to zero but the input wasn't this method
/// will return a frequency of one.
BlockFrequency &operator*=(const BranchProbability &Prob);
const BlockFrequency operator*(const BranchProbability &Prob) const;
/// \brief Adds another block frequency using saturating arithmetic.
BlockFrequency &operator+=(const BlockFrequency &Freq);
const BlockFrequency operator+(const BlockFrequency &Freq) const;

View File

@ -65,6 +65,9 @@ uint64_t div96bit(uint64_t W[2], uint32_t D) {
BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
if (Frequency == 0)
return *this;
uint32_t n = Prob.getNumerator();
uint32_t d = Prob.getDenominator();
@ -84,10 +87,15 @@ BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
// 64-bit.
mult96bit(Frequency, n, W);
Frequency = div96bit(W, d);
return *this;
} else {
// Fast case.
Frequency = mulRes / d;
}
Frequency = mulRes / d;
// Limit the result to 1; 0 is a sentinel value. This keeps BlockFrequencyInfo
// from getting stuck at zero frequencies just because a value became too
// small to be represented as a BlockFrequency.
Frequency = (n == 0 || Frequency != 0) ? Frequency : 1;
return *this;
}

View File

@ -0,0 +1,65 @@
; RUN: opt < %s -analyze -block-freq | FileCheck %s
; PR16402
define void @test1(i32 %n) nounwind {
entry:
%call = tail call i32* @cond() nounwind
%tobool = icmp eq i32* %call, null
br i1 %tobool, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %entry
%call1 = tail call i32* @cond() nounwind
%tobool2 = icmp eq i32* %call1, null
br i1 %tobool2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %land.lhs.true
%call4 = tail call i32* @cond() nounwind
%tobool5 = icmp eq i32* %call4, null
br i1 %tobool5, label %land.lhs.true6, label %if.end
land.lhs.true6: ; preds = %land.lhs.true3
%call7 = tail call i32* @cond() nounwind
%tobool8 = icmp eq i32* %call7, null
br i1 %tobool8, label %land.lhs.true9, label %if.end
land.lhs.true9: ; preds = %land.lhs.true6
%call10 = tail call i32* @cond() nounwind
%tobool11 = icmp eq i32* %call10, null
br i1 %tobool11, label %land.lhs.true12, label %if.end
land.lhs.true12: ; preds = %land.lhs.true9
%call13 = tail call i32* @cond() nounwind
%tobool14 = icmp eq i32* %call13, null
br i1 %tobool14, label %land.lhs.true15, label %if.end
land.lhs.true15: ; preds = %land.lhs.true12
%call16 = tail call i32* @cond() nounwind
%tobool17 = icmp eq i32* %call16, null
br i1 %tobool17, label %for.cond.preheader, label %if.end
for.cond.preheader: ; preds = %land.lhs.true15
%cmp21 = icmp eq i32 %n, 0
br i1 %cmp21, label %for.end, label %for.body
for.body: ; preds = %for.cond.preheader, %for.body
%i.022 = phi i32 [ %inc, %for.body ], [ 0, %for.cond.preheader ]
%call18 = tail call i32 @call() nounwind
%inc = add nsw i32 %i.022, 1
%cmp = icmp eq i32 %inc, %n
br i1 %cmp, label %for.end, label %for.body
for.end: ; preds = %for.body, %for.cond.preheader
%call19 = tail call i32* @cond() nounwind
br label %if.end
if.end: ; preds = %land.lhs.true15, %land.lhs.true12, %land.lhs.true9, %land.lhs.true6, %land.lhs.true3, %land.lhs.true, %entry, %for.end
ret void
; CHECK: entry = 1024
; CHECK-NOT: for.body = 0
; CHECK-NOT: for.end = 0
}
declare i32* @cond() nounwind
declare i32 @call() nounwind

View File

@ -8,11 +8,22 @@ using namespace llvm;
namespace {
TEST(BlockFrequencyTest, ZeroToZero) {
BlockFrequency Freq(0);
BranchProbability Prob(UINT32_MAX - 1, UINT32_MAX);
Freq *= Prob;
EXPECT_EQ(Freq.getFrequency(), 0u);
Freq = 1;
Freq *= BranchProbability::getZero();
EXPECT_EQ(Freq.getFrequency(), 0u);
}
TEST(BlockFrequencyTest, OneToZero) {
BlockFrequency Freq(1);
BranchProbability Prob(UINT32_MAX - 1, UINT32_MAX);
Freq *= Prob;
EXPECT_EQ(Freq.getFrequency(), 0u);
EXPECT_EQ(Freq.getFrequency(), 1u);
}
TEST(BlockFrequencyTest, OneToOne) {