mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:40:38 +00:00
Verify profile data confirms large loop trip counts.
Summary:
Loops with inequality comparers, such as:

    // unsigned bound
    for (unsigned i = 1; i < bound; ++i) {...}

have getSmallConstantMaxTripCount report a large maximum static trip count - in this case, 0xfffffffe. However, profiling info may show that the trip count is much smaller, and thus counter-recommend vectorization.

This change:
- flips loop-vectorize-with-block-frequency on by default.
- validates that profiled loop frequency data supports vectorization, when static info appears to not counter-recommend it. Absence of profile data means we rely on static data, just as we've done so far.

Reviewers: twoh, mkuper, davidxl, tejohnson, Ayal
Reviewed By: davidxl
Subscribers: bkramer, llvm-commits
Differential Revision: https://reviews.llvm.org/D42946
llvm-svn: 324543
This commit is contained in:
parent
f39cd6d76b
commit
239ce88ecb
@ -208,7 +208,7 @@ static cl::opt<unsigned> SmallLoopCost(
|
||||
"The cost of a loop that is considered 'small' by the interleaver."));
|
||||
|
||||
static cl::opt<bool> LoopVectorizeWithBlockFrequency(
|
||||
"loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden,
|
||||
"loop-vectorize-with-block-frequency", cl::init(true), cl::Hidden,
|
||||
cl::desc("Enable the use of the block frequency analysis to access PGO "
|
||||
"heuristics minimizing code growth in cold regions and being more "
|
||||
"aggressive in hot regions."));
|
||||
@ -8347,9 +8347,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||
|
||||
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
|
||||
// count by optimizing for size, to minimize overheads.
|
||||
unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
|
||||
bool HasExpectedTC = (ExpectedTC > 0);
|
||||
|
||||
// Prefer constant trip counts over profile data, over upper bound estimate.
|
||||
unsigned ExpectedTC = 0;
|
||||
bool HasExpectedTC = false;
|
||||
if (const SCEVConstant *ConstExits =
|
||||
dyn_cast<SCEVConstant>(SE->getBackedgeTakenCount(L))) {
|
||||
const APInt &ExitsCount = ConstExits->getAPInt();
|
||||
// We are interested in small values for ExpectedTC. Skip over those that
|
||||
// can't fit an unsigned.
|
||||
if (ExitsCount.ult(std::numeric_limits<unsigned>::max())) {
|
||||
ExpectedTC = static_cast<unsigned>(ExitsCount.getZExtValue()) + 1;
|
||||
HasExpectedTC = true;
|
||||
}
|
||||
}
|
||||
// ExpectedTC may be large because it's bound by a variable. Check
|
||||
// profiling information to validate we should vectorize.
|
||||
if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
|
||||
auto EstimatedTC = getLoopEstimatedTripCount(L);
|
||||
if (EstimatedTC) {
|
||||
@ -8357,6 +8369,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||
HasExpectedTC = true;
|
||||
}
|
||||
}
|
||||
if (!HasExpectedTC) {
|
||||
ExpectedTC = SE->getSmallConstantMaxTripCount(L);
|
||||
HasExpectedTC = (ExpectedTC > 0);
|
||||
}
|
||||
|
||||
if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
|
||||
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
|
||||
|
@ -84,6 +84,126 @@ for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
|
||||
; Simple loop with low tripcount and inequality test for exit.
|
||||
; Should not be vectorized.
; The exit bound is the runtime argument %bound, so the trip count is not a
; compile-time constant; the only trip-count hint in this function is the
; !prof !1 attachment on the loop branch.
|
||||
|
||||
; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
|
||||
; CHECK-NOT: <{{[0-9]+}} x i8>
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
|
||||
; Rewrite the byte at index %i.08 of @tab: 2 if it was 0, otherwise 1.
%0 = load i8, i8* %arrayidx, align 1
|
||||
%cmp1 = icmp eq i8 %0, 0
|
||||
%. = select i1 %cmp1, i8 2, i8 1
|
||||
store i8 %., i8* %arrayidx, align 1
|
||||
%inc = add nsw i32 %i.08, 1
|
||||
; Leave the loop once the pre-increment index %i.08 is signed-greater than
; %bound. The successors are (%for.end, %for.body), so the weights in !1
; (100, 0) put all profile weight on the exit edge and none on the backedge.
%exitcond = icmp sgt i32 %i.08, %bound
|
||||
br i1 %exitcond, label %for.end, label %for.body, !prof !1
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @const_low_trip_count() {
|
||||
; Simple loop with constant, small trip count and no profiling info.
; The loop branch has no !prof attachment; the bound is the literal 2 compared
; against the pre-increment index, so the body runs for %i.08 = 0, 1 and 2.
|
||||
|
||||
; CHECK-LABEL: @const_low_trip_count
|
||||
; CHECK-NOT: <{{[0-9]+}} x i8>
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
|
||||
; Rewrite the byte at index %i.08 of @tab: 2 if it was 0, otherwise 1.
%0 = load i8, i8* %arrayidx, align 1
|
||||
%cmp1 = icmp eq i8 %0, 0
|
||||
%. = select i1 %cmp1, i8 2, i8 1
|
||||
store i8 %., i8* %arrayidx, align 1
|
||||
%inc = add nsw i32 %i.08, 1
|
||||
; Take the backedge while the pre-increment index is signed-less than 2.
%exitcond = icmp slt i32 %i.08, 2
|
||||
br i1 %exitcond, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @const_large_trip_count() {
|
||||
; Simple loop with constant large trip count and no profiling info.
; Same body as @const_low_trip_count, but the literal bound is 1000 and the
; expectation below is that a vector-of-i8 type does appear in the output.
|
||||
|
||||
; CHECK-LABEL: @const_large_trip_count
|
||||
; CHECK: <{{[0-9]+}} x i8>
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
|
||||
; Rewrite the byte at index %i.08 of @tab: 2 if it was 0, otherwise 1.
%0 = load i8, i8* %arrayidx, align 1
|
||||
%cmp1 = icmp eq i8 %0, 0
|
||||
%. = select i1 %cmp1, i8 2, i8 1
|
||||
store i8 %., i8* %arrayidx, align 1
|
||||
%inc = add nsw i32 %i.08, 1
|
||||
; Take the backedge while the pre-increment index is signed-less than 1000.
; NOTE(review): the GEP indexes a [32 x i8] array with this index; presumably
; acceptable because the test is only compiled, never executed - confirm.
%exitcond = icmp slt i32 %i.08, 1000
|
||||
br i1 %exitcond, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @const_small_trip_count_step() {
|
||||
; Simple loop with static, small trip count and no profiling info.
; Unlike the other constant-bound tests, the induction variable advances by 5
; per iteration, so with the bound 10 the body runs for %i.08 = 0, 5 and 10.
|
||||
|
||||
; CHECK-LABEL: @const_small_trip_count_step
|
||||
; CHECK-NOT: <{{[0-9]+}} x i8>
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
|
||||
; Rewrite the byte at index %i.08 of @tab: 2 if it was 0, otherwise 1.
%0 = load i8, i8* %arrayidx, align 1
|
||||
%cmp1 = icmp eq i8 %0, 0
|
||||
%. = select i1 %cmp1, i8 2, i8 1
|
||||
store i8 %., i8* %arrayidx, align 1
|
||||
; Step of 5 rather than 1.
%inc = add nsw i32 %i.08, 5
|
||||
; Take the backedge while the pre-increment index is signed-less than 10.
%exitcond = icmp slt i32 %i.08, 10
|
||||
br i1 %exitcond, label %for.body, label %for.end
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @const_trip_over_profile() {
|
||||
; constant trip count takes precedence over profile data
; The exit compare uses the literal 1000, the same bound as
; @const_large_trip_count, but here the loop branch also carries !prof !1.
|
||||
|
||||
; CHECK-LABEL: @const_trip_over_profile
|
||||
; CHECK: <{{[0-9]+}} x i8>
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
|
||||
; Rewrite the byte at index %i.08 of @tab: 2 if it was 0, otherwise 1.
%0 = load i8, i8* %arrayidx, align 1
|
||||
%cmp1 = icmp eq i8 %0, 0
|
||||
%. = select i1 %cmp1, i8 2, i8 1
|
||||
store i8 %., i8* %arrayidx, align 1
|
||||
%inc = add nsw i32 %i.08, 1
|
||||
; Take the backedge while the pre-increment index is signed-less than 1000.
; The successors are (%for.body, %for.end), the reverse of the order used in
; @foo_low_trip_count_icmp_sgt, so the weights in !1 (100, 0) land on the
; backedge and the exit edge respectively.
; NOTE(review): what trip-count estimate the profile yields for these weights
; is not visible here - confirm against getLoopEstimatedTripCount.
%exitcond = icmp slt i32 %i.08, 1000
|
||||
br i1 %exitcond, label %for.body, label %for.end, !prof !1
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
!0 = !{!"function_entry_count", i64 100}
|
||||
!1 = !{!"branch_weights", i32 100, i32 0}
|
||||
|
Loading…
Reference in New Issue
Block a user