Generalize LSR's OptimizeMax to handle the new kinds of max expressions

that indvars may use, now that indvars is recognizing le and ge loops.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102235 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2010-04-24 03:13:44 +00:00
parent 9f93d30a26
commit 1d367988e2
2 changed files with 74 additions and 9 deletions

View File

@ -1461,12 +1461,26 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Add one to the backedge-taken count to get the trip count.
const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Check for a max calculation that matches the pattern.
if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
// Check for a max calculation that matches the pattern. There's no check
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
const SCEVNAryExpr *Max = 0;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_SLT;
Max = S;
} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_ULT;
Max = U;
} else {
// No match; bail.
return Cond;
const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
if (Max != SE.getSCEV(Sel)) return Cond;
}
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
@ -1475,7 +1489,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
if (!MaxLHS || MaxLHS != One) return Cond;
// ScalarEvolution canonicalizes constants to the left. For < and >, look
// for a comparison with 1. For <= and >=, a comparison with zero.
if (!MaxLHS ||
(ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
@ -1491,16 +1511,29 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
if (isa<ConstantInt>(BO->getOperand(1)) &&
cast<ConstantInt>(BO->getOperand(1))->isOne() &&
SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
if (isa<ConstantInt>(BO->getOperand(1)) &&
cast<ConstantInt>(BO->getOperand(1))->isOne() &&
SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
else
llvm_unreachable("Max doesn't match expected pattern!");
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
CmpInst::Predicate Pred =
isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);

View File

@ -0,0 +1,32 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
; LSR's OptimizeMax should eliminate the select (max).
; CHECK: foo:
; CHECK-NOT: cmov
; CHECK: jle
define void @foo(i64 %n, double* nocapture %p) nounwind {
entry:
%cmp6 = icmp slt i64 %n, 0 ; <i1> [#uses=1]
br i1 %cmp6, label %for.end, label %for.body.preheader
for.body.preheader: ; preds = %entry
%tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
%n.op = add i64 %n, 1 ; <i64> [#uses=1]
%tmp1 = select i1 %tmp, i64 %n.op, i64 1 ; <i64> [#uses=1]
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2]
%arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2]
%t4 = load double* %arrayidx ; <double> [#uses=1]
%mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1]
store double %mul, double* %arrayidx
%i.next = add nsw i64 %i, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %i.next, %tmp1 ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}