mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 21:50:40 +00:00
Enable exitValue rewrite only when the cost of expansion is low.
The patch evaluates the expansion cost of exitValue in indVarSimplify pass, and only does the rewriting when the expansion cost is low or loop can be deleted with the rewriting. It provides an option "-replexitval=" to control the default aggressiveness of the exitvalue rewriting. It also fixes some missing cases in SCEVExpander::isHighCostExpansionHelper to enhance the evaluation of SCEV expansion cost. Differential Revision: http://reviews.llvm.org/D9800 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238507 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9886da621d
commit
61897e8564
@ -1807,6 +1807,23 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
|
||||
|
||||
bool SCEVExpander::isHighCostExpansionHelper(
|
||||
const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
|
||||
|
||||
// Zero/One operand expressions
|
||||
switch (S->getSCEVType()) {
|
||||
case scUnknown:
|
||||
case scConstant:
|
||||
return false;
|
||||
case scTruncate:
|
||||
return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L,
|
||||
Processed);
|
||||
case scZeroExtend:
|
||||
return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
|
||||
L, Processed);
|
||||
case scSignExtend:
|
||||
return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
|
||||
L, Processed);
|
||||
}
|
||||
|
||||
if (!Processed.insert(S).second)
|
||||
return false;
|
||||
|
||||
@ -1849,23 +1866,22 @@ bool SCEVExpander::isHighCostExpansionHelper(
|
||||
}
|
||||
}
|
||||
|
||||
// Recurse past add expressions, which commonly occur in the
|
||||
// BackedgeTakenCount. They may already exist in program code, and if not,
|
||||
// they are not too expensive rematerialize.
|
||||
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
|
||||
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
|
||||
I != E; ++I) {
|
||||
if (isHighCostExpansionHelper(*I, L, Processed))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
|
||||
// the exit condition.
|
||||
if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
|
||||
return true;
|
||||
|
||||
// Recurse past nary expressions, which commonly occur in the
|
||||
// BackedgeTakenCount. They may already exist in program code, and if not,
|
||||
// they are not too expensive rematerialize.
|
||||
if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
|
||||
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
|
||||
I != E; ++I) {
|
||||
if (isHighCostExpansionHelper(*I, L, Processed))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// If we haven't recognized an expensive SCEV pattern, assume it's an
|
||||
// expression produced by program code.
|
||||
return false;
|
||||
|
@ -68,6 +68,22 @@ static cl::opt<bool> VerifyIndvars(
|
||||
static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
|
||||
cl::desc("Reduce live induction variables."));
|
||||
|
||||
enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
|
||||
|
||||
static cl::opt<ReplaceExitVal> ReplaceExitValue(
|
||||
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
|
||||
cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
|
||||
cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
|
||||
clEnumValN(OnlyCheapRepl, "cheap",
|
||||
"only replace exit value when the cost is cheap"),
|
||||
clEnumValN(AlwaysRepl, "always",
|
||||
"always replace exit value whenever possible"),
|
||||
clEnumValEnd));
|
||||
|
||||
namespace {
|
||||
struct RewritePhi;
|
||||
}
|
||||
|
||||
namespace {
|
||||
class IndVarSimplify : public LoopPass {
|
||||
LoopInfo *LI;
|
||||
@ -112,6 +128,7 @@ namespace {
|
||||
|
||||
void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
|
||||
|
||||
bool CanLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
|
||||
void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
|
||||
|
||||
Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
|
||||
@ -464,6 +481,21 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
|
||||
SE->forgetLoop(L);
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Collect information about PHI nodes which can be transformed in
|
||||
// RewriteLoopExitValues.
|
||||
struct RewritePhi {
|
||||
PHINode *PN;
|
||||
unsigned Ith; // Ith incoming value.
|
||||
Value *Val; // Exit value after expansion.
|
||||
bool HighCost; // High Cost when expansion.
|
||||
bool SafePhi; // LCSSASafePhiForRAUW.
|
||||
|
||||
RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
|
||||
: PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
|
||||
};
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// RewriteLoopExitValues - Optimize IV users outside the loop.
|
||||
// As a side effect, reduces the amount of IV processing within the loop.
|
||||
@ -486,6 +518,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
|
||||
SmallVector<BasicBlock*, 8> ExitBlocks;
|
||||
L->getUniqueExitBlocks(ExitBlocks);
|
||||
|
||||
SmallVector<RewritePhi, 8> RewritePhiSet;
|
||||
// Find all values that are computed inside the loop, but used outside of it.
|
||||
// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
|
||||
// the exit blocks of the loop to find them.
|
||||
@ -604,31 +637,111 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
|
||||
DeadInsts.push_back(ExitVal);
|
||||
continue;
|
||||
}
|
||||
Changed = true;
|
||||
++NumReplaced;
|
||||
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L);
|
||||
|
||||
PN->setIncomingValue(i, ExitVal);
|
||||
|
||||
// If this instruction is dead now, delete it. Don't do it now to avoid
|
||||
// invalidating iterators.
|
||||
if (isInstructionTriviallyDead(Inst, TLI))
|
||||
DeadInsts.push_back(Inst);
|
||||
|
||||
// If we determined that this PHI is safe to replace even if an LCSSA
|
||||
// PHI, do so.
|
||||
if (LCSSASafePhiForRAUW) {
|
||||
PN->replaceAllUsesWith(ExitVal);
|
||||
PN->eraseFromParent();
|
||||
}
|
||||
// Collect all the candidate PHINodes to be rewritten.
|
||||
RewritePhiSet.push_back(
|
||||
RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet);
|
||||
|
||||
// Transformation.
|
||||
for (const RewritePhi &Phi : RewritePhiSet) {
|
||||
PHINode *PN = Phi.PN;
|
||||
Value *ExitVal = Phi.Val;
|
||||
|
||||
// Only do the rewrite when the ExitValue can be expanded cheaply.
|
||||
// If LoopCanBeDel is true, rewrite exit value aggressively.
|
||||
if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
|
||||
DeadInsts.push_back(ExitVal);
|
||||
continue;
|
||||
}
|
||||
|
||||
Changed = true;
|
||||
++NumReplaced;
|
||||
Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
|
||||
PN->setIncomingValue(Phi.Ith, ExitVal);
|
||||
|
||||
// If this instruction is dead now, delete it. Don't do it now to avoid
|
||||
// invalidating iterators.
|
||||
if (isInstructionTriviallyDead(Inst, TLI))
|
||||
DeadInsts.push_back(Inst);
|
||||
|
||||
// If we determined that this PHI is safe to replace even if an LCSSA
|
||||
// PHI, do so.
|
||||
if (Phi.SafePhi) {
|
||||
PN->replaceAllUsesWith(ExitVal);
|
||||
PN->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
// The insertion point instruction may have been deleted; clear it out
|
||||
// so that the rewriter doesn't trip over it later.
|
||||
Rewriter.clearInsertPoint();
|
||||
}
|
||||
|
||||
/// CanLoopBeDeleted - Check whether it is possible to delete the loop after
|
||||
/// rewriting exit value. If it is possible, ignore ReplaceExitValue and
|
||||
/// do rewriting aggressively.
|
||||
bool IndVarSimplify::CanLoopBeDeleted(
|
||||
Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
|
||||
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
// If there is no preheader, the loop will not be deleted.
|
||||
if (!Preheader)
|
||||
return false;
|
||||
|
||||
// In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
|
||||
// We obviate multiple ExitingBlocks case for simplicity.
|
||||
// TODO: If we see testcase with multiple ExitingBlocks can be deleted
|
||||
// after exit value rewriting, we can enhance the logic here.
|
||||
SmallVector<BasicBlock *, 4> ExitingBlocks;
|
||||
L->getExitingBlocks(ExitingBlocks);
|
||||
SmallVector<BasicBlock *, 8> ExitBlocks;
|
||||
L->getUniqueExitBlocks(ExitBlocks);
|
||||
if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
|
||||
return false;
|
||||
|
||||
BasicBlock *ExitBlock = ExitBlocks[0];
|
||||
BasicBlock::iterator BI = ExitBlock->begin();
|
||||
while (PHINode *P = dyn_cast<PHINode>(BI)) {
|
||||
Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
|
||||
|
||||
// If the Incoming value of P is found in RewritePhiSet, we know it
|
||||
// could be rewritten to use a loop invariant value in transformation
|
||||
// phase later. Skip it in the loop invariant check below.
|
||||
bool found = false;
|
||||
for (const RewritePhi &Phi : RewritePhiSet) {
|
||||
unsigned i = Phi.Ith;
|
||||
if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Instruction *I;
|
||||
if (!found && (I = dyn_cast<Instruction>(Incoming)))
|
||||
if (!L->hasLoopInvariantOperands(I))
|
||||
return false;
|
||||
|
||||
++BI;
|
||||
}
|
||||
|
||||
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
|
||||
LI != LE; ++LI) {
|
||||
for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE;
|
||||
++BI) {
|
||||
if (BI->mayHaveSideEffects())
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// IV Widening - Extend the width of an IV to cover its widest uses.
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1867,7 +1980,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
// loop into any instructions outside of the loop that use the final values of
|
||||
// the current expressions.
|
||||
//
|
||||
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
|
||||
if (ReplaceExitValue != NeverRepl &&
|
||||
!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
|
||||
RewriteLoopExitValues(L, Rewriter);
|
||||
|
||||
// Eliminate redundant IV cycles.
|
||||
|
52
test/Transforms/IndVarSimplify/exit_value_test2.ll
Normal file
52
test/Transforms/IndVarSimplify/exit_value_test2.ll
Normal file
@ -0,0 +1,52 @@
|
||||
; PR23538
|
||||
; RUN: opt < %s -indvars -loop-deletion -S | FileCheck %s
|
||||
|
||||
; Check IndVarSimplify should not replace exit value because or else
|
||||
; udiv will be introduced by expand and the cost will be high.
|
||||
;
|
||||
; CHECK-LABEL: @_Z3fooPKcjj(
|
||||
; CHECK-NOT: udiv
|
||||
|
||||
declare void @_Z3mixRjj(i32* dereferenceable(4), i32)
|
||||
declare void @llvm.lifetime.start(i64, i8* nocapture)
|
||||
declare void @llvm.lifetime.end(i64, i8* nocapture)
|
||||
|
||||
define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%tmp = bitcast i32* %a to i8*
|
||||
call void @llvm.lifetime.start(i64 4, i8* %tmp)
|
||||
store i32 -1640531527, i32* %a, align 4
|
||||
%cmp8 = icmp ugt i32 %len, 11
|
||||
br i1 %cmp8, label %while.body.lr.ph, label %while.end
|
||||
|
||||
while.body.lr.ph: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %while.body.lr.ph
|
||||
%keylen.010 = phi i32 [ %len, %while.body.lr.ph ], [ %sub, %while.body ]
|
||||
%s.addr.09 = phi i8* [ %s, %while.body.lr.ph ], [ %add.ptr, %while.body ]
|
||||
%tmp1 = bitcast i8* %s.addr.09 to i32*
|
||||
%tmp2 = load i32, i32* %tmp1, align 4
|
||||
%shl.i = shl i32 %tmp2, 1
|
||||
%and.i = and i32 %shl.i, 16843008
|
||||
%tmp3 = load i32, i32* %a, align 4
|
||||
%sub.i = add i32 %tmp3, %tmp2
|
||||
%add = sub i32 %sub.i, %and.i
|
||||
store i32 %add, i32* %a, align 4
|
||||
%add.ptr = getelementptr inbounds i8, i8* %s.addr.09, i64 12
|
||||
%sub = add i32 %keylen.010, -12
|
||||
%cmp = icmp ugt i32 %sub, 11
|
||||
br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
|
||||
|
||||
while.cond.while.end_crit_edge: ; preds = %while.body
|
||||
%sub.lcssa = phi i32 [ %sub, %while.body ]
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.cond.while.end_crit_edge, %entry
|
||||
%keylen.0.lcssa = phi i32 [ %sub.lcssa, %while.cond.while.end_crit_edge ], [ %len, %entry ]
|
||||
call void @_Z3mixRjj(i32* dereferenceable(4) %a, i32 %keylen.0.lcssa)
|
||||
%tmp4 = load i32, i32* %a, align 4
|
||||
call void @llvm.lifetime.end(i64 4, i8* %tmp)
|
||||
ret i32 %tmp4
|
||||
}
|
24
test/Transforms/IndVarSimplify/exit_value_test3.ll
Normal file
24
test/Transforms/IndVarSimplify/exit_value_test3.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: opt < %s -indvars -loop-deletion -S |FileCheck %s
|
||||
|
||||
; Check IndVarSimplify should replace exit value even if the expansion cost
|
||||
; is high because the loop can be deleted after the exit value rewrite.
|
||||
;
|
||||
; CHECK-LABEL: @_Z3fooPKcjj(
|
||||
; CHECK: udiv
|
||||
; CHECK: [[LABEL:^[a-zA-Z0-9_.]+]]:
|
||||
; CHECK-NOT: br {{.*}} [[LABEL]]
|
||||
|
||||
define i32 @_Z3fooPKcjj(i8* nocapture readnone %s, i32 %len, i32 %c) #0 {
|
||||
entry:
|
||||
br label %while.cond
|
||||
|
||||
while.cond: ; preds = %while.cond, %entry
|
||||
%klen.0 = phi i32 [ %len, %entry ], [ %sub, %while.cond ]
|
||||
%cmp = icmp ugt i32 %klen.0, 11
|
||||
%sub = add i32 %klen.0, -12
|
||||
br i1 %cmp, label %while.cond, label %while.end
|
||||
|
||||
while.end: ; preds = %while.cond
|
||||
%klen.0.lcssa = phi i32 [ %klen.0, %while.cond ]
|
||||
ret i32 %klen.0.lcssa
|
||||
}
|
@ -1,5 +1,4 @@
|
||||
; RUN: opt < %s -indvars -S | FileCheck %s
|
||||
;
|
||||
; RUN: opt < %s -indvars -replexitval=always -S | FileCheck %s
|
||||
; Make sure IndVars preserves LCSSA form, especially across loop nests.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
|
Loading…
Reference in New Issue
Block a user