mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-09 13:41:35 +00:00
Enable loopreroll for sext of loop control only IV
This patch extend loopreroll to allow the instruction chain of loop control only IV has sext. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269084 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f02416f9ad
commit
fdf3439d6a
@ -512,11 +512,11 @@ static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE,
|
|||||||
|
|
||||||
// Check if an IV is only used to control the loop. There are two cases:
|
// Check if an IV is only used to control the loop. There are two cases:
|
||||||
// 1. It only has one use which is loop increment, and the increment is only
|
// 1. It only has one use which is loop increment, and the increment is only
|
||||||
// used by comparison and the PHI, and the comparison is only used by branch.
|
// used by comparison and the PHI (could has sext with nsw in between), and the
|
||||||
|
// comparison is only used by branch.
|
||||||
// 2. It is used by loop increment and the comparison, the loop increment is
|
// 2. It is used by loop increment and the comparison, the loop increment is
|
||||||
// only used by the PHI, and the comparison is used only by the branch.
|
// only used by the PHI, and the comparison is used only by the branch.
|
||||||
bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
|
bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
|
||||||
|
|
||||||
unsigned IVUses = IV->getNumUses();
|
unsigned IVUses = IV->getNumUses();
|
||||||
if (IVUses != 2 && IVUses != 1)
|
if (IVUses != 2 && IVUses != 1)
|
||||||
return false;
|
return false;
|
||||||
@ -551,9 +551,17 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
|
|||||||
if (PN != IV)
|
if (PN != IV)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Must be a CMP
|
// Must be a CMP or an ext (of a value with nsw) then CMP
|
||||||
else if (!isCompareUsedByBranch(dyn_cast<Instruction>(UU)))
|
else {
|
||||||
return false;
|
Instruction *UUser = dyn_cast<Instruction>(UU);
|
||||||
|
// Skip SExt if we are extending an nsw value
|
||||||
|
// TODO: Allow ZExt too
|
||||||
|
if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 &&
|
||||||
|
isa<SExtInst>(UUser))
|
||||||
|
UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
|
||||||
|
if (!isCompareUsedByBranch(UUser))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
return false;
|
return false;
|
||||||
@ -1161,6 +1169,11 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
|
|||||||
Instruction *UUser = dyn_cast<Instruction>(UU);
|
Instruction *UUser = dyn_cast<Instruction>(UU);
|
||||||
// UUser could be compare, PHI or branch
|
// UUser could be compare, PHI or branch
|
||||||
Uses[UUser].set(IL_All);
|
Uses[UUser].set(IL_All);
|
||||||
|
// Skip SExt
|
||||||
|
if (isa<SExtInst>(UUser)) {
|
||||||
|
UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
|
||||||
|
Uses[UUser].set(IL_All);
|
||||||
|
}
|
||||||
// Is UUser a compare instruction?
|
// Is UUser a compare instruction?
|
||||||
if (UU->hasOneUse()) {
|
if (UU->hasOneUse()) {
|
||||||
Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
|
Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
|
||||||
@ -1502,8 +1515,6 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
|
|||||||
if (NeedNewIV)
|
if (NeedNewIV)
|
||||||
ICSCEV = SE->getMulExpr(IterCount,
|
ICSCEV = SE->getMulExpr(IterCount,
|
||||||
SE->getConstant(IterCount->getType(), Scale));
|
SE->getConstant(IterCount->getType(), Scale));
|
||||||
else
|
|
||||||
ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
|
|
||||||
|
|
||||||
// Iteration count SCEV minus or plus 1
|
// Iteration count SCEV minus or plus 1
|
||||||
const SCEV *MinusPlus1SCEV =
|
const SCEV *MinusPlus1SCEV =
|
||||||
@ -1515,18 +1526,26 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
|
|||||||
|
|
||||||
const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
|
const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV);
|
||||||
// Iteration count minus 1
|
// Iteration count minus 1
|
||||||
Value *ICMinusPlus1 = nullptr;
|
Instruction *InsertPtr = nullptr;
|
||||||
if (isa<SCEVConstant>(ICMinusPlus1SCEV)) {
|
if (isa<SCEVConstant>(ICMinusPlus1SCEV)) {
|
||||||
ICMinusPlus1 =
|
InsertPtr = BI;
|
||||||
Expander.expandCodeFor(ICMinusPlus1SCEV, NewIV->getType(), BI);
|
|
||||||
} else {
|
} else {
|
||||||
BasicBlock *Preheader = L->getLoopPreheader();
|
BasicBlock *Preheader = L->getLoopPreheader();
|
||||||
if (!Preheader)
|
if (!Preheader)
|
||||||
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
|
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
|
||||||
ICMinusPlus1 = Expander.expandCodeFor(
|
InsertPtr = Preheader->getTerminator();
|
||||||
ICMinusPlus1SCEV, NewIV->getType(), Preheader->getTerminator());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!isa<PointerType>(NewIV->getType()) && NeedNewIV &&
|
||||||
|
(SE->getTypeSizeInBits(NewIV->getType()) <
|
||||||
|
SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) {
|
||||||
|
IRBuilder<> Builder(BI);
|
||||||
|
Builder.SetCurrentDebugLocation(BI->getDebugLoc());
|
||||||
|
NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType());
|
||||||
|
}
|
||||||
|
Value *ICMinusPlus1 = Expander.expandCodeFor(
|
||||||
|
ICMinusPlus1SCEV, NewIV->getType(), InsertPtr);
|
||||||
|
|
||||||
Value *Cond =
|
Value *Cond =
|
||||||
new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
|
new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond");
|
||||||
BI->setCondition(Cond);
|
BI->setCondition(Cond);
|
||||||
@ -1705,6 +1724,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||||||
|
|
||||||
const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
|
const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
|
||||||
const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
|
const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType()));
|
||||||
|
DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
|
||||||
DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
|
DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
|
||||||
|
|
||||||
// First, we need to find the induction variable with respect to which we can
|
// First, we need to find the induction variable with respect to which we can
|
||||||
@ -1731,6 +1751,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
|
|||||||
Changed = true;
|
Changed = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
|
||||||
|
|
||||||
// Trip count of L has changed so SE must be re-evaluated.
|
// Trip count of L has changed so SE must be re-evaluated.
|
||||||
if (Changed)
|
if (Changed)
|
||||||
|
186
test/Transforms/LoopReroll/indvar_with_ext.ll
Normal file
186
test/Transforms/LoopReroll/indvar_with_ext.ll
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
; RUN: opt -S -loop-reroll %s | FileCheck %s
|
||||||
|
target triple = "aarch64--linux-gnu"
|
||||||
|
|
||||||
|
define void @test(i32 %n, float* %arrayidx200, float* %arrayidx164, float* %arrayidx172) {
|
||||||
|
entry:
|
||||||
|
%rem.i = srem i32 %n, 4
|
||||||
|
%t22 = load float, float* %arrayidx172, align 4
|
||||||
|
%cmp.9 = icmp eq i32 %n, 0
|
||||||
|
%t7 = sext i32 %n to i64
|
||||||
|
br i1 %cmp.9, label %while.end, label %while.body.preheader
|
||||||
|
|
||||||
|
while.body.preheader:
|
||||||
|
br label %while.body
|
||||||
|
|
||||||
|
while.body:
|
||||||
|
;CHECK-LABEL: while.body:
|
||||||
|
;CHECK-NEXT: %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ]
|
||||||
|
;CHECK-NEXT: [[T1:%[0-9]+]] = trunc i64 %indvars.iv.i423 to i32
|
||||||
|
;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423
|
||||||
|
;CHECK-NEXT: %t1 = load float, float* %arrayidx62.i, align 4
|
||||||
|
;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423
|
||||||
|
;CHECK-NEXT: %t2 = load float, float* %arrayidx64.i, align 4
|
||||||
|
;CHECK-NEXT: %mul65.i = fmul fast float %t2, %t22
|
||||||
|
;CHECK-NEXT: %add66.i = fadd fast float %mul65.i, %t1
|
||||||
|
;CHECK-NEXT: store float %add66.i, float* %arrayidx62.i, align 4
|
||||||
|
;CHECK-NEXT: %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 1
|
||||||
|
;CHECK-NEXT: [[T2:%[0-9]+]] = sext i32 [[T1]] to i64
|
||||||
|
;CHECK-NEXT: %exitcond = icmp eq i64 [[T2]], %{{[0-9]+}}
|
||||||
|
;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
|
||||||
|
|
||||||
|
%indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ]
|
||||||
|
%i.22.i = phi i32 [ %add103.i, %while.body ], [ %rem.i, %while.body.preheader ]
|
||||||
|
%arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423
|
||||||
|
%t1 = load float, float* %arrayidx62.i, align 4
|
||||||
|
%arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423
|
||||||
|
%t2 = load float, float* %arrayidx64.i, align 4
|
||||||
|
%mul65.i = fmul fast float %t2, %t22
|
||||||
|
%add66.i = fadd fast float %mul65.i, %t1
|
||||||
|
store float %add66.i, float* %arrayidx62.i, align 4
|
||||||
|
%t3 = add nsw i64 %indvars.iv.i423, 1
|
||||||
|
%arrayidx71.i = getelementptr inbounds float, float* %arrayidx200, i64 %t3
|
||||||
|
%t4 = load float, float* %arrayidx71.i, align 4
|
||||||
|
%arrayidx74.i = getelementptr inbounds float, float* %arrayidx164, i64 %t3
|
||||||
|
%t5 = load float, float* %arrayidx74.i, align 4
|
||||||
|
%mul75.i = fmul fast float %t5, %t22
|
||||||
|
%add76.i = fadd fast float %mul75.i, %t4
|
||||||
|
store float %add76.i, float* %arrayidx71.i, align 4
|
||||||
|
%add103.i = add nsw i32 %i.22.i, 2
|
||||||
|
%t6 = sext i32 %add103.i to i64
|
||||||
|
%cmp58.i = icmp slt i64 %t6, %t7
|
||||||
|
%indvars.iv.next.i424 = add i64 %indvars.iv.i423, 2
|
||||||
|
br i1 %cmp58.i, label %while.body, label %while.end.loopexit
|
||||||
|
|
||||||
|
while.end.loopexit:
|
||||||
|
br label %while.end
|
||||||
|
|
||||||
|
while.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: noinline norecurse nounwind
|
||||||
|
define i32 @test2(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
|
||||||
|
entry:
|
||||||
|
%cmp18 = icmp sgt i64 %n, 0
|
||||||
|
br i1 %cmp18, label %for.body.preheader, label %for.end
|
||||||
|
|
||||||
|
for.body.preheader: ; preds = %entry
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body.preheader, %for.body
|
||||||
|
|
||||||
|
;CHECK: for.body:
|
||||||
|
;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
|
||||||
|
;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
|
||||||
|
;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
|
||||||
|
;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
|
||||||
|
;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
|
||||||
|
;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
|
||||||
|
;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
|
||||||
|
;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||||
|
|
||||||
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||||
|
%arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
|
||||||
|
%0 = load i32, i32* %arrayidx, align 4
|
||||||
|
%arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
|
||||||
|
store i32 %0, i32* %arrayidx3, align 4
|
||||||
|
%1 = or i64 %indvars.iv, 1
|
||||||
|
%arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
|
||||||
|
%2 = load i32, i32* %arrayidx5, align 4
|
||||||
|
%arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
|
||||||
|
store i32 %2, i32* %arrayidx8, align 4
|
||||||
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
||||||
|
%cmp = icmp slt i64 %indvars.iv.next, %n
|
||||||
|
br i1 %cmp, label %for.body, label %for.end.loopexit
|
||||||
|
|
||||||
|
for.end.loopexit: ; preds = %for.body
|
||||||
|
br label %for.end
|
||||||
|
|
||||||
|
for.end: ; preds = %for.end.loopexit, %entry
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: noinline norecurse nounwind
|
||||||
|
define i32 @test3(i32 %n, i32* nocapture %x, i32* nocapture readonly %y) {
|
||||||
|
entry:
|
||||||
|
%cmp21 = icmp sgt i32 %n, 0
|
||||||
|
br i1 %cmp21, label %for.body.preheader, label %for.end
|
||||||
|
|
||||||
|
for.body.preheader: ; preds = %entry
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body.preheader, %for.body
|
||||||
|
|
||||||
|
;CHECK: for.body:
|
||||||
|
;CHECK: %add12 = add i8 %i.022, 2
|
||||||
|
;CHECK-NEXT: %conv = sext i8 %add12 to i32
|
||||||
|
;CHECK-NEXT: %cmp = icmp slt i32 %conv, %n
|
||||||
|
;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end.loopexit
|
||||||
|
|
||||||
|
%conv23 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ]
|
||||||
|
%i.022 = phi i8 [ %add12, %for.body ], [ 0, %for.body.preheader ]
|
||||||
|
%idxprom = sext i8 %i.022 to i64
|
||||||
|
%arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
|
||||||
|
%0 = load i32, i32* %arrayidx, align 4
|
||||||
|
%arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %idxprom
|
||||||
|
store i32 %0, i32* %arrayidx3, align 4
|
||||||
|
%add = or i32 %conv23, 1
|
||||||
|
%idxprom5 = sext i32 %add to i64
|
||||||
|
%arrayidx6 = getelementptr inbounds i32, i32* %y, i64 %idxprom5
|
||||||
|
%1 = load i32, i32* %arrayidx6, align 4
|
||||||
|
%arrayidx10 = getelementptr inbounds i32, i32* %x, i64 %idxprom5
|
||||||
|
store i32 %1, i32* %arrayidx10, align 4
|
||||||
|
%add12 = add i8 %i.022, 2
|
||||||
|
%conv = sext i8 %add12 to i32
|
||||||
|
%cmp = icmp slt i32 %conv, %n
|
||||||
|
br i1 %cmp, label %for.body, label %for.end.loopexit
|
||||||
|
|
||||||
|
for.end.loopexit: ; preds = %for.body
|
||||||
|
br label %for.end
|
||||||
|
|
||||||
|
for.end: ; preds = %for.end.loopexit, %entry
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: noinline norecurse nounwind
|
||||||
|
define i32 @test4(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
|
||||||
|
entry:
|
||||||
|
%cmp18 = icmp eq i64 %n, 0
|
||||||
|
br i1 %cmp18, label %for.end, label %for.body.preheader
|
||||||
|
|
||||||
|
for.body.preheader: ; preds = %entry
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body.preheader, %for.body
|
||||||
|
|
||||||
|
;CHECK: for.body:
|
||||||
|
;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
|
||||||
|
;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
|
||||||
|
;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
|
||||||
|
;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
|
||||||
|
;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
|
||||||
|
;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
|
||||||
|
;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
|
||||||
|
;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
|
||||||
|
|
||||||
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
|
||||||
|
%arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
|
||||||
|
%0 = load i32, i32* %arrayidx, align 4
|
||||||
|
%arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
|
||||||
|
store i32 %0, i32* %arrayidx3, align 4
|
||||||
|
%1 = or i64 %indvars.iv, 1
|
||||||
|
%arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
|
||||||
|
%2 = load i32, i32* %arrayidx5, align 4
|
||||||
|
%arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
|
||||||
|
store i32 %2, i32* %arrayidx8, align 4
|
||||||
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
||||||
|
%cmp = icmp ult i64 %indvars.iv.next, %n
|
||||||
|
br i1 %cmp, label %for.body, label %for.end.loopexit
|
||||||
|
|
||||||
|
for.end.loopexit: ; preds = %for.body
|
||||||
|
br label %for.end
|
||||||
|
|
||||||
|
for.end: ; preds = %for.end.loopexit, %entry
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user