mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 12:19:53 +00:00
[LV] Avoid emitting trivially dead instructions
Some instructions from the original loop, when vectorized, can become trivially dead. This happens because of the way we structure the new loop. For example, we create new induction variables and induction variable "steps" in the new loop. Thus, when we go to vectorize the original induction variable update, it may no longer be needed due to the instructions we've already created. This patch prevents us from creating these redundant instructions. This reduces code size before simplification and allows greater flexibility in code generation since we have fewer unnecessary instruction uses. Differential Revision: https://reviews.llvm.org/D25631 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284631 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
db638de2de
commit
f461f21edc
@ -441,6 +441,10 @@ protected:
|
||||
/// respective conditions.
|
||||
void predicateInstructions();
|
||||
|
||||
/// Collect the instructions from the original loop that would be trivially
|
||||
/// dead in the vectorized loop if generated.
|
||||
void collectTriviallyDeadInstructions();
|
||||
|
||||
/// Shrinks vector element sizes to the smallest bitwidth they can be legally
|
||||
/// represented as.
|
||||
void truncateToMinimalBitwidths();
|
||||
@ -763,6 +767,14 @@ protected:
|
||||
|
||||
// Record whether runtime checks are added.
|
||||
bool AddedSafetyChecks;
|
||||
|
||||
// Holds instructions from the original loop whose counterparts in the
|
||||
// vectorized loop would be trivially dead if generated. For example,
|
||||
// original induction update instructions can become dead because we
|
||||
// separately emit induction "steps" when generating code for the new loop.
|
||||
// Similarly, we create a new latch condition when setting up the structure
|
||||
// of the new loop, so the old one can become dead.
|
||||
SmallPtrSet<Instruction *, 4> DeadInstructions;
|
||||
};
|
||||
|
||||
class InnerLoopUnroller : public InnerLoopVectorizer {
|
||||
@ -3802,6 +3814,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
|
||||
// are vectorized, so we can use them to construct the PHI.
|
||||
PhiVector PHIsToFix;
|
||||
|
||||
// Collect instructions from the original loop that will become trivially
|
||||
// dead in the vectorized loop. We don't need to vectorize these
|
||||
// instructions.
|
||||
collectTriviallyDeadInstructions();
|
||||
|
||||
// Scan the loop in a topological order to ensure that defs are vectorized
|
||||
// before users.
|
||||
LoopBlocksDFS DFS(OrigLoop);
|
||||
@ -4209,6 +4226,29 @@ void InnerLoopVectorizer::fixLCSSAPHIs() {
|
||||
}
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::collectTriviallyDeadInstructions() {
|
||||
BasicBlock *Latch = OrigLoop->getLoopLatch();
|
||||
|
||||
// We create new control-flow for the vectorized loop, so the original
|
||||
// condition will be dead after vectorization if it's only used by the
|
||||
// branch.
|
||||
auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
|
||||
if (Cmp && Cmp->hasOneUse())
|
||||
DeadInstructions.insert(Cmp);
|
||||
|
||||
// We create new "steps" for induction variable updates to which the original
|
||||
// induction variables map. An original update instruction will be dead if
|
||||
// all its users except the induction variable are dead.
|
||||
for (auto &Induction : *Legal->getInductionVars()) {
|
||||
PHINode *Ind = Induction.first;
|
||||
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
|
||||
if (all_of(IndUpdate->users(), [&](User *U) -> bool {
|
||||
return U == Ind || DeadInstructions.count(cast<Instruction>(U));
|
||||
}))
|
||||
DeadInstructions.insert(IndUpdate);
|
||||
}
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::predicateInstructions() {
|
||||
|
||||
// For each instruction I marked for predication on value C, split I into its
|
||||
@ -4536,6 +4576,11 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
|
||||
// For each instruction in the old loop.
|
||||
for (Instruction &I : *BB) {
|
||||
|
||||
// If the instruction will become trivially dead when vectorized, we don't
|
||||
// need to generate it.
|
||||
if (DeadInstructions.count(&I))
|
||||
continue;
|
||||
|
||||
// Scalarize instructions that should remain scalar after vectorization.
|
||||
if (!(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
|
||||
isa<DbgInfoIntrinsic>(&I)) &&
|
||||
|
42
test/Transforms/LoopVectorize/dead_instructions.ll
Normal file
42
test/Transforms/LoopVectorize/dead_instructions.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; RUN: opt < %s -force-vector-width=2 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
; CHECK-LABEL: @dead_instructions_01
|
||||
;
|
||||
; This test ensures that we don't generate trivially dead instructions prior to
|
||||
; instruction simplification. We don't need to generate instructions
|
||||
; corresponding to the original induction variable update or branch condition,
|
||||
; since we rewrite the loop structure.
|
||||
;
|
||||
; CHECK: vector.body:
|
||||
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
; CHECK: %[[I0:.+]] = add i64 %index, 0
|
||||
; CHECK: %[[I2:.+]] = add i64 %index, 2
|
||||
; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[I0]]
|
||||
; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[I2]]
|
||||
; CHECK-NOT: add nuw nsw i64 %[[I0]], 1
|
||||
; CHECK-NOT: add nuw nsw i64 %[[I2]], 1
|
||||
; CHECK-NOT: icmp slt i64 {{.*}}, %n
|
||||
; CHECK: %index.next = add i64 %index, 4
|
||||
; CHECK: %[[CMP:.+]] = icmp eq i64 %index.next, %n.vec
|
||||
; CHECK: br i1 %[[CMP]], label %middle.block, label %vector.body
|
||||
;
|
||||
; Scalar input loop for the vectorizer: steps %i from 0 while %i.next < %n
; (signed), loads a[%i], adds it into the running value %r, and returns the
; final sum.
define i64 @dead_instructions_01(i64 *%a, i64 %n) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
; %i is the induction variable; %r carries the running sum across iterations.
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
||||
%r = phi i64 [ %tmp2, %for.body ], [ 0, %entry ]
|
||||
%tmp0 = getelementptr inbounds i64, i64* %a, i64 %i
|
||||
%tmp1 = load i64, i64* %tmp0, align 8
|
||||
%tmp2 = add i64 %tmp1, %r
|
||||
; Induction update and latch condition: these are the two instructions the
; test expects to have no counterpart in the generated vector body.
%i.next = add nuw nsw i64 %i, 1
|
||||
%cond = icmp slt i64 %i.next, %n
|
||||
br i1 %cond, label %for.body, label %for.end
|
||||
|
||||
for.end:
|
||||
; Single-entry phi carrying the final sum out of the loop.
%tmp3 = phi i64 [ %tmp2, %for.body ]
|
||||
ret i64 %tmp3
|
||||
}
|
Loading…
Reference in New Issue
Block a user