mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-03 17:31:50 +00:00
[Hexagon] Generate hardware loop for a vectorized loop
The induction variable in the vectorized loop wasn't recognized properly, so a hardware loop wasn't generated. Differential Revision: http://reviews.llvm.org/D9722 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237388 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
49d3196418
commit
abf95a22c4
@ -1719,7 +1719,52 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
|
||||
// compared against an immediate, we can fix it.
|
||||
const RegisterBump &RB = I->second;
|
||||
if (CmpRegs.count(RB.first)) {
|
||||
if (!CmpImmOp)
|
||||
if (!CmpImmOp) {
|
||||
// If both operands to the compare instruction are registers, see if
|
||||
// it can be changed to use induction register as one of the operands.
|
||||
MachineInstr *IndI = nullptr;
|
||||
MachineInstr *nonIndI = nullptr;
|
||||
MachineOperand *IndMO = nullptr;
|
||||
MachineOperand *nonIndMO = nullptr;
|
||||
|
||||
for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
|
||||
MachineOperand &MO = PredDef->getOperand(i);
|
||||
if (MO.isReg() && MO.getReg() == RB.first) {
|
||||
DEBUG(dbgs() << "\n DefMI(" << i << ") = "
|
||||
<< *(MRI->getVRegDef(I->first)));
|
||||
if (IndI)
|
||||
return false;
|
||||
|
||||
IndI = MRI->getVRegDef(I->first);
|
||||
IndMO = &MO;
|
||||
} else if (MO.isReg()) {
|
||||
DEBUG(dbgs() << "\n DefMI(" << i << ") = "
|
||||
<< *(MRI->getVRegDef(MO.getReg())));
|
||||
if (nonIndI)
|
||||
return false;
|
||||
|
||||
nonIndI = MRI->getVRegDef(MO.getReg());
|
||||
nonIndMO = &MO;
|
||||
}
|
||||
}
|
||||
if (IndI && nonIndI &&
|
||||
nonIndI->getOpcode() == Hexagon::A2_addi &&
|
||||
nonIndI->getOperand(2).isImm() &&
|
||||
nonIndI->getOperand(2).getImm() == - RB.second) {
|
||||
bool Order = orderBumpCompare(IndI, PredDef);
|
||||
if (Order) {
|
||||
IndMO->setReg(I->first);
|
||||
nonIndMO->setReg(nonIndI->getOperand(1).getReg());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// It is not valid to do this transformation on an unsigned comparison
|
||||
// because it may underflow.
|
||||
Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0);
|
||||
if (!Cmp || Comparison::isUnsigned(Cmp))
|
||||
return false;
|
||||
|
||||
// If the register is being compared against an immediate, try changing
|
||||
@ -1739,12 +1784,6 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
|
||||
if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
|
||||
return false;
|
||||
|
||||
// It is not valid to do this transformation on an unsigned comparison
|
||||
// because it may underflow.
|
||||
Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0);
|
||||
if (!Cmp || Comparison::isUnsigned(Cmp))
|
||||
return false;
|
||||
|
||||
// Make sure that the compare happens after the bump. Otherwise,
|
||||
// after the fixup, the compare would use a yet-undefined register.
|
||||
MachineInstr *BumpI = MRI->getVRegDef(I->first);
|
||||
|
93
test/CodeGen/Hexagon/hwloop5.ll
Normal file
93
test/CodeGen/Hexagon/hwloop5.ll
Normal file
@ -0,0 +1,93 @@
|
||||
; RUN: llc -O3 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
|
||||
;
|
||||
; Generate hardware loop when unknown trip count loop is vectorized.
|
||||
|
||||
; CHECK: loop0(.LBB{{[0-9]*}}_{{[0-9]*}}, r{{[0-9]+}})
|
||||
; CHECK: endloop0
|
||||
; CHECK: loop0(.LBB{{[0-9]*}}_{{[0-9]*}}, r{{[0-9]+}})
|
||||
; CHECK: endloop0
|
||||
|
||||
@A = common global [1000 x i32] zeroinitializer, align 8
|
||||
@B = common global [1000 x i32] zeroinitializer, align 8
|
||||
|
||||
define i32 @dotprod2(i32 %count) #0 {
|
||||
entry.split:
|
||||
%cmp6 = icmp sgt i32 %count, 0
|
||||
br i1 %cmp6, label %polly.cond, label %for.end
|
||||
|
||||
for.end.loopexit:
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
%sum.0.lcssa.reg2mem.0.load37 = phi i32 [ 0, %entry.split ], [ %p_add34, %polly.loop_if13 ], [ %p_add, %for.end.loopexit ]
|
||||
ret i32 %sum.0.lcssa.reg2mem.0.load37
|
||||
|
||||
polly.cond:
|
||||
%0 = icmp sgt i32 %count, 1
|
||||
br i1 %0, label %polly.loop_if, label %polly.loop_if13
|
||||
|
||||
polly.loop_exit.loopexit:
|
||||
br label %polly.loop_exit
|
||||
|
||||
polly.loop_exit:
|
||||
%1 = phi <2 x i32> [ zeroinitializer, %polly.loop_if ], [ %addp_vec, %polly.loop_exit.loopexit ]
|
||||
%2 = extractelement <2 x i32> %1, i32 0
|
||||
%3 = extractelement <2 x i32> %1, i32 1
|
||||
%add_sum = add i32 %2, %3
|
||||
br label %polly.loop_if13
|
||||
|
||||
polly.loop_if:
|
||||
%4 = add i32 %count, -1
|
||||
%leftover_lb = and i32 %4, -2
|
||||
%polly.loop_guard = icmp eq i32 %leftover_lb, 0
|
||||
br i1 %polly.loop_guard, label %polly.loop_exit, label %polly.loop_preheader
|
||||
|
||||
polly.stmt.for.body:
|
||||
%addp_vec28 = phi <2 x i32> [ zeroinitializer, %polly.loop_preheader ], [ %addp_vec, %polly.stmt.for.body ]
|
||||
%scevgep.phi = phi i32* [ getelementptr inbounds ([1000 x i32], [1000 x i32]* @A, i32 0, i32 0), %polly.loop_preheader ], [ %scevgep.inc, %polly.stmt.for.body ]
|
||||
%scevgep9.phi = phi i32* [ getelementptr inbounds ([1000 x i32], [1000 x i32]* @B, i32 0, i32 0), %polly.loop_preheader ], [ %scevgep9.inc, %polly.stmt.for.body ]
|
||||
%polly.indvar = phi i32 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.for.body ]
|
||||
%vector_ptr = bitcast i32* %scevgep.phi to <2 x i32>*
|
||||
%_p_vec_full = load <2 x i32>, <2 x i32>* %vector_ptr, align 8
|
||||
%vector_ptr10 = bitcast i32* %scevgep9.phi to <2 x i32>*
|
||||
%_p_vec_full11 = load <2 x i32>, <2 x i32>* %vector_ptr10, align 8
|
||||
%mulp_vec = mul <2 x i32> %_p_vec_full11, %_p_vec_full
|
||||
%addp_vec = add <2 x i32> %mulp_vec, %addp_vec28
|
||||
%polly.indvar_next = add nsw i32 %polly.indvar, 2
|
||||
%polly.loop_cond = icmp eq i32 %polly.indvar, %polly.adjust_ub
|
||||
%scevgep.inc = getelementptr i32, i32* %scevgep.phi, i32 2
|
||||
%scevgep9.inc = getelementptr i32, i32* %scevgep9.phi, i32 2
|
||||
br i1 %polly.loop_cond, label %polly.loop_exit.loopexit, label %polly.stmt.for.body
|
||||
|
||||
polly.loop_preheader:
|
||||
%polly.adjust_ub = add i32 %leftover_lb, -2
|
||||
br label %polly.stmt.for.body
|
||||
|
||||
polly.loop_if13:
|
||||
%p_add34 = phi i32 [ 0, %polly.cond ], [ %add_sum, %polly.loop_exit ]
|
||||
%merge.lb = phi i32 [ 0, %polly.cond ], [ %leftover_lb, %polly.loop_exit ]
|
||||
%polly.loop_guard17 = icmp slt i32 %merge.lb, %count
|
||||
br i1 %polly.loop_guard17, label %polly.loop_preheader15, label %for.end
|
||||
|
||||
polly.stmt.for.body22:
|
||||
%p_add30 = phi i32 [ %p_add34, %polly.loop_preheader15 ], [ %p_add, %polly.stmt.for.body22 ]
|
||||
%polly.indvar18 = phi i32 [ %merge.lb, %polly.loop_preheader15 ], [ %polly.indvar_next19, %polly.stmt.for.body22 ]
|
||||
%5 = tail call i32 @llvm.annotation.i32(i32 %polly.indvar18, i8* null, i8* null, i32 0), !polly.loop.smallTripCount !0
|
||||
%scevgep23 = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i32 %polly.indvar18
|
||||
%_p_scalar_ = load i32, i32* %scevgep23, align 4
|
||||
%scevgep24 = getelementptr [1000 x i32], [1000 x i32]* @B, i32 0, i32 %polly.indvar18
|
||||
%_p_scalar_25 = load i32, i32* %scevgep24, align 4
|
||||
%p_mul = mul nsw i32 %_p_scalar_25, %_p_scalar_
|
||||
%p_add = add nsw i32 %p_mul, %p_add30
|
||||
%polly.indvar_next19 = add nsw i32 %polly.indvar18, 1
|
||||
%polly.loop_cond21 = icmp slt i32 %polly.indvar18, %polly.adjust_ub20
|
||||
br i1 %polly.loop_cond21, label %polly.stmt.for.body22, label %for.end.loopexit
|
||||
|
||||
polly.loop_preheader15:
|
||||
%polly.adjust_ub20 = add i32 %count, -1
|
||||
br label %polly.stmt.for.body22
|
||||
}
|
||||
|
||||
declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) #1
|
||||
|
||||
!0 = !{}
|
Loading…
Reference in New Issue
Block a user