mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-27 21:30:51 +00:00
[ARM] Find VPT implicitly predicated by VCTP
On failing to find a VCTP in the list of instructions that explicitly predicate the entry of a VPT block, inspect whether the block is controlled via a VPT which is implicitly predicated due to its predicated operand(s). Differential Revision: https://reviews.llvm.org/D87819
This commit is contained in:
parent
919066e494
commit
a28cbccef7
@ -433,7 +433,6 @@ MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
|
||||
if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI))
|
||||
return LocalDef;
|
||||
|
||||
SmallPtrSet<MachineBasicBlock*, 4> VisitedBBs;
|
||||
SmallPtrSet<MachineInstr*, 2> Incoming;
|
||||
MachineBasicBlock *Parent = MI->getParent();
|
||||
for (auto *Pred : Parent->predecessors())
|
||||
|
@ -256,12 +256,54 @@ namespace {
|
||||
return isPredicatedOnVCTP(Insts.front(), Exclusive);
|
||||
}
|
||||
|
||||
static bool isValid() {
|
||||
// If this block begins with a VPT, we can check whether it's using
|
||||
// at least one predicated input, as well as possibly loop-invariant ones,
|
||||
// which would result in it being implicitly predicated.
|
||||
static bool hasImplicitlyValidVPT(VPTState &Block,
|
||||
ReachingDefAnalysis &RDA) {
|
||||
SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
|
||||
// The first instruction recorded for the block is the one inspected below.
MachineInstr *VPT = Insts.front();
|
||||
assert(isVPTOpcode(VPT->getOpcode()) &&
|
||||
"Expected VPT block to begin with VPT/VPST");
|
||||
|
||||
// A block that begins with a VPST is rejected outright; only the other
// VPT opcodes go through the operand checks below.
if (VPT->getOpcode() == ARM::MVE_VPST)
|
||||
return false;
|
||||
|
||||
// True when the instruction that RDA.getMIOperand returns for operand Idx
// of MI is non-null, is a member of PredicatedInsts, and is itself
// predicated on a VCTP.
auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
|
||||
MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
|
||||
return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
|
||||
};
|
||||
|
||||
// Treats operand Idx of MI as invariant when it is not a register operand
// (or its register is zero), when RDA reports no global reaching defs for
// it, or when none of those defs has the same parent block as the VPT.
auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {
|
||||
MachineOperand &MO = MI->getOperand(Idx);
|
||||
// Non-register operands and the zero register have no def to look up.
if (!MO.isReg() || !MO.getReg())
|
||||
return true;
|
||||
|
||||
SmallPtrSet<MachineInstr *, 2> Defs;
|
||||
RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs);
|
||||
if (Defs.empty())
|
||||
return true;
|
||||
|
||||
// Any def located in the VPT's own parent block disqualifies the operand.
for (auto *Def : Defs)
|
||||
if (Def->getParent() == VPT->getParent())
|
||||
return false;
|
||||
return true;
|
||||
};
|
||||
|
||||
// Check that at least one of the operands is directly predicated on a
|
||||
// vctp and allow an invariant value too.
|
||||
// Concretely: one of operands 1 and 2 must be predicated, and each of them
// must be either predicated or invariant.
// NOTE(review): operands 1 and 2 are presumably the VPT's two compare
// inputs - confirm against the instruction definitions.
return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
|
||||
(IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
|
||||
(IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
|
||||
}
|
||||
|
||||
static bool isValid(ReachingDefAnalysis &RDA) {
|
||||
// All predication within the loop should be based on vctp. If the block
|
||||
// isn't predicated on entry, check whether the vctp is within the block
|
||||
// and that all other instructions are then predicated on it.
|
||||
for (auto &Block : Blocks) {
|
||||
if (isEntryPredicatedOnVCTP(Block))
|
||||
if (isEntryPredicatedOnVCTP(Block, false) ||
|
||||
hasImplicitlyValidVPT(Block, RDA))
|
||||
continue;
|
||||
|
||||
SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
|
||||
@ -517,7 +559,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!VPTState::isValid())
|
||||
if (!VPTState::isValid(RDA))
|
||||
return false;
|
||||
|
||||
if (!ValidateLiveOuts()) {
|
||||
|
@ -7,23 +7,13 @@ define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg
|
||||
; CHECK-NEXT: push {r4, r5, r7, lr}
|
||||
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: ldrd r5, r12, [sp, #80]
|
||||
; CHECK-NEXT: cmp.w r12, #4
|
||||
; CHECK-NEXT: mov r4, r12
|
||||
; CHECK-NEXT: vmvn.i32 q0, #0x80000000
|
||||
; CHECK-NEXT: it ge
|
||||
; CHECK-NEXT: movge r4, #4
|
||||
; CHECK-NEXT: vmov.i32 q1, #0x3f
|
||||
; CHECK-NEXT: sub.w r4, r12, r4
|
||||
; CHECK-NEXT: vmov.i32 q2, #0x1
|
||||
; CHECK-NEXT: add.w lr, r4, #3
|
||||
; CHECK-NEXT: movs r4, #1
|
||||
; CHECK-NEXT: add.w lr, r4, lr, lsr #2
|
||||
; CHECK-NEXT: dls lr, lr
|
||||
; CHECK-NEXT: dlstp.32 lr, r12
|
||||
; CHECK-NEXT: .LBB0_1: @ %bb6
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vctp.32 r12
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vldrwt.u32 q4, [r1], #16
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r1], #16
|
||||
; CHECK-NEXT: vabs.s32 q5, q4
|
||||
; CHECK-NEXT: vcls.s32 q3, q5
|
||||
; CHECK-NEXT: vshl.u32 q5, q5, q3
|
||||
@ -41,15 +31,11 @@ define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg
|
||||
; CHECK-NEXT: vqshl.s32 q5, q5, #1
|
||||
; CHECK-NEXT: vpt.s32 lt, q4, zr
|
||||
; CHECK-NEXT: vnegt.s32 q5, q5
|
||||
; CHECK-NEXT: vctp.32 r12
|
||||
; CHECK-NEXT: sub.w r12, r12, #4
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vldrwt.u32 q4, [r0], #16
|
||||
; CHECK-NEXT: vldrw.u32 q4, [r0], #16
|
||||
; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5
|
||||
; CHECK-NEXT: vpstt
|
||||
; CHECK-NEXT: vstrwt.32 q4, [r2], #16
|
||||
; CHECK-NEXT: vstrwt.32 q3, [r3], #16
|
||||
; CHECK-NEXT: le lr, .LBB0_1
|
||||
; CHECK-NEXT: vstrw.32 q4, [r2], #16
|
||||
; CHECK-NEXT: vstrw.32 q3, [r3], #16
|
||||
; CHECK-NEXT: letp lr, .LBB0_1
|
||||
; CHECK-NEXT: @ %bb.2: @ %bb44
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
|
||||
; CHECK-NEXT: pop {r4, r5, r7, pc}
|
||||
|
@ -593,26 +593,17 @@ body: |
|
||||
; CHECK: bb.1.vector.ph:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: liveins: $r0, $r1, $r2
|
||||
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
|
||||
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg
|
||||
; CHECK: $lr = t2DLS killed renamable $lr
|
||||
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1
|
||||
; CHECK: bb.2.vector.body:
|
||||
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3
|
||||
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg
|
||||
; CHECK: MVE_VPST 8, implicit $vpr
|
||||
; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr
|
||||
; CHECK: MVE_VPTv4s32r 14, renamable $q1, renamable $r2, 10, implicit-def $vpr
|
||||
; CHECK: liveins: $lr, $q0, $r0, $r2, $r3
|
||||
; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg
|
||||
; CHECK: MVE_VPTv4s32r 12, renamable $q1, renamable $r2, 10, implicit-def $vpr
|
||||
; CHECK: renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 13, 1, killed renamable $vpr
|
||||
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r1, 2, killed renamable $vpr
|
||||
; CHECK: renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 2, killed renamable $vpr
|
||||
; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
|
||||
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
|
||||
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
|
||||
; CHECK: bb.3.for.cond.cleanup:
|
||||
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user