mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-01 01:31:26 +00:00
[CodeGen][AArch64][SVE] Fold [rdffr, ptest] => rdffrs; bugfix for optimizePTestInstr
When a ptest is used to set flags from the output of rdffr, the ptest can be eliminated, using a flags-setting rdffrs instead. Additionally, check that nothing consumes flags between rdffr and ptest; this case appears to have been missed previously. * There is no unpredicated RDFFRS instruction. * If substituting RDFFR_PP, require that the mask argument of the PTEST matches that of the RDFFR_PP. * Move some precondition code up inside optimizePTestInstr, so that it covers the new code paths for RDFFR which return earlier. * Only consider RDFFR, PTEST in same basic block. * Check for other flag setting instructions between the two, abort if found. * Drop an old TODO comment about removing dead PTEST instructions. RDFFR_P to follow in later patch. Differential Revision: https://reviews.llvm.org/D101357
This commit is contained in:
parent
892c56eabe
commit
3fa6510f6e
@ -1366,6 +1366,18 @@ bool AArch64InstrInfo::optimizePTestInstr(
|
||||
OpChanged = true;
|
||||
break;
|
||||
}
|
||||
case AArch64::RDFFR_PPz: {
|
||||
// rdffr p1.b, PredMask=p0/z <--- Definition of Pred
|
||||
// ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
|
||||
// `rdffrs p1.b, p0/z` above.
|
||||
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
|
||||
if (Mask != PredMask)
|
||||
return false;
|
||||
|
||||
NewOp = AArch64::RDFFRS_PPz;
|
||||
OpChanged = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Bail out if we don't recognize the input
|
||||
return false;
|
||||
@ -1374,23 +1386,11 @@ bool AArch64InstrInfo::optimizePTestInstr(
|
||||
|
||||
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
||||
|
||||
// If the predicate is in a different block (possibly because its been
|
||||
// hoisted out), then assume the flags are set in between statements.
|
||||
if (Pred->getParent() != PTest->getParent())
|
||||
// If another instruction between Pred and PTest accesses flags, don't remove
|
||||
// the ptest or update the earlier instruction to modify them.
|
||||
if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
|
||||
return false;
|
||||
|
||||
// If another instruction between the propagation and test sets the
|
||||
// flags, don't remove the ptest.
|
||||
MachineBasicBlock::iterator I = Pred, E = PTest;
|
||||
++I; // Skip past the predicate op itself.
|
||||
for (; I != E; ++I) {
|
||||
const MachineInstr &Inst = *I;
|
||||
|
||||
// TODO: If the ptest flags are unused, we could still remove it.
|
||||
if (Inst.modifiesRegister(AArch64::NZCV, TRI))
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we pass all the checks, it's safe to remove the PTEST and use the flags
|
||||
// as they are prior to PTEST. Sometimes this requires the tested PTEST
|
||||
// operand to be replaced with an equivalent instruction that also sets the
|
||||
|
90
llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
Normal file
90
llvm/test/CodeGen/AArch64/sve-ptest-removal-rdffr.mir
Normal file
@ -0,0 +1,90 @@
|
||||
# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# Test that RDFFR followed by PTEST is replaced with RDFFRS.
|
||||
---
|
||||
# CHECK-LABEL: name:{{\s*}} substitute_rdffr_pp_with_rdffrs_pp
|
||||
name: substitute_rdffr_pp_with_rdffrs_pp
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $ffr, $p0
|
||||
%0:ppr_3b = COPY $p0
|
||||
|
||||
; CHECK: RDFFRS_PPz
|
||||
; CHECK-NOT: PTEST
|
||||
%1:ppr_3b = RDFFR_PPz %0:ppr_3b
|
||||
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
|
||||
|
||||
; Consume nzcv
|
||||
%2:gpr32 = COPY $wzr
|
||||
%3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
|
||||
$w0 = COPY %3
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask
|
||||
name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $ffr, $p0, $p1
|
||||
%0:ppr_3b = COPY $p0
|
||||
%1:ppr_3b = COPY $p1
|
||||
|
||||
; CHECK: RDFFR_PPz
|
||||
; CHECK: PTEST
|
||||
%2:ppr_3b = RDFFR_PPz %0:ppr_3b
|
||||
PTEST_PP killed %1:ppr_3b, killed %2:ppr_3b, implicit-def $nzcv
|
||||
|
||||
; Consume nzcv
|
||||
%3:gpr32 = COPY $wzr
|
||||
%4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv
|
||||
$w0 = COPY %4
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered
|
||||
name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $ffr, $p0, $x0
|
||||
%0:ppr_3b = COPY $p0
|
||||
|
||||
; CHECK: RDFFR_PPz
|
||||
; CHECK-NEXT: ADDSXrr
|
||||
; CHECK-NEXT: PTEST_PP
|
||||
%1:ppr_3b = RDFFR_PPz %0:ppr_3b
|
||||
; Clobber nzcv
|
||||
$x0 = ADDSXrr $x0, $x0, implicit-def $nzcv
|
||||
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
|
||||
|
||||
; Consume nzcv
|
||||
%2:gpr32 = COPY $wzr
|
||||
%3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
|
||||
$w0 = COPY %3
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_flags_used_between
|
||||
name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_flags_used_between
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $ffr, $p0, $x0
|
||||
%0:ppr_3b = COPY $p0
|
||||
|
||||
$wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
|
||||
|
||||
; CHECK: RDFFR_PPz
|
||||
; CHECK-NEXT: CSINCWr
|
||||
; CHECK-NEXT: PTEST_PP
|
||||
%1:ppr_3b = RDFFR_PPz %0:ppr_3b
|
||||
; Consume nzcv
|
||||
%2:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
|
||||
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
|
||||
|
||||
; Consume nzcv
|
||||
%3:gpr32 = COPY $wzr
|
||||
%4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv
|
||||
$w0 = ORRWrs %4, %2, 1
|
||||
RET_ReallyLR implicit $w0
|
Loading…
Reference in New Issue
Block a user