[CodeGen][AArch64][SVE] Fold [rdffr, ptest] => rdffrs; bugfix for optimizePTestInstr

When a ptest is used to set flags from the output of rdffr, the ptest
can be eliminated, using a flags-setting rdffrs instead.

Additionally, check that nothing consumes flags between rdffr and ptest;
this case appears to have been missed previously.

* There is no unpredicated RDFFRS instruction.
* If substituting RDFFR_PP, require that the mask argument of the
  PTEST matches that of the RDFFR_PP.
* Move some precondition code up inside optimizePTestInstr, so that it
  covers the new code paths for RDFFR which return earlier.
  * Only consider RDFFR, PTEST in same basic block.
  * Check for other flag setting instructions between the two, abort if
    found.
  * Drop an old TODO comment about removing dead PTEST instructions.

RDFFR_P to follow in later patch.

Differential Revision: https://reviews.llvm.org/D101357
This commit is contained in:
Peter Waller 2021-04-27 12:29:42 +00:00
parent 892c56eabe
commit 3fa6510f6e
2 changed files with 105 additions and 15 deletions

View File

@ -1366,6 +1366,18 @@ bool AArch64InstrInfo::optimizePTestInstr(
OpChanged = true;
break;
}
case AArch64::RDFFR_PPz: {
// rdffr p1.b, PredMask=p0/z <--- Definition of Pred
// ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
// `rdffrs p1.b, p0/z` above.
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PredMask)
return false;
NewOp = AArch64::RDFFRS_PPz;
OpChanged = true;
break;
}
default:
// Bail out if we don't recognize the input
return false;
@ -1374,23 +1386,11 @@ bool AArch64InstrInfo::optimizePTestInstr(
const TargetRegisterInfo *TRI = &getRegisterInfo();
// If the predicate is in a different block (possibly because its been
// hoisted out), then assume the flags are set in between statements.
if (Pred->getParent() != PTest->getParent())
// If another instruction between Pred and PTest accesses flags, don't remove
// the ptest or update the earlier instruction to modify them.
if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
return false;
// If another instruction between the propagation and test sets the
// flags, don't remove the ptest.
MachineBasicBlock::iterator I = Pred, E = PTest;
++I; // Skip past the predicate op itself.
for (; I != E; ++I) {
const MachineInstr &Inst = *I;
// TODO: If the ptest flags are unused, we could still remove it.
if (Inst.modifiesRegister(AArch64::NZCV, TRI))
return false;
}
// If we pass all the checks, it's safe to remove the PTEST and use the flags
// as they are prior to PTEST. Sometimes this requires the tested PTEST
// operand to be replaced with an equivalent instruction that also sets the

View File

@ -0,0 +1,90 @@
# RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s
# Test that RDFFR followed by PTEST is replaced with RDFFRS.
---
# CHECK-LABEL: name:{{\s*}} substitute_rdffr_pp_with_rdffrs_pp
name: substitute_rdffr_pp_with_rdffrs_pp
tracksRegLiveness: true
body: |
bb.0:
liveins: $ffr, $p0
%0:ppr_3b = COPY $p0
; CHECK: RDFFRS_PPz
; CHECK-NOT: PTEST
%1:ppr_3b = RDFFR_PPz %0:ppr_3b
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
; Consume nzcv
%2:gpr32 = COPY $wzr
%3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
$w0 = COPY %3
RET_ReallyLR implicit $w0
...
---
# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask
name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_differing_mask
tracksRegLiveness: true
body: |
bb.0:
liveins: $ffr, $p0, $p1
%0:ppr_3b = COPY $p0
%1:ppr_3b = COPY $p1
; CHECK: RDFFR_PPz
; CHECK: PTEST
%2:ppr_3b = RDFFR_PPz %0:ppr_3b
PTEST_PP killed %1:ppr_3b, killed %2:ppr_3b, implicit-def $nzcv
; Consume nzcv
%3:gpr32 = COPY $wzr
%4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv
$w0 = COPY %4
RET_ReallyLR implicit $w0
...
---
# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered
name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_clobbered
tracksRegLiveness: true
body: |
bb.0:
liveins: $ffr, $p0, $x0
%0:ppr_3b = COPY $p0
; CHECK: RDFFR_PPz
; CHECK-NEXT: ADDSXrr
; CHECK-NEXT: PTEST_PP
%1:ppr_3b = RDFFR_PPz %0:ppr_3b
; Clobber nzcv
$x0 = ADDSXrr $x0, $x0, implicit-def $nzcv
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
; Consume nzcv
%2:gpr32 = COPY $wzr
%3:gpr32 = CSINCWr killed %2, $wzr, 0, implicit $nzcv
$w0 = COPY %3
RET_ReallyLR implicit $w0
...
---
# CHECK-LABEL: name:{{\s*}} fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_flags_used_between
name: fail_to_substitute_rdffr_pp_with_rdffrs_pp_nzcv_flags_used_between
tracksRegLiveness: true
body: |
bb.0:
liveins: $ffr, $p0, $x0
%0:ppr_3b = COPY $p0
$wzr = SUBSWri $w0, 0, 0, implicit-def $nzcv
; CHECK: RDFFR_PPz
; CHECK-NEXT: CSINCWr
; CHECK-NEXT: PTEST_PP
%1:ppr_3b = RDFFR_PPz %0:ppr_3b
; Consume nzcv
%2:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
PTEST_PP killed %0:ppr_3b, killed %1:ppr_3b, implicit-def $nzcv
; Consume nzcv
%3:gpr32 = COPY $wzr
%4:gpr32 = CSINCWr killed %3, $wzr, 0, implicit $nzcv
$w0 = ORRWrs %4, %2, 1
RET_ReallyLR implicit $w0