ARM: fix handling of SUB immediates in peephole opt.

We were unnecessarily negating an immediate that was going to be used in a
SUBri form. Since ADD/SUB are very similar we *can* do that, but we have to
change the SUB to an ADD at the same time. This also applies to ADD, and allows
us to handle a slightly larger range of immediates for those two operations.

rdar://25992245

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268276 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2016-05-02 18:30:08 +00:00
parent 2ab91e26c5
commit 464549ab4d
3 changed files with 153 additions and 16 deletions

View File

@ -2685,14 +2685,24 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
Commute = UseMI->getOperand(2).getReg() != Reg;
switch (UseOpc) {
default: break;
case ARM::SUBrr: {
if (Commute)
return false;
ImmVal = -ImmVal;
NewUseOpc = ARM::SUBri;
// Fallthrough
}
case ARM::ADDrr:
case ARM::SUBrr: {
if (UseOpc == ARM::SUBrr && Commute)
return false;
// ADD/SUB are special because they're essentially the same operation, so
// we can handle a larger range of immediates.
if (ARM_AM::isSOImmTwoPartVal(ImmVal))
NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
ImmVal = -ImmVal;
NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
} else
return false;
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
break;
}
case ARM::ORRrr:
case ARM::EORrr: {
if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
@ -2701,20 +2711,29 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
case ARM::EORrr: NewUseOpc = ARM::EORri; break;
}
break;
}
case ARM::t2SUBrr: {
if (Commute)
return false;
ImmVal = -ImmVal;
NewUseOpc = ARM::t2SUBri;
// Fallthrough
}
case ARM::t2ADDrr:
case ARM::t2SUBrr: {
if (UseOpc == ARM::t2SUBrr && Commute)
return false;
// ADD/SUB are special because they're essentially the same operation, so
// we can handle a larger range of immediates.
if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri;
else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
ImmVal = -ImmVal;
NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri;
} else
return false;
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
break;
}
case ARM::t2ORRrr:
case ARM::t2EORrr: {
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
@ -2723,7 +2742,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
switch (UseOpc) {
default: break;
case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
}

View File

@ -0,0 +1,60 @@
# Regression test for immediate folding in the peephole-opts pass (ARM mode).
#
# The function body below materializes +/-25733 (= 25600 + 133) four times
# with MOVi32imm and feeds each constant into a single SUBrr or ADDrr. The
# FileCheck patterns expect every such pair to be rewritten as two
# immediate-form instructions, switching between ADD and SUB when the
# constant is negative:
#   SUBrr in, -25733  ->  ADDri 133 ; ADDri 25600
#   SUBrr in,  25733  ->  SUBri 133 ; SUBri 25600
#   ADDrr in, -25733  ->  SUBri 133 ; SUBri 25600
#   ADDrr in,  25733  ->  ADDri 133 ; ADDri 25600
# RUN: llc -run-pass=peephole-opts %s -o /dev/null 2>&1 | FileCheck %s
# CHECK: [[IN:%.*]] = COPY %r0
# CHECK: [[SUM1TMP:%.*]] = ADDri [[IN]], 133
# CHECK: [[SUM1:%.*]] = ADDri killed [[SUM1TMP]], 25600
# CHECK: [[SUM2TMP:%.*]] = SUBri [[IN]], 133
# CHECK: [[SUM2:%.*]] = SUBri killed [[SUM2TMP]], 25600
# CHECK: [[SUM3TMP:%.*]] = SUBri [[IN]], 133
# CHECK: [[SUM3:%.*]] = SUBri killed [[SUM3TMP]], 25600
# CHECK: [[SUM4TMP:%.*]] = ADDri killed [[IN]], 133
# CHECK: [[SUM4:%.*]] = ADDri killed [[SUM4TMP]], 25600
# Stub IR module: it only declares @foo; the machine-level body is given by
# the MIR document that follows.
--- |
target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "armv7-apple-ios"
define i32 @foo(i32 %in) {
ret i32 undef
}
...
---
name: foo
registers:
- { id: 0, class: gprnopc }
- { id: 1, class: rgpr }
- { id: 2, class: rgpr }
- { id: 3, class: rgpr }
- { id: 4, class: rgpr }
- { id: 5, class: rgpr }
- { id: 6, class: rgpr }
- { id: 7, class: rgpr }
- { id: 8, class: rgpr }
liveins:
- { reg: '%r0', virtual-reg: '%0' }
body: |
bb.0 (%ir-block.0):
liveins: %r0
%0 = COPY %r0
%1 = MOVi32imm -25733
%2 = SUBrr %0, killed %1, 14, _, _
%3 = MOVi32imm 25733
%4 = SUBrr %0, killed %3, 14, _, _
%5 = MOVi32imm -25733
%6 = ADDrr %0, killed %5, 14, _, _
%7 = MOVi32imm 25733
%8 = ADDrr killed %0, killed %7, 14, _, _
%r0 = COPY killed %8
BX_RET 14, _, implicit %r0
...

View File

@ -0,0 +1,59 @@
# Regression test for immediate folding in the peephole-opts pass (Thumb2).
#
# The function body below materializes +/-25733 (= 25600 + 133) four times
# with t2MOVi32imm and feeds each constant into a single t2SUBrr or t2ADDrr.
# The FileCheck patterns expect every such pair to be rewritten as two
# immediate-form instructions, switching between ADD and SUB when the
# constant is negative:
#   t2SUBrr in, -25733  ->  t2ADDri 25600 ; t2ADDri 133
#   t2SUBrr in,  25733  ->  t2SUBri 25600 ; t2SUBri 133
#   t2ADDrr in, -25733  ->  t2SUBri 25600 ; t2SUBri 133
#   t2ADDrr in,  25733  ->  t2ADDri 25600 ; t2ADDri 133
# RUN: llc -run-pass=peephole-opts %s -o /dev/null 2>&1 | FileCheck %s
# CHECK: [[IN:%.*]] = COPY %r0
# CHECK: [[SUM1TMP:%.*]] = t2ADDri [[IN]], 25600
# CHECK: [[SUM1:%.*]] = t2ADDri killed [[SUM1TMP]], 133
# CHECK: [[SUM2TMP:%.*]] = t2SUBri [[IN]], 25600
# CHECK: [[SUM2:%.*]] = t2SUBri killed [[SUM2TMP]], 133
# CHECK: [[SUM3TMP:%.*]] = t2SUBri [[IN]], 25600
# CHECK: [[SUM3:%.*]] = t2SUBri killed [[SUM3TMP]], 133
# CHECK: [[SUM4TMP:%.*]] = t2ADDri killed [[IN]], 25600
# CHECK: [[SUM4:%.*]] = t2ADDri killed [[SUM4TMP]], 133
# Stub IR module: it only declares @foo; the machine-level body is given by
# the MIR document that follows.
--- |
target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7-apple-ios"
define i32 @foo(i32 %in) {
ret i32 undef
}
...
---
name: foo
registers:
- { id: 0, class: gprnopc }
- { id: 1, class: rgpr }
- { id: 2, class: rgpr }
- { id: 3, class: rgpr }
- { id: 4, class: rgpr }
- { id: 5, class: rgpr }
- { id: 6, class: rgpr }
- { id: 7, class: rgpr }
- { id: 8, class: rgpr }
liveins:
- { reg: '%r0', virtual-reg: '%0' }
body: |
bb.0 (%ir-block.0):
liveins: %r0
%0 = COPY %r0
%1 = t2MOVi32imm -25733
%2 = t2SUBrr %0, killed %1, 14, _, _
%3 = t2MOVi32imm 25733
%4 = t2SUBrr %0, killed %3, 14, _, _
%5 = t2MOVi32imm -25733
%6= t2ADDrr %0, killed %5, 14, _, _
%7 = t2MOVi32imm 25733
%8 = t2ADDrr killed %0, killed %7, 14, _, _
%r0 = COPY killed %8
tBX_RET 14, _, implicit %r0
...