mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-14 17:28:53 +00:00
[Thumb] Select (CMPZ X, -C) -> (CMPZ (ADDS X, C), 0)
The CMPZ #0 disappears during peepholing, leaving just a tADDi3, tADDi8 or t2ADDri. This avoids having to materialize the expensive negative constant in Thumb-1, and allows a 32-bit CMN to be shrunk to a 16-bit ADDS in Thumb-2. llvm-svn: 281040
This commit is contained in:
parent
80e498d6dd
commit
36bdf2dfda
@ -3126,6 +3126,48 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
CurDAG->RemoveDeadNode(N);
|
||||
return;
|
||||
}
|
||||
|
||||
case ARMISD::CMPZ: {
|
||||
// select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
|
||||
// This allows us to avoid materializing the expensive negative constant.
|
||||
// The CMPZ #0 is useless and will be peepholed away but we need to keep it
|
||||
// for its glue output.
|
||||
SDValue X = N->getOperand(0);
|
||||
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
|
||||
if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
|
||||
int64_t Addend = -C->getSExtValue();
|
||||
|
||||
SDNode *Add = nullptr;
|
||||
// In T2 mode, ADDS can be better than CMN if the immediate fits in a
|
||||
// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
|
||||
// Outside that range we can just use a CMN which is 32-bit but has a
|
||||
// 12-bit immediate range.
|
||||
if (Subtarget->isThumb2() && Addend < 1<<8) {
|
||||
SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
|
||||
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
|
||||
CurDAG->getRegister(0, MVT::i32) };
|
||||
Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
|
||||
} else if (!Subtarget->isThumb2() && Addend < 1<<8) {
|
||||
// FIXME: Add T1 tADDi8 code.
|
||||
SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
|
||||
CurDAG->getTargetConstant(Addend, dl, MVT::i32),
|
||||
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
|
||||
Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
|
||||
} else if (!Subtarget->isThumb2() && Addend < 1<<3) {
|
||||
SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
|
||||
CurDAG->getTargetConstant(Addend, dl, MVT::i32),
|
||||
getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
|
||||
Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
|
||||
}
|
||||
if (Add) {
|
||||
SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
|
||||
CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
|
||||
}
|
||||
}
|
||||
// Other cases are autogenerated.
|
||||
break;
|
||||
}
|
||||
|
||||
case ARMISD::VZIP: {
|
||||
unsigned Opc = 0;
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T
|
||||
; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-A
|
||||
|
||||
; LSR should compare against the post-incremented induction variable.
|
||||
; In this case, the immediate value is -2, which requires a cmn instruction on ARM; on Thumb the compare is folded into an adds instead.
|
||||
@ -7,7 +7,8 @@
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: %for.body
|
||||
; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2
|
||||
; CHECK: cmn{{.*}}[[IV]], #2
|
||||
; CHECK-T: adds{{.*}}[[IV]], #2
|
||||
; CHECK-A: cmn{{.*}}[[IV]], #2
|
||||
; CHECK: bne
|
||||
define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp {
|
||||
entry:
|
||||
|
@ -280,7 +280,7 @@ entry:
|
||||
; ARM: and r0, {{r[0-9]+}}, {{r[0-9]+}}
|
||||
|
||||
; T2-LABEL: t18:
|
||||
; T2: and.w r0, {{r[0-9]+}}
|
||||
; T2: and{{s|.w}} r0, {{r[0-9]+}}
|
||||
%cmp = icmp ne i32 %x, 0
|
||||
%cond = select i1 %cmp, i32 5, i32 2
|
||||
%cmp1 = icmp ne i32 %x, -1
|
||||
|
32
test/CodeGen/Thumb/cmp-add-fold.ll
Normal file
32
test/CodeGen/Thumb/cmp-add-fold.ll
Normal file
@ -0,0 +1,32 @@
|
||||
; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=T1 %s
|
||||
; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=T2 %s
|
||||
|
||||
; CHECK-LABEL: addri1:
|
||||
; CHECK: adds r0, #3
|
||||
; T1-NEXT: b{{eq|ne}}
|
||||
; T2-NOT: cmp
|
||||
define i32 @addri1(i32 %a, i32 %b) {
|
||||
%c = add i32 %a, 3
|
||||
%d = icmp eq i32 %c, 0
|
||||
br i1 %d, label %true, label %false
|
||||
|
||||
true:
|
||||
ret i32 4
|
||||
false:
|
||||
ret i32 5
|
||||
}
|
||||
|
||||
; CHECK-LABEL: addri2:
|
||||
; CHECK: adds r0, #254
|
||||
; T1-NEXT: b{{eq|ne}}
|
||||
; T2-NOT: cmp
|
||||
define i32 @addri2(i32 %a, i32 %b) {
|
||||
%c = add i32 %a, 254
|
||||
%d = icmp eq i32 %c, 0
|
||||
br i1 %d, label %true, label %false
|
||||
|
||||
true:
|
||||
ret i32 4
|
||||
false:
|
||||
ret i32 5
|
||||
}
|
@ -16,7 +16,7 @@ entry:
|
||||
bb: ; preds = %bb, %entry
|
||||
; CHECK: LBB0_1:
|
||||
; CHECK: subs [[R2:r[0-9]+]], #1
|
||||
; CHECK: cmp.w [[R2]], #-1
|
||||
; CHECK: adds {{.*}}, [[R2]], #1
|
||||
; CHECK: bne LBB0_1
|
||||
|
||||
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
|
||||
|
@ -3,7 +3,7 @@
|
||||
; -0x000000bb = 4294967109
|
||||
define i1 @f1(i32 %a) {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: cmn.w {{r.*}}, #187
|
||||
; CHECK: adds {{r.*}}, #187
|
||||
%tmp = icmp ne i32 %a, 4294967109
|
||||
ret i1 %tmp
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user