mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-21 01:06:46 +00:00
Shifter ops are not always free. Do not fold them (especially to form
complex load / store addressing modes) when they have a higher cost and more than one use.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117509 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
de5fa932b9
commit
f40deed62f
@ -78,8 +78,12 @@ public:
|
||||
|
||||
SDNode *Select(SDNode *N);
|
||||
|
||||
bool isShifterOpProfitable(const SDValue &Shift,
|
||||
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
|
||||
bool SelectShifterOperandReg(SDValue N, SDValue &A,
|
||||
SDValue &B, SDValue &C);
|
||||
bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
|
||||
SDValue &B, SDValue &C);
|
||||
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
|
||||
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
|
||||
|
||||
@ -246,6 +250,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
|
||||
}
|
||||
|
||||
|
||||
// Decide whether the shift node `Shift` should be folded into the operand or
// addressing mode of the instruction that uses it.
//
//   Shift    - the shift node being considered; only its use count is read.
//   ShOpcVal - the kind of shift (compared against ARM_AM::lsl below).
//   ShAmt    - the shift amount supplied by the caller.
//
// Returns true when folding is considered profitable, false when the caller
// should leave the shift as a separate computation.
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
|
||||
ARM_AM::ShiftOpc ShOpcVal,
|
||||
unsigned ShAmt) {
|
||||
// The restriction below is applied only on Cortex-A9; every other subtarget
// always folds.
if (!Subtarget->isCortexA9())
|
||||
return true;
|
||||
// A shift with a single use is always folded: no other user needs the
// shifted value.
if (Shift.hasOneUse())
|
||||
return true;
|
||||
// Multi-use shift on Cortex-A9: fold only a left shift by 2 (R << 2), which
// is treated as free.
|
||||
return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
|
||||
}
|
||||
|
||||
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
|
||||
SDValue &BaseReg,
|
||||
SDValue &ShReg,
|
||||
@ -261,6 +276,32 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
|
||||
|
||||
BaseReg = N.getOperand(0);
|
||||
unsigned ShImmVal = 0;
|
||||
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
|
||||
ShReg = CurDAG->getRegister(0, MVT::i32);
|
||||
ShImmVal = RHS->getZExtValue() & 31;
|
||||
} else {
|
||||
ShReg = N.getOperand(1);
|
||||
if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
|
||||
return false;
|
||||
}
|
||||
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
|
||||
MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
|
||||
SDValue &BaseReg,
|
||||
SDValue &ShReg,
|
||||
SDValue &Opc) {
|
||||
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
|
||||
|
||||
// Don't match base register only case. That is matched to a separate
|
||||
// lower complexity pattern with explicit register operand.
|
||||
if (ShOpcVal == ARM_AM::no_shift) return false;
|
||||
|
||||
BaseReg = N.getOperand(0);
|
||||
unsigned ShImmVal = 0;
|
||||
// Do not check isShifterOpProfitable. This must return true.
|
||||
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
|
||||
ShReg = CurDAG->getRegister(0, MVT::i32);
|
||||
ShImmVal = RHS->getZExtValue() & 31;
|
||||
@ -321,7 +362,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
|
||||
|
||||
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
|
||||
SDValue &Opc) {
|
||||
if (N.getOpcode() == ISD::MUL) {
|
||||
if (N.getOpcode() == ISD::MUL &&
|
||||
(!Subtarget->isCortexA9() || N.hasOneUse())) {
|
||||
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
|
||||
// X * [3,5,9] -> X + X * [2,4,8] etc.
|
||||
int RHSC = (int)RHS->getZExtValue();
|
||||
@ -357,6 +399,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
|
||||
}
|
||||
}
|
||||
|
||||
if (Subtarget->isCortexA9() && !N.hasOneUse())
|
||||
// Compute R +/- (R << N) and reuse it.
|
||||
return false;
|
||||
|
||||
// Otherwise this is R +/- [possibly shifted] R.
|
||||
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
|
||||
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
|
||||
@ -371,14 +417,20 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
|
||||
if (ConstantSDNode *Sh =
|
||||
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
|
||||
ShAmt = Sh->getZExtValue();
|
||||
Offset = N.getOperand(1).getOperand(0);
|
||||
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
|
||||
Offset = N.getOperand(1).getOperand(0);
|
||||
else {
|
||||
ShAmt = 0;
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
} else {
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
}
|
||||
|
||||
// Try matching (R shl C) + (R).
|
||||
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
|
||||
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
|
||||
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
|
||||
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
|
||||
if (ShOpcVal != ARM_AM::no_shift) {
|
||||
// Check to see if the RHS of the shift is a constant, if not, we can't
|
||||
@ -386,8 +438,15 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
|
||||
if (ConstantSDNode *Sh =
|
||||
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
|
||||
ShAmt = Sh->getZExtValue();
|
||||
Offset = N.getOperand(0).getOperand(0);
|
||||
Base = N.getOperand(1);
|
||||
if (!Subtarget->isCortexA9() ||
|
||||
(N.hasOneUse() &&
|
||||
isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
|
||||
Offset = N.getOperand(0).getOperand(0);
|
||||
Base = N.getOperand(1);
|
||||
} else {
|
||||
ShAmt = 0;
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
} else {
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
@ -408,7 +467,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
|
||||
SDValue &Base,
|
||||
SDValue &Offset,
|
||||
SDValue &Opc) {
|
||||
if (N.getOpcode() == ISD::MUL) {
|
||||
if (N.getOpcode() == ISD::MUL &&
|
||||
(!Subtarget->isCortexA9() || N.hasOneUse())) {
|
||||
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
|
||||
// X * [3,5,9] -> X + X * [2,4,8] etc.
|
||||
int RHSC = (int)RHS->getZExtValue();
|
||||
@ -474,6 +534,16 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
|
||||
}
|
||||
}
|
||||
|
||||
if (Subtarget->isCortexA9() && !N.hasOneUse()) {
|
||||
// Compute R +/- (R << N) and reuse it.
|
||||
Base = N;
|
||||
Offset = CurDAG->getRegister(0, MVT::i32);
|
||||
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
|
||||
ARM_AM::no_shift),
|
||||
MVT::i32);
|
||||
return AM2_BASE;
|
||||
}
|
||||
|
||||
// Otherwise this is R +/- [possibly shifted] R.
|
||||
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
|
||||
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
|
||||
@ -488,14 +558,20 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
|
||||
if (ConstantSDNode *Sh =
|
||||
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
|
||||
ShAmt = Sh->getZExtValue();
|
||||
Offset = N.getOperand(1).getOperand(0);
|
||||
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
|
||||
Offset = N.getOperand(1).getOperand(0);
|
||||
else {
|
||||
ShAmt = 0;
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
} else {
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
}
|
||||
|
||||
// Try matching (R shl C) + (R).
|
||||
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
|
||||
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
|
||||
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
|
||||
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
|
||||
if (ShOpcVal != ARM_AM::no_shift) {
|
||||
// Check to see if the RHS of the shift is a constant, if not, we can't
|
||||
@ -503,8 +579,15 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
|
||||
if (ConstantSDNode *Sh =
|
||||
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
|
||||
ShAmt = Sh->getZExtValue();
|
||||
Offset = N.getOperand(0).getOperand(0);
|
||||
Base = N.getOperand(1);
|
||||
if (!Subtarget->isCortexA9() ||
|
||||
(N.hasOneUse() &&
|
||||
isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
|
||||
Offset = N.getOperand(0).getOperand(0);
|
||||
Base = N.getOperand(1);
|
||||
} else {
|
||||
ShAmt = 0;
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
} else {
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
@ -543,7 +626,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
|
||||
// it.
|
||||
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
|
||||
ShAmt = Sh->getZExtValue();
|
||||
Offset = N.getOperand(0);
|
||||
if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
|
||||
Offset = N.getOperand(0);
|
||||
else {
|
||||
ShAmt = 0;
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
} else {
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
@ -959,6 +1047,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Subtarget->isCortexA9() && !N.hasOneUse()) {
|
||||
// Compute R + (R << [1,2,3]) and reuse it.
|
||||
Base = N;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Look for (R + R) or (R + (R << [1,2,3])).
|
||||
unsigned ShAmt = 0;
|
||||
Base = N.getOperand(0);
|
||||
@ -977,11 +1071,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
|
||||
// it.
|
||||
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
|
||||
ShAmt = Sh->getZExtValue();
|
||||
if (ShAmt >= 4) {
|
||||
if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
|
||||
OffReg = OffReg.getOperand(0);
|
||||
else {
|
||||
ShAmt = 0;
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
} else
|
||||
OffReg = OffReg.getOperand(0);
|
||||
}
|
||||
} else {
|
||||
ShOpcVal = ARM_AM::no_shift;
|
||||
}
|
||||
|
@ -325,6 +325,13 @@ def so_reg : Operand<i32>, // reg reg imm
|
||||
let PrintMethod = "printSORegOperand";
|
||||
let MIOperandInfo = (ops GPR, GPR, i32imm);
|
||||
}
|
||||
// shift_so_reg - Shifter-operand register form whose three sub-operands are
// (GPR, GPR, i32imm). It is matched through the C++ selector
// SelectShiftShifterOperandReg on shl / srl / sra / rotr nodes, and reuses the
// "getSORegOpValue" encoder and "printSORegOperand" printer named below.
def shift_so_reg : Operand<i32>, // reg reg imm
|
||||
ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
|
||||
[shl,srl,sra,rotr]> {
|
||||
string EncoderMethod = "getSORegOpValue";
|
||||
let PrintMethod = "printSORegOperand";
|
||||
let MIOperandInfo = (ops GPR, GPR, i32imm);
|
||||
}
|
||||
|
||||
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
|
||||
// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
|
||||
@ -1715,9 +1722,10 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
|
||||
let Inst{15-12} = Rd;
|
||||
}
|
||||
|
||||
def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins so_reg:$src),
|
||||
def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
|
||||
DPSoRegFrm, IIC_iMOVsr,
|
||||
"mov", "\t$Rd, $src", [(set GPR:$Rd, so_reg:$src)]>, UnaryDP {
|
||||
"mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
|
||||
UnaryDP {
|
||||
bits<4> Rd;
|
||||
bits<12> src;
|
||||
let Inst{15-12} = Rd;
|
||||
|
@ -1,18 +1,72 @@
|
||||
; RUN: llc < %s -march=arm | grep add | grep lsl
|
||||
; RUN: llc < %s -march=arm | grep bic | grep asr
|
||||
; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
|
||||
; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
|
||||
; rdar://8576755
|
||||
|
||||
|
||||
define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
|
||||
%shift.upgrd.1 = zext i8 %sh to i32 ; <i32> [#uses=1]
|
||||
%A = shl i32 %Y, %shift.upgrd.1 ; <i32> [#uses=1]
|
||||
%B = add i32 %X, %A ; <i32> [#uses=1]
|
||||
; A8: test1:
|
||||
; A8: add r0, r0, r1, lsl r2
|
||||
|
||||
; A9: test1:
|
||||
; A9: add r0, r0, r1, lsl r2
|
||||
%shift.upgrd.1 = zext i8 %sh to i32
|
||||
%A = shl i32 %Y, %shift.upgrd.1
|
||||
%B = add i32 %X, %A
|
||||
ret i32 %B
|
||||
}
|
||||
|
||||
define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
|
||||
%shift.upgrd.2 = zext i8 %sh to i32 ; <i32> [#uses=1]
|
||||
%A = ashr i32 %Y, %shift.upgrd.2 ; <i32> [#uses=1]
|
||||
%B = xor i32 %A, -1 ; <i32> [#uses=1]
|
||||
%C = and i32 %X, %B ; <i32> [#uses=1]
|
||||
; A8: test2:
|
||||
; A8: bic r0, r0, r1, asr r2
|
||||
|
||||
; A9: test2:
|
||||
; A9: bic r0, r0, r1, asr r2
|
||||
%shift.upgrd.2 = zext i8 %sh to i32
|
||||
%A = ashr i32 %Y, %shift.upgrd.2
|
||||
%B = xor i32 %A, -1
|
||||
%C = and i32 %X, %B
|
||||
ret i32 %C
|
||||
}
|
||||
|
||||
; test3 - %tmp1 (%offset shifted left by 2) feeds both address computations
; (%tmp2 and %tmp4), so the shift has more than one use. The checks for both
; Cortex-A8 and Cortex-A9 still expect the shift to be folded into each load.
define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
|
||||
entry:
|
||||
; A8: test3:
|
||||
; A8: ldr r0, [r0, r2, lsl #2]
|
||||
; A8: ldr r1, [r1, r2, lsl #2]
|
||||
|
||||
; lsl #2 is free
|
||||
; A9: test3:
|
||||
; A9: ldr r1, [r1, r2, lsl #2]
|
||||
; A9: ldr r0, [r0, r2, lsl #2]
|
||||
%tmp1 = shl i32 %offset, 2
|
||||
%tmp2 = add i32 %base, %tmp1
|
||||
%tmp3 = inttoptr i32 %tmp2 to i32*
|
||||
%tmp4 = add i32 %base2, %tmp1
|
||||
%tmp5 = inttoptr i32 %tmp4 to i32*
|
||||
%tmp6 = load i32* %tmp3
|
||||
%tmp7 = load i32* %tmp5
|
||||
%tmp8 = add i32 %tmp7, %tmp6
|
||||
ret i32 %tmp8
|
||||
}
|
||||
|
||||
declare i8* @malloc(...)
|
||||
|
||||
; test4 - the address %3 is used by both the load and the store. The Cortex-A8
; checks expect the shifted-register addressing mode on both memory accesses;
; the Cortex-A9 checks expect one add that forms the address, which the load
; and the store then both use as a plain base register.
define fastcc void @test4() nounwind {
|
||||
entry:
|
||||
; A8: test4:
|
||||
; A8: ldr r1, [r0, r0, lsl #2]
|
||||
; A8: str r1, [r0, r0, lsl #2]
|
||||
|
||||
; A9: test4:
|
||||
; A9: add r0, r0, r0, lsl #2
|
||||
; A9: ldr r1, [r0]
|
||||
; A9: str r1, [r0]
|
||||
%0 = tail call i8* (...)* @malloc(i32 undef) nounwind
|
||||
%1 = bitcast i8* %0 to i32*
|
||||
%2 = sext i16 undef to i32
|
||||
%3 = getelementptr inbounds i32* %1, i32 %2
|
||||
%4 = load i32* %3, align 4
|
||||
%5 = add nsw i32 %4, 1
|
||||
store i32 %5, i32* %3, align 4
|
||||
ret void
|
||||
}
|
||||
|
@ -586,6 +586,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
|
||||
|
||||
MISC("brtarget", "kOperandTypeARMBranchTarget"); // ?
|
||||
MISC("so_reg", "kOperandTypeARMSoReg"); // R, R, I
|
||||
MISC("shift_so_reg", "kOperandTypeARMSoReg"); // R, R, I
|
||||
MISC("t2_so_reg", "kOperandTypeThumb2SoReg"); // R, I
|
||||
MISC("so_imm", "kOperandTypeARMSoImm"); // I
|
||||
MISC("rot_imm", "kOperandTypeARMRotImm"); // I
|
||||
|
Loading…
x
Reference in New Issue
Block a user