Optimize a couple of common patterns involving conditional moves where the false

value is zero. Instead of a cmov + op, issue a conditional op. e.g.
    cmp   r9, r4
    mov   r4, #0
    moveq r4, #1 
    orr   lr, lr, r4

should be:
    cmp   r9, r4
    orreq lr, lr, #1

That is, optimize (or x, (cmov 0, y, cond)) to (or.cond x, y). Similarly extend
this to xor as well as (and x, (cmov -1, y, cond)) => (and.cond x, y).

It's possible to extend this to ADD and SUB but I don't think they are common.

rdar://8659097

llvm-svn: 151224
This commit is contained in:
Evan Cheng 2012-02-23 01:19:06 +00:00
parent 7888265c63
commit d18a688213
6 changed files with 358 additions and 8 deletions

View File

@ -244,6 +244,7 @@ private:
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
SDNode *SelectConditionalOp(SDNode *N);
SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@ -2302,9 +2303,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
// Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Pattern complexity = 18 cost = 1 size = 0
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
if (Subtarget->isThumb()) {
SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
@ -2377,6 +2375,116 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue FalseVal = N->getOperand(0);
SDValue TrueVal = N->getOperand(1);
ARMCC::CondCodes CCVal =
(ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
SDValue CCR = N->getOperand(3);
assert(CCR.getOpcode() == ISD::Register);
SDValue InFlag = N->getOperand(4);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue CPTmp0;
SDValue CPTmp1;
if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected node");
case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
}
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
}
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (T) {
unsigned TrueImm = T->getZExtValue();
if (is_t2_so_imm(TrueImm)) {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected node");
case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
}
SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
}
}
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected node");
case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
}
SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
}
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected node");
case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
}
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
}
if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected node");
case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
}
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
}
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (T) {
unsigned TrueImm = T->getZExtValue();
if (is_so_imm(TrueImm)) {
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected node");
case ARMISD::CAND: Opc = ARM::ANDCCri; break;
case ARMISD::COR: Opc = ARM::ORRCCri; break;
case ARMISD::CXOR: Opc = ARM::EORCCri; break;
}
SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
}
}
unsigned Opc;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected node");
case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
case ARMISD::COR: Opc = ARM::ORRCCrr; break;
case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
}
SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
}
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@ -2714,6 +2822,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
case ARMISD::CAND:
case ARMISD::COR:
case ARMISD::CXOR:
return SelectConditionalOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);

View File

@ -794,10 +794,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
setTargetDAGCombine(ISD::OR);
if (Subtarget->hasNEON())
if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
}
setStackPointerRegisterToSaveRestore(ARM::SP);
@ -890,7 +891,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::CAND: return "ARMISD::CAND";
case ARMISD::COR: return "ARMISD::COR";
case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
@ -6843,8 +6848,52 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
/// isCMOVWithZeroOrAllOnesLHS - Return true if \p N is an ARMISD::CMOV whose
/// node has exactly one use and whose operand 0 (the false value) is a
/// constant: all-ones when \p AllOnes is set, zero otherwise.
static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
  if (N.getOpcode() == ARMISD::CMOV && N.getNode()->hasOneUse())
    if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N.getOperand(0)))
      return AllOnes ? FalseC->isAllOnesValue() : FalseC->isNullValue();
  return false;
}
/// formConditionalOp - Fold a conditional-move operand into the logical
/// operation that consumes it, producing a conditional op:
///   (or  x, (cmov  0, y, cond)) => (or.cond  x, y)
///   (xor x, (cmov  0, y, cond)) => (xor.cond x, y)
///   (and x, (cmov -1, y, cond)) => (and.cond x, y)
/// The cmov's false value must be the constant that leaves x unchanged:
/// all-ones for AND, zero otherwise. The cmov is looked for in operand 1
/// first; operand 0 is tried only when \p Commutable is set.
///
/// \returns the new ARMISD::CAND/COR/CXOR node, or an empty SDValue when no
/// suitable cmov operand is found.
static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
                                 bool Commutable) {
  const bool WantAllOnes = N->getOpcode() == ISD::AND;
  SDValue Base = N->getOperand(0);
  SDValue CMov = N->getOperand(1);

  if (!isCMOVWithZeroOrAllOnesLHS(CMov, WantAllOnes)) {
    // Operand 1 does not qualify; try the other side if that is permitted.
    if (!Commutable || !isCMOVWithZeroOrAllOnesLHS(Base, WantAllOnes))
      return SDValue();
    std::swap(Base, CMov);
  }

  unsigned CondOpc = 0;
  switch (N->getOpcode()) {
  default: llvm_unreachable("Unexpected node");
  case ISD::AND: CondOpc = ARMISD::CAND; break;
  case ISD::OR:  CondOpc = ARMISD::COR;  break;
  case ISD::XOR: CondOpc = ARMISD::CXOR; break;
  }

  // Keep the untouched operand as the first operand and forward the cmov's
  // operands 1 through 4 unchanged.
  SDValue TrueVal = CMov.getOperand(1);
  SDValue CondCode = CMov.getOperand(2);
  SDValue CondReg = CMov.getOperand(3);
  SDValue Cmp = CMov.getOperand(4);
  return DAG.getNode(CondOpc, N->getDebugLoc(), N->getValueType(0), Base,
                     TrueVal, CondCode, CondReg, Cmp);
}
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
@ -6875,6 +6924,13 @@ static SDValue PerformANDCombine(SDNode *N,
}
}
if (!Subtarget->isThumb1Only()) {
// (and x, (cmov -1, y, cond)) => (and.cond x, y)
SDValue CAND = formConditionalOp(N, DAG, true);
if (CAND.getNode())
return CAND;
}
return SDValue();
}
@ -6911,6 +6967,13 @@ static SDValue PerformORCombine(SDNode *N,
}
}
if (!Subtarget->isThumb1Only()) {
// (or x, (cmov 0, y, cond)) => (or.cond x, y)
SDValue COR = formConditionalOp(N, DAG, true);
if (COR.getNode())
return COR;
}
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
@ -7059,6 +7122,25 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
}
/// PerformXORCombine - Target-specific combine for ISD::XOR. Gives up when
/// the result type is not legal or the subtarget is Thumb1-only; otherwise
/// tries (xor x, (cmov 0, y, cond)) => (xor.cond x, y).
///
/// \returns the conditional-xor node, or an empty SDValue if nothing changed.
static SDValue PerformXORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const EVT VT = N->getValueType(0);

  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();
  if (Subtarget->isThumb1Only())
    return SDValue();

  // formConditionalOp already yields an empty SDValue when it cannot fold.
  return formConditionalOp(N, DAG, true);
}
/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
@ -8110,7 +8192,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);

View File

@ -56,7 +56,11 @@ namespace llvm {
CMPFP, // ARM VFP compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
CAND, // ARM conditional and instructions.
COR, // ARM conditional or instructions.
CXOR, // ARM conditional xor instructions.
BCC_i64,

View File

@ -4084,6 +4084,73 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
let isCodeGenOnly = 1 in {
// Conditional instructions
// AsI1_bincc_irs - Defines the four operand forms (ri, rr, rsi, rsr) of a
// two-operand conditional instruction. Parameters: the 4-bit opcode field,
// the mnemonic string, and one itinerary each for the immediate (iii),
// register (iir) and shifted-register (iis) forms. Every form ties $Rn to
// $Rd and is given an empty trailing [] list; the opcodes are picked by hand
// in ARMDAGToDAGISel::SelectConditionalOp.
multiclass AsI1_bincc_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
// Immediate form: second operand is a so_imm; the only form with bit 25 set.
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm", []>,
RegConstraint<"$Rn = $Rd"> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{11-0} = imm;
}
// Register form: second operand is a plain GPR; bits 11-4 are all zero.
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm", []>,
RegConstraint<"$Rn = $Rd"> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
let Inst{25} = 0;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
}
// Register-shifted-by-immediate form (so_reg_imm operand); bit 4 is 0.
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift", []>,
RegConstraint<"$Rn = $Rd"> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{11-5} = shift{11-5};
let Inst{4} = 0;
let Inst{3-0} = shift{3-0};
}
// Register-shifted-by-register form (so_reg_reg operand); bit 7 is 0 and
// bit 4 is 1.
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift", []>,
RegConstraint<"$Rn = $Rd"> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{11-8} = shift{11-8};
let Inst{7} = 0;
let Inst{6-5} = shift{6-5};
let Inst{4} = 1;
let Inst{3-0} = shift{3-0};
}
} // AsI1_bincc_irs
// Instantiate the four forms for and / orr / eor with opcode field values
// 0b0000 / 0b1100 / 0b0001; all three share the IIC_iBIT* itineraries.
defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
} // isCodeGenOnly
} // neverHasSideEffects
//===----------------------------------------------------------------------===//

View File

@ -2943,6 +2943,44 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
RegConstraint<"$false = $Rd">;
// T2I_bincc_irs - Thumb2 counterpart of the conditional and/orr/eor
// instructions: defines the immediate (ri), register (rr) and shifted
// register (rs) forms. Parameters: the 4-bit opcode placed in Inst{24-21},
// the mnemonic string, and one itinerary per form. Every form ties $Rn to
// $Rd, prints with a ".w" suffix and is given an empty trailing [] list.
multiclass T2I_bincc_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
// shifted imm
// Second operand is a t2_so_imm; Inst{31-27} is 0b11110 for this form only.
def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
iii, opc, ".w\t$Rd, $Rn, $imm", []>,
RegConstraint<"$Rn = $Rd"> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
let Inst{15} = 0;
}
// register
// Second operand is a plain rGPR; the imm3, imm2 and type fields are zeroed.
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
iir, opc, ".w\t$Rd, $Rn, $Rm", []>,
RegConstraint<"$Rn = $Rd"> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
}
// shifted register
// Second operand is a t2_so_reg; same leading bits as the register form.
def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd),
(ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>,
RegConstraint<"$Rn = $Rd"> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
}
} // T2I_bincc_irs
// Instantiate the three forms for and / orr / eor with opcode field values
// 0b0000 / 0b0010 / 0b0100.
defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // isCodeGenOnly = 1
} // neverHasSideEffects

View File

@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
%s = or i32 %z, %y
ret i32 %s
}
; t5 - or of %c with a zero-extended (icmp eq): both run lines expect a single
; predicated orr with immediate #1 and no moveq.
define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
entry:
; ARM: t5:
; ARM-NOT: moveq
; ARM: orreq r2, r2, #1
; T2: t5:
; T2-NOT: moveq
; T2: orreq.w r2, r2, #1
%tmp1 = icmp eq i32 %a, %b
%tmp2 = zext i1 %tmp1 to i32
%tmp3 = or i32 %tmp2, %c
ret i32 %tmp3
}
; t6 - xor of %d with (select %cond, %c, 0): both run lines expect a single
; predicated eor (eorlt) on registers and no movge.
define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; ARM: t6:
; ARM-NOT: movge
; ARM: eorlt r3, r3, r2
; T2: t6:
; T2-NOT: movge
; T2: eorlt.w r3, r3, r2
%cond = icmp slt i32 %a, %b
%tmp1 = select i1 %cond, i32 %c, i32 0
%tmp2 = xor i32 %tmp1, %d
ret i32 %tmp2
}
; t7 - and of %c with (select %cond, (shl %c, 1), -1): both run lines expect a
; single predicated and whose second operand is the shifted register
; (r2, lsl #1), with no separate lsleq.
define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
entry:
; ARM: t7:
; ARM-NOT: lsleq
; ARM: andeq r2, r2, r2, lsl #1
; T2: t7:
; T2-NOT: lsleq.w
; T2: andeq.w r2, r2, r2, lsl #1
%tmp1 = shl i32 %c, 1
%cond = icmp eq i32 %a, %b
%tmp2 = select i1 %cond, i32 %tmp1, i32 -1
%tmp3 = and i32 %c, %tmp2
ret i32 %tmp3
}