Reapply r162160 with a fix: Optimize Arith->Trunc->SETCC sequence to allow better compare/branch code.

llvm-svn: 162172
This commit is contained in:
Nadav Rotem 2012-08-18 17:53:03 +00:00
parent c0e610e681
commit d01a7b5942
3 changed files with 107 additions and 17 deletions

View File

@ -8283,7 +8283,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
unsigned Opcode = 0;
unsigned NumOperands = 0;
switch (Op.getNode()->getOpcode()) {
// Truncate operations may prevent the merge of the SETCC instruction
// and the arithmetic intruction before it. Attempt to truncate the operands
// of the arithmetic instruction and use a reduced bit-width instruction.
bool NeedTruncation = false;
SDValue ArithOp = Op;
if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
SDValue Arith = Op->getOperand(0);
// Both the trunc and the arithmetic op need to have one user each.
if (Arith->hasOneUse())
switch (Arith.getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
NeedTruncation = true;
ArithOp = Arith;
}
}
}
// NOTICE: In the code below we use ArithOp to hold the arithmetic operation
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
@ -8303,7 +8329,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
goto default_case;
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
if (C->getAPIntValue() == 1) {
Opcode = X86ISD::INC;
@ -8339,7 +8365,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
if (User->getOpcode() != ISD::BRCOND &&
User->getOpcode() != ISD::SETCC &&
(User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
!(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
NonFlagUse = true;
break;
}
@ -8360,11 +8386,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
switch (Op.getNode()->getOpcode()) {
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::SUB:
Opcode = X86ISD::SUB;
break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::OR: Opcode = X86ISD::OR; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@ -8385,19 +8409,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
break;
}
// If we found that truncation is beneficial, perform the truncation and
// update 'Op'.
if (NeedTruncation) {
EVT VT = Op.getValueType();
SDValue WideVal = Op->getOperand(0);
EVT WideVT = WideVal.getValueType();
unsigned ConvertedOp = 0;
// Use a target machine opcode to prevent further DAGCombine
// optimizations that may separate the arithmetic operations
// from the setcc node.
switch (WideVal.getOpcode()) {
default: break;
case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
case ISD::AND: ConvertedOp = X86ISD::AND; break;
case ISD::OR: ConvertedOp = X86ISD::OR; break;
case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
}
if (ConvertedOp) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
}
}
}
if (Opcode == 0)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
if (Opcode == X86ISD::CMP) {
SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0),
Op.getOperand(1));
// We can't replace usage of SUB with CMP.
// The SUB node will be removed later because there is no use of it.
return SDValue(New.getNode(), 0);
}
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)

View File

@ -0,0 +1,45 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
; rdar://12081007
; CHECK: and_1:
; CHECK: andb
; CHECK-NEXT: cmovnel
; CHECK: ret
define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
%1 = and i8 %b, %a
%2 = icmp ne i8 %1, 0
%3 = select i1 %2, i32 %x, i32 0
ret i32 %3
}
; CHECK: and_2:
; CHECK: andb
; CHECK-NEXT: setne
; CHECK: ret
define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) {
%1 = and i8 %b, %a
%2 = icmp ne i8 %1, 0
ret i1 %2
}
; CHECK: xor_1:
; CHECK: xorb
; CHECK-NEXT: cmovnel
; CHECK: ret
define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
%1 = xor i8 %b, %a
%2 = icmp ne i8 %1, 0
%3 = select i1 %2, i32 %x, i32 0
ret i32 %3
}
; CHECK: xor_2:
; CHECK: xorb
; CHECK-NEXT: setne
; CHECK: ret
define zeroext i1 @xor_2(i8 zeroext %a, i8 zeroext %b) {
%1 = xor i8 %b, %a
%2 = icmp ne i8 %1, 0
ret i1 %2
}

View File

@ -57,13 +57,13 @@ entry:
%0 = load i32* %P, align 4
%1 = load i32* %Q, align 4
%2 = xor i32 %0, %1
%3 = and i32 %2, 65535
%3 = and i32 %2, 89947
%4 = icmp eq i32 %3, 0
br i1 %4, label %exit, label %land.end
exit:
%shr.i.i19 = xor i32 %1, %0
%5 = and i32 %shr.i.i19, 2147418112
%5 = and i32 %shr.i.i19, 3456789123
%6 = icmp eq i32 %5, 0
br label %land.end