mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-04 02:40:42 +00:00
Add a target-specific branchless method for double-width relational
comparisons on x86. Essentially, the way this works is that SUB+SBB sets the relevant flags the same way a double-width CMP would. This is a substantial improvement over the generic lowering in LLVM. The output is also shorter than the gcc-generated output; I haven't done any detailed benchmarking, though. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127852 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a4f9839701
commit
b4b8b0cc90
@ -446,12 +446,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
|
||||
if (Subtarget->is64Bit()) {
|
||||
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
|
||||
setOperationAction(ISD::SETCC , MVT::i128 , Custom);
|
||||
}
|
||||
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
|
||||
|
||||
@ -2839,7 +2840,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
|
||||
} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
|
||||
// X < 0 -> X == 0, jump on sign.
|
||||
return X86::COND_S;
|
||||
} else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
|
||||
} else if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
|
||||
// X < 1 -> X <= 0
|
||||
RHS = DAG.getConstant(0, RHS.getValueType());
|
||||
return X86::COND_LE;
|
||||
@ -7422,7 +7423,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
// Lower (X & (1 << N)) == 0 to BT(X, N).
|
||||
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
|
||||
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
|
||||
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
|
||||
if (isTypeLegal(Op0.getValueType()) &&
|
||||
Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
|
||||
Op1.getOpcode() == ISD::Constant &&
|
||||
cast<ConstantSDNode>(Op1)->isNullValue() &&
|
||||
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
||||
@ -7434,7 +7436,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
|
||||
// these.
|
||||
if (Op1.getOpcode() == ISD::Constant &&
|
||||
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
|
||||
(cast<ConstantSDNode>(Op1)->isOne() ||
|
||||
cast<ConstantSDNode>(Op1)->isNullValue()) &&
|
||||
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
||||
|
||||
@ -7457,6 +7459,73 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (X86CC == X86::COND_INVALID)
|
||||
return SDValue();
|
||||
|
||||
if ((!Subtarget->is64Bit() && Op0.getValueType() == MVT::i64) ||
|
||||
(Subtarget->is64Bit() && Op0.getValueType() == MVT::i128)) {
|
||||
switch (X86CC) {
|
||||
case X86::COND_E:
|
||||
case X86::COND_NE:
|
||||
case X86::COND_S:
|
||||
case X86::COND_NS:
|
||||
// Just use the generic lowering, which works well on x86.
|
||||
return SDValue();
|
||||
case X86::COND_B:
|
||||
case X86::COND_AE:
|
||||
case X86::COND_L:
|
||||
case X86::COND_GE:
|
||||
// Use SBB-based lowering.
|
||||
break;
|
||||
case X86::COND_A:
|
||||
// Use SBB-based lowering; commute so ZF isn't used.
|
||||
X86CC = X86::COND_B;
|
||||
std::swap(Op0, Op1);
|
||||
break;
|
||||
case X86::COND_BE:
|
||||
// Use SBB-based lowering; commute so ZF isn't used.
|
||||
X86CC = X86::COND_AE;
|
||||
std::swap(Op0, Op1);
|
||||
break;
|
||||
case X86::COND_G:
|
||||
// Use SBB-based lowering; commute so ZF isn't used.
|
||||
X86CC = X86::COND_L;
|
||||
std::swap(Op0, Op1);
|
||||
break;
|
||||
case X86::COND_LE:
|
||||
// Use SBB-based lowering; commute so ZF isn't used.
|
||||
X86CC = X86::COND_GE;
|
||||
std::swap(Op0, Op1);
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Unexpected X86CC.");
|
||||
return SDValue();
|
||||
}
|
||||
MVT HalfType = getPointerTy();
|
||||
// FIXME: Refactor this code out to implement ISD::SADDO and friends.
|
||||
SDValue Op0Low = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType,
|
||||
Op0, DAG.getIntPtrConstant(0));
|
||||
SDValue Op1Low = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType,
|
||||
Op1, DAG.getIntPtrConstant(0));
|
||||
SDValue Op0High = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType,
|
||||
Op0, DAG.getIntPtrConstant(1));
|
||||
SDValue Op1High = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfType,
|
||||
Op1, DAG.getIntPtrConstant(1));
|
||||
// Redirect some cases which will simplify to the generic expansion;
|
||||
// X86ISD::SUB and X86ISD::SBB are not optimized well at the moment.
|
||||
// FIXME: We really need to add DAGCombines for SUB/SBB/etc.
|
||||
if (Op1Low.getOpcode() == ISD::Constant &&
|
||||
cast<ConstantSDNode>(Op1Low)->isNullValue())
|
||||
return SDValue();
|
||||
if (Op0Low.getOpcode() == ISD::Constant &&
|
||||
cast<ConstantSDNode>(Op0Low)->isAllOnesValue())
|
||||
return SDValue();
|
||||
SDValue res1, res2;
|
||||
SDVTList VTList = DAG.getVTList(HalfType, MVT::i32);
|
||||
res1 = DAG.getNode(X86ISD::SUB, dl, VTList, Op0Low, Op1Low).getValue(1);
|
||||
res2 = DAG.getNode(X86ISD::SBB, dl, VTList, Op0High, Op1High,
|
||||
res1).getValue(1);
|
||||
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
|
||||
DAG.getConstant(X86CC, MVT::i8), res2);
|
||||
}
|
||||
|
||||
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
|
||||
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
|
||||
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
|
||||
|
@ -1,18 +1,35 @@
|
||||
; RUN: llc < %s -march=x86 | grep cmp | count 1
|
||||
; RUN: llc < %s -march=x86 | grep shr | count 1
|
||||
; RUN: llc < %s -march=x86 | grep xor | count 1
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
define i1 @t1(i64 %x) nounwind {
|
||||
%B = icmp slt i64 %x, 0
|
||||
ret i1 %B
|
||||
; General case
|
||||
define i1 @t1(i64 %x, i64 %y) nounwind {
|
||||
; CHECK: @t1
|
||||
; CHECK: subl
|
||||
; CHECK: sbbl
|
||||
; CHECK: setl %al
|
||||
%B = icmp slt i64 %x, %y
|
||||
ret i1 %B
|
||||
}
|
||||
|
||||
; Some special cases
|
||||
define i1 @t2(i64 %x) nounwind {
|
||||
%tmp = icmp ult i64 %x, 4294967296
|
||||
ret i1 %tmp
|
||||
; CHECK: @t2
|
||||
; CHECK: shrl $31, %eax
|
||||
%B = icmp slt i64 %x, 0
|
||||
ret i1 %B
|
||||
}
|
||||
|
||||
define i1 @t3(i32 %x) nounwind {
|
||||
%tmp = icmp ugt i32 %x, -1
|
||||
ret i1 %tmp
|
||||
define i1 @t3(i64 %x) nounwind {
|
||||
; CHECK: @t3
|
||||
; CHECX: cmpl $0
|
||||
; CHECX: sete %al
|
||||
%tmp = icmp ult i64 %x, 4294967296
|
||||
ret i1 %tmp
|
||||
}
|
||||
|
||||
define i1 @t4(i64 %x) nounwind {
|
||||
; CHECK: @t4
|
||||
; CHECX: cmpl $0
|
||||
; CHECX: setne %al
|
||||
%tmp = icmp ugt i64 %x, 4294967295
|
||||
ret i1 %tmp
|
||||
}
|
||||
|
@ -39,7 +39,8 @@ entry:
|
||||
; 32: t3:
|
||||
; 32: cmpl $1
|
||||
; 32: sbbl
|
||||
; 32: cmpl
|
||||
; 32: subl
|
||||
; 32: sbbl
|
||||
; 32: xorl
|
||||
|
||||
; 64: t3:
|
||||
|
Loading…
Reference in New Issue
Block a user