mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-15 06:18:50 +00:00
Recommit 132404 with fixes. rdar://problem/5993888
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132424 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9aaa02a1d2
commit
ec880283b3
@ -1759,13 +1759,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
||||
if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
|
||||
Op.getOperand(0).getValueType().isFloatingPoint() &&
|
||||
!Op.getOperand(0).getValueType().isVector()) {
|
||||
if (isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32)) {
|
||||
EVT Ty = (isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType())) ?
|
||||
Op.getValueType() : MVT::i32;
|
||||
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
|
||||
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
|
||||
if (OpVTLegal || i32Legal) {
|
||||
EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
|
||||
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
|
||||
// place. We expect the SHL to be eliminated by other optimizations.
|
||||
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
|
||||
if (Ty != Op.getValueType())
|
||||
if (!OpVTLegal)
|
||||
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
|
||||
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
|
||||
SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
|
||||
|
@ -9391,6 +9391,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::UCOMI: return "X86ISD::UCOMI";
|
||||
case X86ISD::SETCC: return "X86ISD::SETCC";
|
||||
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
|
||||
case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
|
||||
case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
|
||||
case X86ISD::CMOV: return "X86ISD::CMOV";
|
||||
case X86ISD::BRCOND: return "X86ISD::BRCOND";
|
||||
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
|
||||
@ -11668,12 +11670,88 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
|
||||
}
|
||||
|
||||
|
||||
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ...))
|
||||
// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
|
||||
// and friends. Likewise for OR -> CMPNEQSS.
// On a match, returns an i8 value built from an FSETCCss/FSETCCsd node;
// otherwise returns an empty SDValue() (callers test R.getNode()).
// NOTE(review): the DCI parameter is not referenced in the body.
|
||||
static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
// Passed to isAndOrOfSetCCs (defined outside this view) and never read
// again here; presumably an out-parameter -- TODO confirm.
unsigned opcode;
|
||||
|
||||
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
|
||||
// we're requiring SSE2 for both.
|
||||
if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
|
||||
// N0 / N1 are the two setcc operands of the AND/OR.
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
// The compare is taken from operand 1 of the first setcc only.
// NOTE(review): assumes isAndOrOfSetCCs already verified that N1 uses the
// same compare node -- confirm against its definition.
SDValue CMP = N0->getOperand(1);
|
||||
SDValue CMP0 = CMP->getOperand(0);
|
||||
SDValue CMP1 = CMP->getOperand(1);
|
||||
EVT VT = CMP0.getValueType();
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
// Only scalar f32 / f64 compares are rewritten.
if (VT == MVT::f32 || VT == MVT::f64) {
|
||||
bool ExpectingFlags = false;
|
||||
// Check for any users that want flags:
// The scan stops at the first user that sets ExpectingFlags. Only
// CopyToReg and the three extend opcodes leave it false; every other
// user opcode (the default label shares the BR_CC/BRCOND/SELECT body)
// sets it true and so blocks the rewrite.
|
||||
for (SDNode::use_iterator UI = N->use_begin(),
|
||||
UE = N->use_end();
|
||||
!ExpectingFlags && UI != UE; ++UI)
|
||||
switch (UI->getOpcode()) {
|
||||
default:
|
||||
case ISD::BR_CC:
|
||||
case ISD::BRCOND:
|
||||
case ISD::SELECT:
|
||||
ExpectingFlags = true;
|
||||
break;
|
||||
case ISD::CopyToReg:
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::ANY_EXTEND:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!ExpectingFlags) {
|
||||
// Operand 0 of each setcc is read as its X86 condition code.
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
|
||||
enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
|
||||
|
||||
// Canonicalize: if the E/NE code is in cc1, swap so it ends up in cc0.
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
|
||||
X86::CondCode tmp = cc0;
|
||||
cc0 = cc1;
|
||||
cc1 = tmp;
|
||||
}
|
||||
|
||||
// Accept only the pairs (E, NP) and (NE, P).
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
|
||||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
|
||||
bool is64BitFP = (CMP0.getValueType() == MVT::f64);
|
||||
X86ISD::NodeType NTOperator = is64BitFP ?
|
||||
X86ISD::FSETCCsd : X86ISD::FSETCCss;
|
||||
// FIXME: need symbolic constants for these magic numbers.
|
||||
// See X86ATTInstPrinter.cpp:printSSECC().
// 0 is used for COND_E and 4 for COND_NE; presumably the SSE compare
// predicate immediates for "eq" and "neq" -- TODO confirm.
|
||||
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
|
||||
// NOTE(review): the result type is MVT::f32 even when NTOperator is
// FSETCCsd, while the SDTX86Cmpsd type profile declares an f64 result --
// confirm this mismatch is intended.
SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP0, CMP1,
|
||||
DAG.getConstant(x86cc, MVT::i8));
|
||||
// Reinterpret the FP result as i32, keep only bit 0, and narrow to i8.
SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
|
||||
OnesOrZeroesF);
|
||||
SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
|
||||
DAG.getConstant(1, MVT::i32));
|
||||
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
|
||||
return OneBitOfTruth;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// No match: signal "no combine performed" with an empty SDValue.
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
if (DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
|
||||
if (R.getNode())
|
||||
return R;
|
||||
|
||||
// Want to form PANDN nodes, in the hopes of then easily combining them with
|
||||
// OR and AND nodes to form PBLEND/PSIGN.
|
||||
EVT VT = N->getValueType(0);
|
||||
@ -11703,6 +11781,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
||||
if (DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
|
||||
if (R.getNode())
|
||||
return R;
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
|
||||
return SDValue();
|
||||
|
@ -94,6 +94,11 @@ namespace llvm {
|
||||
// ones or all zeros.
|
||||
SETCC_CARRY, // R = carry_bit ? ~0 : 0
|
||||
|
||||
/// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
|
||||
/// Operands are two FP values to compare; result is a mask of
|
||||
/// 0s or 1s. Generally does the right thing for C/C++ with NaNs.
|
||||
FSETCCss, FSETCCsd,
|
||||
|
||||
/// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
|
||||
/// result in an integer GPR. Needs masking for scalar result.
|
||||
FGETSIGNx86,
|
||||
|
@ -41,6 +41,8 @@ def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
|
||||
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
|
||||
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
|
||||
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
|
||||
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
|
||||
def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
|
||||
def X86pshufb : SDNode<"X86ISD::PSHUFB",
|
||||
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>]>>;
|
||||
|
@ -23,6 +23,9 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3,
|
||||
|
||||
def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
|
||||
|
||||
def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
|
||||
|
||||
def SDTX86Cmov : SDTypeProfile<1, 4,
|
||||
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
|
||||
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
|
||||
|
@ -1056,13 +1056,37 @@ let neverHasSideEffects = 1 in {
|
||||
XD, VEX_4V;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
def CMPSSrr : SIi8<0xC2, MRMSrcReg,
|
||||
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
|
||||
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
|
||||
def CMPSSrm : SIi8<0xC2, MRMSrcMem,
|
||||
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
|
||||
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
|
||||
def CMPSDrr : SIi8<0xC2, MRMSrcReg,
|
||||
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
|
||||
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
|
||||
def CMPSDrm : SIi8<0xC2, MRMSrcMem,
|
||||
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
|
||||
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
|
||||
[(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
|
||||
}
|
||||
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
|
||||
defm CMPSS : sse12_cmp_scalar<FR32, f32mem,
|
||||
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
|
||||
"cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
|
||||
defm CMPSD : sse12_cmp_scalar<FR64, f64mem,
|
||||
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
|
||||
"cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
|
||||
def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
|
||||
(outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
|
||||
"cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
|
||||
def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
|
||||
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
|
||||
"cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
|
||||
def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
|
||||
(outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
|
||||
"cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
|
||||
def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
|
||||
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
|
||||
"cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
|
||||
}
|
||||
|
||||
multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
|
||||
|
@ -1,17 +1,15 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
|
||||
; RUN: not grep cmp %t
|
||||
; RUN: not grep xor %t
|
||||
; RUN: grep jne %t | count 1
|
||||
; RUN: grep jp %t | count 1
|
||||
; RUN: grep setnp %t | count 1
|
||||
; RUN: grep sete %t | count 1
|
||||
; RUN: grep and %t | count 1
|
||||
; RUN: grep cvt %t | count 4
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
|
||||
|
||||
define i32 @isint_return(double %d) nounwind {
|
||||
; CHECK-NOT: xor
|
||||
; CHECK: cvt
|
||||
%i = fptosi double %d to i32
|
||||
; CHECK-NEXT: cvt
|
||||
%e = sitofp i32 %i to double
|
||||
; CHECK: cmpeqsd
|
||||
%c = fcmp oeq double %d, %e
|
||||
; CHECK-NEXT: movd
|
||||
; CHECK-NEXT: andl
|
||||
%z = zext i1 %c to i32
|
||||
ret i32 %z
|
||||
}
|
||||
@ -19,9 +17,14 @@ define i32 @isint_return(double %d) nounwind {
|
||||
declare void @foo()
|
||||
|
||||
define void @isint_branch(double %d) nounwind {
|
||||
; CHECK: cvt
|
||||
%i = fptosi double %d to i32
|
||||
; CHECK-NEXT: cvt
|
||||
%e = sitofp i32 %i to double
|
||||
; CHECK: ucomisd
|
||||
%c = fcmp oeq double %d, %e
|
||||
; CHECK-NEXT: jne
|
||||
; CHECK-NEXT: jp
|
||||
br i1 %c, label %true, label %false
|
||||
true:
|
||||
call void @foo()
|
||||
|
@ -10,4 +10,4 @@ entry:
|
||||
}
|
||||
|
||||
; test that the load is folded.
|
||||
; CHECK: ucomisd (%{{rdi|rdx}}), %xmm0
|
||||
; CHECK: cmpeqsd (%{{rdi|rdx}}), %xmm0
|
||||
|
@ -1,5 +1,4 @@
|
||||
; RUN: llc < %s -march=x86 | grep set | count 2
|
||||
; RUN: llc < %s -march=x86 | grep and
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
define zeroext i8 @t(double %x) nounwind readnone {
|
||||
entry:
|
||||
@ -7,5 +6,16 @@ entry:
|
||||
%1 = sitofp i32 %0 to double ; <double> [#uses=1]
|
||||
%2 = fcmp oeq double %1, %x ; <i1> [#uses=1]
|
||||
%retval12 = zext i1 %2 to i8 ; <i8> [#uses=1]
|
||||
; CHECK: cmpeqsd
|
||||
ret i8 %retval12
|
||||
}
|
||||
|
||||
define zeroext i8 @u(double %x) nounwind readnone {
|
||||
entry:
|
||||
%0 = fptosi double %x to i32 ; <i32> [#uses=1]
|
||||
%1 = sitofp i32 %0 to double ; <double> [#uses=1]
|
||||
%2 = fcmp une double %1, %x ; <i1> [#uses=1]
|
||||
%retval12 = zext i1 %2 to i8 ; <i8> [#uses=1]
|
||||
; CHECK: cmpneqsd
|
||||
ret i8 %retval12
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user