mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-23 14:30:50 +00:00
More fcopysign correctness and performance fixes.
The previous codegen for the slow path (when values are in VFP / NEON registers) was incorrect if the source is NaN. The new codegen uses the NEON vbsl instruction to copy the sign bit, e.g.
    vmov.i32 d1, #0x80000000
    vbsl d1, d2, d0
If NEON is not available, it uses integer instructions to copy the sign bit.
rdar://9034702
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126295 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d2a5073423
commit
e573fb3255
@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
EVT VT = Op.getValueType();
|
EVT VT = Op.getValueType();
|
||||||
EVT SrcVT = Tmp1.getValueType();
|
EVT SrcVT = Tmp1.getValueType();
|
||||||
bool F2IisFast = Subtarget->isCortexA9() ||
|
bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
|
||||||
Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
|
Tmp0.getOpcode() == ARMISD::VMOVDRR;
|
||||||
|
bool UseNEON = !InGPR && Subtarget->hasNEON();
|
||||||
|
|
||||||
|
if (UseNEON) {
|
||||||
|
// Use VBSL to copy the sign bit.
|
||||||
|
unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
|
||||||
|
SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
|
||||||
|
DAG.getTargetConstant(EncodedVal, MVT::i32));
|
||||||
|
EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
|
||||||
|
if (VT == MVT::f64)
|
||||||
|
Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
|
||||||
|
DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
|
||||||
|
DAG.getConstant(32, MVT::i32));
|
||||||
|
else /*if (VT == MVT::f32)*/
|
||||||
|
Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
|
||||||
|
if (SrcVT == MVT::f32) {
|
||||||
|
Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
|
||||||
|
if (VT == MVT::f64)
|
||||||
|
Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
|
||||||
|
DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
|
||||||
|
DAG.getConstant(32, MVT::i32));
|
||||||
|
}
|
||||||
|
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
|
||||||
|
Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
|
||||||
|
|
||||||
|
SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
|
||||||
|
MVT::i32);
|
||||||
|
AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
|
||||||
|
SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
|
||||||
|
DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
|
||||||
|
|
||||||
|
SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
|
||||||
|
DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
|
||||||
|
DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
|
||||||
|
if (SrcVT == MVT::f32) {
|
||||||
|
Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
|
||||||
|
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
|
||||||
|
DAG.getConstant(0, MVT::i32));
|
||||||
|
} else {
|
||||||
|
Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Res;
|
||||||
|
}
|
||||||
|
|
||||||
// Bitcast operand 1 to i32.
|
// Bitcast operand 1 to i32.
|
||||||
if (SrcVT == MVT::f64)
|
if (SrcVT == MVT::f64)
|
||||||
@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
&Tmp1, 1).getValue(1);
|
&Tmp1, 1).getValue(1);
|
||||||
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
|
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
|
||||||
|
|
||||||
// If float to int conversion isn't going to be super expensive, then simply
|
// Or in the signbit with integer operations.
|
||||||
// or in the signbit.
|
SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
|
||||||
if (F2IisFast) {
|
SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
|
||||||
SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
|
Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
|
||||||
SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
|
if (VT == MVT::f32) {
|
||||||
Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
|
Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
|
||||||
if (VT == MVT::f32) {
|
DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
|
||||||
Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
|
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
|
||||||
DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
|
DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
|
||||||
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
|
|
||||||
DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// f64: Or the high part with signbit and then combine two parts.
|
|
||||||
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
|
|
||||||
&Tmp0, 1);
|
|
||||||
SDValue Lo = Tmp0.getValue(0);
|
|
||||||
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
|
|
||||||
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
|
|
||||||
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove the signbit of operand 0.
|
// f64: Or the high part with signbit and then combine two parts.
|
||||||
Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
|
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
|
||||||
|
&Tmp0, 1);
|
||||||
// If operand 1 signbit is one, then negate operand 0.
|
SDValue Lo = Tmp0.getValue(0);
|
||||||
SDValue ARMcc;
|
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
|
||||||
SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
|
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
|
||||||
ISD::SETLT, ARMcc, DAG, dl);
|
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
|
||||||
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
|
|
||||||
return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
|
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
|
||||||
|
@ -9,9 +9,8 @@ entry:
|
|||||||
; SOFT: bfi r0, r1, #31, #1
|
; SOFT: bfi r0, r1, #31, #1
|
||||||
|
|
||||||
; HARD: test1:
|
; HARD: test1:
|
||||||
; HARD: vabs.f32 d0, d0
|
; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
|
||||||
; HARD: cmp r0, #0
|
; HARD: vbsl [[REG1]], d2, d0
|
||||||
; HARD: vneglt.f32 s0, s0
|
|
||||||
%0 = tail call float @copysignf(float %x, float %y) nounwind
|
%0 = tail call float @copysignf(float %x, float %y) nounwind
|
||||||
ret float %0
|
ret float %0
|
||||||
}
|
}
|
||||||
@ -23,9 +22,9 @@ entry:
|
|||||||
; SOFT: bfi r1, r2, #31, #1
|
; SOFT: bfi r1, r2, #31, #1
|
||||||
|
|
||||||
; HARD: test2:
|
; HARD: test2:
|
||||||
; HARD: vabs.f64 d0, d0
|
; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000
|
||||||
; HARD: cmp r1, #0
|
; HARD: vshl.i64 [[REG2]], [[REG2]], #32
|
||||||
; HARD: vneglt.f64 d0, d0
|
; HARD: vbsl [[REG2]], d1, d0
|
||||||
%0 = tail call double @copysign(double %x, double %y) nounwind
|
%0 = tail call double @copysign(double %x, double %y) nounwind
|
||||||
ret double %0
|
ret double %0
|
||||||
}
|
}
|
||||||
@ -33,9 +32,9 @@ entry:
|
|||||||
define double @test3(double %x, double %y, double %z) nounwind {
|
define double @test3(double %x, double %y, double %z) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; SOFT: test3:
|
; SOFT: test3:
|
||||||
; SOFT: vabs.f64
|
; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000
|
||||||
; SOFT: cmp {{.*}}, #0
|
; SOFT: vshl.i64 [[REG3]], [[REG3]], #32
|
||||||
; SOFT: vneglt.f64
|
; SOFT: vbsl [[REG3]],
|
||||||
%0 = fmul double %x, %y
|
%0 = fmul double %x, %y
|
||||||
%1 = tail call double @copysign(double %0, double %z) nounwind
|
%1 = tail call double @copysign(double %0, double %z) nounwind
|
||||||
ret double %1
|
ret double %1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user