Fix buggy fcopysign lowering.

This
define float @foo(float %x, float %y) nounwind readnone {
entry:
  %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
  ret float %0
}

Was compiled to:
    vmov     s0, r1
    bic      r0, r0, #-2147483648
    vmov     s1, r0
    vcmpe.f32    s0, #0
    vmrs         apsr_nzcv, fpscr
    it           lt
    vneglt.f32   s1, s1
    vmov         r0, s1
    bx           lr

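This fails to copy the sign of -0.0f because the sign is lost in the
floating-point compare against zero: -0.0 compares equal to +0.0, so the
conditional negate is skipped. Also, it's sub-optimal when the inputs are
already in general-purpose registers.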

Now it uses integer AND / OR bit operations when that is profitable. And it's correct!
    lsrs    r1, r1, #31
    bfi     r0, r1, #31, #1
    bx      lr
rdar://8984306


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125357 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2011-02-11 02:28:55 +00:00
parent 98311ecb4a
commit c143dd4f63
2 changed files with 79 additions and 18 deletions

View File

@ -2833,12 +2833,46 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
bool F2IisFast = Subtarget->isCortexA9() ||
Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
&Tmp1, 1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
// If float to int conversion isn't going to be super expensive, then simply
// or in the signbit.
if (F2IisFast) {
SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
if (VT == MVT::f32) {
Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
}
// f64: Or the high part with signbit and then combine two parts.
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
&Tmp0, 1);
SDValue Lo = Tmp0.getValue(0);
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
// Remove the signbit of operand 0.
Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
// If operand 1 signbit is one, then negate operand 0.
SDValue ARMcc;
SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
ISD::SETLT, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{

View File

@ -1,18 +1,45 @@
; RUN: llc < %s -march=arm | grep bic | count 2
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \
; RUN: grep vneg | count 2
; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
define float @test1(float %x, double %y) {
%tmp = fpext float %x to double
%tmp2 = tail call double @copysign( double %tmp, double %y )
%tmp3 = fptrunc double %tmp2 to float
ret float %tmp3
; rdar://8984306
define float @test1(float %x, float %y) nounwind {
entry:
; SOFT: test1:
; SOFT: lsr r1, r1, #31
; SOFT: bfi r0, r1, #31, #1
; HARD: test1:
; HARD: vabs.f32 d0, d0
; HARD: cmp r0, #0
; HARD: vneglt.f32 s0, s0
%0 = tail call float @copysignf(float %x, float %y) nounwind
ret float %0
}
define double @test2(double %x, float %y) {
%tmp = fpext float %y to double
%tmp2 = tail call double @copysign( double %x, double %tmp )
ret double %tmp2
define double @test2(double %x, double %y) nounwind {
entry:
; SOFT: test2:
; SOFT: lsr r2, r3, #31
; SOFT: bfi r1, r2, #31, #1
; HARD: test2:
; HARD: vabs.f64 d0, d0
; HARD: cmp r1, #0
; HARD: vneglt.f64 d0, d0
%0 = tail call double @copysign(double %x, double %y) nounwind
ret double %0
}
declare double @copysign(double, double)
define double @test3(double %x, double %y, double %z) nounwind {
entry:
; SOFT: test3:
; SOFT: vabs.f64
; SOFT: cmp {{.*}}, #0
; SOFT: vneglt.f64
%0 = fmul double %x, %y
%1 = tail call double @copysign(double %0, double %z) nounwind
ret double %1
}
declare double @copysign(double, double) nounwind
declare float @copysignf(float, float) nounwind