[PowerPC] Improve int_to_fp(fp_to_int(x)) combining

The old target DAG combine that performed int_to_fp(fp_to_int(x)) without a
load/store pair is updated here to support unsigned integers and, on newer
cores with the appropriate instructions, to convert single-precision values
without a third rounding step.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225248 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-01-06 06:01:57 +00:00
parent 17395fa733
commit 10ae865847
3 changed files with 144 additions and 30 deletions

View File

@ -631,6 +631,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
@ -8349,6 +8351,75 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
N->getOperand(0), ShiftCst), ShiftCst);
}
/// Combine (int_to_fp (fp_to_int X)) into a pair of FP-register conversion
/// nodes (FCTID[U]Z followed by FCFID[U][S]) so that the intermediate integer
/// never has to travel through a store/load pair.
///
/// \param N   The SINT_TO_FP or UINT_TO_FP node being combined.
/// \param DCI Combiner state; supplies the DAG and receives newly created
///            nodes that should be revisited.
/// \returns The replacement value, or an empty SDValue when the pattern does
///          not match or cannot be handled here.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (!Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 results here.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // The FCTID[U]Z / FCFID[U][S] nodes used below convert to and from 64-bit
  // integers, so only an i64 intermediate value is reproduced faithfully: the
  // truncation/extension implied by a narrower type (including i1), or the
  // extra range of a wider one, would be lost.
  //
  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  const bool UseSingleFCF =
      Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32;
  const bool IsUnsignedResult = Op.getOpcode() == ISD::UINT_TO_FP;
  unsigned FCFOp =
      UseSingleFCF ? (IsUnsignedResult ? PPCISD::FCFIDUS : PPCISD::FCFIDS)
                   : (IsUnsignedResult ? PPCISD::FCFIDU : PPCISD::FCFID);
  MVT FCFTy = UseSingleFCF ? MVT::f32 : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      // The FP -> int conversion node below is built on f64, so widen first.
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value; it cannot
      // be fed to the f64 conversion node directly.
      return SDValue();
    }

    unsigned FCTOp =
        Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ
                                                        : PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT the int -> FP step produced an f64; round it down when
    // the original node yields f32.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
@ -8483,36 +8554,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
// We allow the src/dst to be either f32/f64, but the intermediate
// type must be i64.
if (N->getOperand(0).getValueType() == MVT::i64 &&
N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
SDValue Val = N->getOperand(0).getOperand(0);
if (Val.getValueType() == MVT::f32) {
Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
}
Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
if (N->getValueType(0) == MVT::f32) {
Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
DAG.getIntPtrConstant(0));
DCI.AddToWorklist(Val.getNode());
}
return Val;
} else if (N->getOperand(0).getValueType() == MVT::i32) {
// If the intermediate type is i32, we can avoid the load/store here
// too.
}
}
}
break;
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&

View File

@ -748,6 +748,7 @@ namespace llvm {
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
/// Target DAG combine for SINT_TO_FP / UINT_TO_FP nodes whose operand is an
/// FP_TO_SINT / FP_TO_UINT: rewrites the pair so that no store/load pair is
/// needed. Returns an empty SDValue when the combine does not apply.
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps,

View File

@ -0,0 +1,70 @@
; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=FPCVT
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=PPC64
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind readnone
; Signed round trip through i64 on a float value (fptosi followed by sitofp).
; The directives below expect fctidz feeding fcfids for the -mcpu=a2 run, and
; fctidz, fcfid, then an frsp rounding step for the -mcpu=ppc64 run.
define float @fool(float %X) #0 {
entry:
%conv = fptosi float %X to i64
%conv1 = sitofp i64 %conv to float
ret float %conv1
; FPCVT-LABEL: @fool
; FPCVT: fctidz [[REG1:[0-9]+]], 1
; FPCVT: fcfids 1, [[REG1]]
; FPCVT: blr
; PPC64-LABEL: @fool
; PPC64: fctidz [[REG1:[0-9]+]], 1
; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]]
; PPC64: frsp 1, [[REG2]]
; PPC64: blr
}
; Function Attrs: nounwind readnone
; Signed round trip through i64 on a double value (fptosi followed by sitofp).
; Both runs expect the same sequence: fctidz feeding fcfid, with no extra
; rounding instruction checked.
define double @foodl(double %X) #0 {
entry:
%conv = fptosi double %X to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
; FPCVT-LABEL: @foodl
; FPCVT: fctidz [[REG1:[0-9]+]], 1
; FPCVT: fcfid 1, [[REG1]]
; FPCVT: blr
; PPC64-LABEL: @foodl
; PPC64: fctidz [[REG1:[0-9]+]], 1
; PPC64: fcfid 1, [[REG1]]
; PPC64: blr
}
; Function Attrs: nounwind readnone
; Unsigned round trip through i64 on a float value (fptoui followed by uitofp).
; Only the -mcpu=a2 run has directives here; they expect fctiduz feeding
; fcfidus. No expectations are stated for the -mcpu=ppc64 run.
define float @fooul(float %X) #0 {
entry:
%conv = fptoui float %X to i64
%conv1 = uitofp i64 %conv to float
ret float %conv1
; FPCVT-LABEL: @fooul
; FPCVT: fctiduz [[REG1:[0-9]+]], 1
; FPCVT: fcfidus 1, [[REG1]]
; FPCVT: blr
}
; Function Attrs: nounwind readnone
; Unsigned round trip through i64 on a double value (fptoui followed by
; uitofp). Only the -mcpu=a2 run has directives here; they expect fctiduz
; feeding fcfidu. No expectations are stated for the -mcpu=ppc64 run.
define double @fooudl(double %X) #0 {
entry:
%conv = fptoui double %X to i64
%conv1 = uitofp i64 %conv to double
ret double %conv1
; FPCVT-LABEL: @fooudl
; FPCVT: fctiduz [[REG1:[0-9]+]], 1
; FPCVT: fcfidu 1, [[REG1]]
; FPCVT: blr
}
attributes #0 = { nounwind readnone }