mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-03 09:14:30 +00:00
[PowerPC] Improve int_to_fp(fp_to_int(x)) combining
The old target DAG combine that allowed for performing int_to_fp(fp_to_int(x)) without a load/store pair is updated here with support for unsigned integers, and to support single-precision values without a third rounding step, on newer cores with the appropriate instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225248 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
17395fa733
commit
10ae865847
@ -631,6 +631,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
|
||||
|
||||
// We have target-specific dag combine patterns for the following nodes:
|
||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||
if (Subtarget.hasFPCVT())
|
||||
setTargetDAGCombine(ISD::UINT_TO_FP);
|
||||
setTargetDAGCombine(ISD::LOAD);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
setTargetDAGCombine(ISD::BR_CC);
|
||||
@ -8349,6 +8351,75 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
|
||||
N->getOperand(0), ShiftCst), ShiftCst);
|
||||
}
|
||||
|
||||
/// combineFPToIntToFP - Target DAG combine for SINT_TO_FP / UINT_TO_FP nodes.
///
/// When the integer operand is itself produced by FP_TO_SINT / FP_TO_UINT,
/// rewrite int_to_fp(fp_to_int(x)) as FCTID[U]Z followed by FCFID[U][S], so
/// that no store/load pair is needed between the two conversions.
///
/// \param N   The SINT_TO_FP or UINT_TO_FP node being combined.
/// \param DCI Combiner state; supplies the DAG and the combiner worklist.
/// \returns the replacement value, or an empty SDValue if the node is left
///          untouched.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (!Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);
  SDValue IntVal = Op.getOperand(0);
  const EVT ResVT = Op.getValueType();
  const EVT IntVT = IntVal.getValueType();

  // Don't handle ppc_fp128 here or i1 conversions.
  if (ResVT != MVT::f32 && ResVT != MVT::f64)
    return SDValue();
  if (IntVT == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (IntVT == MVT::i32)
    return SDValue();

  // The conversions emitted below go through a 64-bit integer. An
  // intermediate type wider than that (e.g. i128 before type legalization)
  // can hold values a 64-bit conversion cannot represent, so folding it would
  // change the result for in-range inputs.
  if (IntVT.getSizeInBits() > 64)
    return SDValue();

  const bool IsUnsignedResult = Op.getOpcode() == ISD::UINT_TO_FP;
  assert((!IsUnsignedResult || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  const bool UseSingle = Subtarget.hasFPCVT() && ResVT == MVT::f32;
  unsigned FCFOp;
  if (IsUnsignedResult)
    FCFOp = UseSingle ? PPCISD::FCFIDUS : PPCISD::FCFIDU;
  else
    FCFOp = UseSingle ? PPCISD::FCFIDS : PPCISD::FCFID;
  MVT FCFTy = UseSingle ? MVT::f32 : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all. The unsigned FP -> int
  // direction is only used when FPCVT is available.
  const unsigned IntOpc = IntVal.getOpcode();
  const bool FromSigned = IntOpc == ISD::FP_TO_SINT;
  const bool FromUnsigned =
      IntOpc == ISD::FP_TO_UINT && Subtarget.hasFPCVT();
  if (!FromSigned && !FromUnsigned)
    return SDValue();

  SDValue Src = IntVal.getOperand(0);
  if (Src.getValueType() == MVT::f32) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
    DCI.AddToWorklist(Src.getNode());
  } else if (Src.getValueType() != MVT::f64) {
    // Make sure that we don't pick up a ppc_fp128 source value: the FP -> int
    // node built below takes an f64 operand.
    return SDValue();
  }

  const unsigned FCTOp = FromSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;

  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

  // Without FPCVT the int -> FP step produced an f64; round it to the
  // requested f32 result.
  if (ResVT == MVT::f32 && !Subtarget.hasFPCVT()) {
    FP = DAG.getNode(ISD::FP_ROUND, dl,
                     MVT::f32, FP, DAG.getIntPtrConstant(0));
    DCI.AddToWorklist(FP.getNode());
  }

  return FP;
}
|
||||
|
||||
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
|
||||
// builtins) into loads with swaps.
|
||||
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
|
||||
@ -8483,36 +8554,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::SELECT_CC:
|
||||
return DAGCombineTruncBoolExt(N, DCI);
|
||||
case ISD::SINT_TO_FP:
|
||||
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
|
||||
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
|
||||
// Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
|
||||
// We allow the src/dst to be either f32/f64, but the intermediate
|
||||
// type must be i64.
|
||||
if (N->getOperand(0).getValueType() == MVT::i64 &&
|
||||
N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
|
||||
SDValue Val = N->getOperand(0).getOperand(0);
|
||||
if (Val.getValueType() == MVT::f32) {
|
||||
Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.getNode());
|
||||
}
|
||||
|
||||
Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.getNode());
|
||||
Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
|
||||
DCI.AddToWorklist(Val.getNode());
|
||||
if (N->getValueType(0) == MVT::f32) {
|
||||
Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
|
||||
DAG.getIntPtrConstant(0));
|
||||
DCI.AddToWorklist(Val.getNode());
|
||||
}
|
||||
return Val;
|
||||
} else if (N->getOperand(0).getValueType() == MVT::i32) {
|
||||
// If the intermediate type is i32, we can avoid the load/store here
|
||||
// too.
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ISD::UINT_TO_FP:
|
||||
return combineFPToIntToFP(N, DCI);
|
||||
case ISD::STORE: {
|
||||
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
|
||||
if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
|
||||
|
@ -748,6 +748,7 @@ namespace llvm {
|
||||
|
||||
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
/// Combine a SINT_TO_FP/UINT_TO_FP node whose operand is an FP_TO_SINT/
/// FP_TO_UINT into direct FP conversion nodes (no store/load pair). Returns
/// an empty SDValue when the pattern does not apply.
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
|
||||
unsigned &RefinementSteps,
|
||||
|
70
test/CodeGen/PowerPC/fp-to-int-to-fp.ll
Normal file
70
test/CodeGen/PowerPC/fp-to-int-to-fp.ll
Normal file
@ -0,0 +1,70 @@
|
||||
; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=FPCVT
|
||||
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=PPC64
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define float @fool(float %X) #0 {
entry:
  ; Signed round trip through i64 with a single-precision source and result.
  %as.int = fptosi float %X to i64
  %round.trip = sitofp i64 %as.int to float
  ret float %round.trip

; On the a2 run the conversion back produces the single-precision result
; directly.
; FPCVT-LABEL: @fool
; FPCVT: fctidz [[REG1:[0-9]+]], 1
; FPCVT: fcfids 1, [[REG1]]
; FPCVT: blr

; On the generic ppc64 run the conversion back is double-precision and is
; followed by an explicit rounding step.
; PPC64-LABEL: @fool
; PPC64: fctidz [[REG1:[0-9]+]], 1
; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]]
; PPC64: frsp 1, [[REG2]]
; PPC64: blr
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define double @foodl(double %X) #0 {
entry:
  ; Signed round trip through i64 with a double-precision source and result.
  %as.int = fptosi double %X to i64
  %round.trip = sitofp i64 %as.int to double
  ret double %round.trip

; Both run lines expect the same two-instruction sequence here.
; FPCVT-LABEL: @foodl
; FPCVT: fctidz [[REG1:[0-9]+]], 1
; FPCVT: fcfid 1, [[REG1]]
; FPCVT: blr

; PPC64-LABEL: @foodl
; PPC64: fctidz [[REG1:[0-9]+]], 1
; PPC64: fcfid 1, [[REG1]]
; PPC64: blr
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define float @fooul(float %X) #0 {
entry:
  ; Unsigned round trip through i64 with a single-precision source and result.
  ; Only the a2 run line has expectations for this function.
  %as.uint = fptoui float %X to i64
  %round.trip = uitofp i64 %as.uint to float
  ret float %round.trip

; FPCVT-LABEL: @fooul
; FPCVT: fctiduz [[REG1:[0-9]+]], 1
; FPCVT: fcfidus 1, [[REG1]]
; FPCVT: blr
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define double @fooudl(double %X) #0 {
entry:
  ; Unsigned round trip through i64 with a double-precision source and result.
  ; Only the a2 run line has expectations for this function.
  %as.uint = fptoui double %X to i64
  %round.trip = uitofp i64 %as.uint to double
  ret double %round.trip

; FPCVT-LABEL: @fooudl
; FPCVT: fctiduz [[REG1:[0-9]+]], 1
; FPCVT: fcfidu 1, [[REG1]]
; FPCVT: blr
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
|
Loading…
x
Reference in New Issue
Block a user