mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-06 20:10:38 +00:00
[PowerPC] Reuse a load operand in int->fp conversions
int->fp conversions on PPC must be done through memory loads and stores. On a modern core, this process begins by storing the int value to memory, then loading it using a (sometimes special) FP load instruction. Unfortunately, we would do this even when the value to be converted was itself the result of a load, in which case we can just use that same memory location instead of first copying the value to another one. There is a slight complication when handling int_to_fp(fp_to_int(x)) pairs, because the fp_to_int operand has not been lowered when the int_to_fp is being lowered. We handle this specially by invoking fp_to_int's lowering logic (partially) and getting the necessary memory location (some trivial refactoring was done to make this possible). This is all somewhat ugly, and it would be nice if some later CodeGen stage could just clean this stuff up, but because doing so would involve modifying target-specific nodes (or instructions), it is not immediately clear how that would work. Also, remove a related entry from the README.txt for which we now generate reasonable code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225301 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
193f586fd2
commit
8e9ba0e588
@ -5408,9 +5408,9 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
return Op;
|
||||
}
|
||||
|
||||
// FIXME: Split this code up when LegalizeDAGTypes lands.
|
||||
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
|
||||
SDLoc dl) const {
|
||||
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
|
||||
SelectionDAG &DAG,
|
||||
SDLoc dl) const {
|
||||
assert(Op.getOperand(0).getValueType().isFloatingPoint());
|
||||
SDValue Src = Op.getOperand(0);
|
||||
if (Src.getValueType() == MVT::f32)
|
||||
@ -5459,15 +5459,92 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
|
||||
if (Op.getValueType() == MVT::i32 && !i32Stack) {
|
||||
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
|
||||
DAG.getConstant(4, FIPtr.getValueType()));
|
||||
MPI = MachinePointerInfo();
|
||||
MPI = MPI.getWithOffset(4);
|
||||
}
|
||||
|
||||
return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
|
||||
false, false, false, 0);
|
||||
RLI.Chain = Chain;
|
||||
RLI.Ptr = FIPtr;
|
||||
RLI.MPI = MPI;
|
||||
}
|
||||
|
||||
// Lower an FP_TO_SINT / FP_TO_UINT node to a load of the converted integer.
//
// The conversion work is delegated to LowerFP_TO_INTForReuse, which fills in
// RLI with the chain, pointer and pointer info of the memory location that
// holds the converted value. This wrapper then only has to emit a load of
// Op's result type from that location.
//
// Op  - the FP_TO_SINT / FP_TO_UINT node being lowered.
// DAG - the SelectionDAG in which new nodes are created.
// dl  - debug location attached to the new nodes.
//
// Returns the load producing the integer result.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
|
||||
SDLoc dl) const {
|
||||
// RLI starts out with IsInvariant=false, Alignment=0 and Ranges=nullptr (see
// the ReuseLoadInfo constructor); the helper is expected to set at least
// Chain, Ptr and MPI.
ReuseLoadInfo RLI;
|
||||
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
|
||||
|
||||
// The two literal 'false' arguments are the flags that precede the invariant
// flag in this getLoad overload (presumably volatile and non-temporal --
// confirm against the SelectionDAG::getLoad declaration).
return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
|
||||
false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
|
||||
RLI.Ranges);
|
||||
}
|
||||
|
||||
// We're trying to insert a regular store, S, and then a load, L. If the
|
||||
// incoming value, O, is a load, we might just be able to have our load use the
|
||||
// address used by O. However, we don't know if anything else will store to
|
||||
// that address before we can load from it. To prevent this situation, we need
|
||||
// to insert our load, L, into the chain as a peer of O. To do this, we give L
|
||||
// the same chain operand as O, we create a token factor from the chain results
|
||||
// of O and L, and we replace all uses of O's chain result with that token
|
||||
// factor (see spliceIntoChain below for this last part).
|
||||
// Decide whether the integer value Op already lives at a memory location that
// an FP load can read directly, and if so describe that location in RLI.
//
// Op    - the integer value that is about to be converted to floating point.
// MemVT - the memory type the caller intends to load; an existing load is
//         only reused when its memory type matches exactly.
// RLI   - output; populated only when the function returns true.
// DAG   - the SelectionDAG in which any helper nodes are created.
//
// Returns true when RLI describes a usable location, false when the caller
// must fall back to materializing the value some other way.
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
|
||||
ReuseLoadInfo &RLI,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
// Case 1: Op is a not-yet-lowered fp->int conversion. Run the shared part of
// its lowering to obtain the memory location holding the converted value.
// NOTE(review): the legality query is keyed on the *operand* (FP) type of the
// conversion, not its integer result type -- confirm this is the intended key.
if ((Op.getOpcode() == ISD::FP_TO_UINT ||
|
||||
Op.getOpcode() == ISD::FP_TO_SINT) &&
|
||||
isOperationLegalOrCustom(Op.getOpcode(),
|
||||
Op.getOperand(0).getValueType())) {
|
||||
|
||||
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
|
||||
// RLI.ResChain is not assigned on this path in this function, so a later
// spliceIntoChain call with it returns early via its null check (assuming
// the helper does not set it either).
return true;
|
||||
}
|
||||
|
||||
// Case 2: Op must be a plain load: not extending, not volatile and not
// non-temporal.
LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
|
||||
if (!LD || !ISD::isNON_EXTLoad(LD) || LD->isVolatile() || LD->isNonTemporal())
|
||||
return false;
|
||||
// The existing load must access exactly the memory type the caller wants.
if (LD->getMemoryVT() != MemVT)
|
||||
return false;
|
||||
|
||||
RLI.Ptr = LD->getBasePtr();
|
||||
// For an indexed load with a real (non-undef) offset, the address actually
// accessed is base + offset; only pre-increment addressing is expected here.
if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
|
||||
assert(LD->getAddressingMode() == ISD::PRE_INC &&
|
||||
"Non-pre-inc AM on PPC?");
|
||||
RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
|
||||
LD->getOffset());
|
||||
}
|
||||
|
||||
// Give the new load the same input chain and memory-operand properties as
// the load being reused.
RLI.Chain = LD->getChain();
|
||||
RLI.MPI = LD->getPointerInfo();
|
||||
RLI.IsInvariant = LD->isInvariant();
|
||||
RLI.Alignment = LD->getAlignment();
|
||||
RLI.AAInfo = LD->getAAInfo();
|
||||
RLI.Ranges = LD->getRanges();
|
||||
|
||||
// Record the original load's chain result so the caller can splice the new
// load in as its peer: result #2 for an indexed load, #1 otherwise.
RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Given the head of the old chain, ResChain, insert a token factor containing
|
||||
// it and NewResChain, and make users of ResChain now be users of that token
|
||||
// factor.
|
||||
// ResChain    - chain result of the original load; when it is null this
//               function does nothing.
// NewResChain - chain result of the newly created load.
// DAG         - the SelectionDAG being modified.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
|
||||
SDValue NewResChain,
|
||||
SelectionDAG &DAG) const {
|
||||
// Nothing to splice when there is no original chain result.
if (!ResChain)
|
||||
return;
|
||||
|
||||
SDLoc dl(NewResChain);
|
||||
|
||||
// Build the token factor with an UNDEF placeholder instead of ResChain. At
// the time of the replace-all-uses below the token factor is therefore not a
// user of ResChain and is not rewritten to refer to itself (presumably the
// reason for the two-step construction).
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
NewResChain, DAG.getUNDEF(MVT::Other));
|
||||
// Guard against getNode handing back NewResChain's own node rather than a
// distinct token factor.
assert(TF.getNode() != NewResChain.getNode() &&
|
||||
"A new TF really is required here");
|
||||
|
||||
// Order matters: first redirect every existing user of ResChain to the token
// factor, and only then install ResChain as one of its operands.
DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
|
||||
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
|
||||
}
|
||||
|
||||
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
|
||||
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
|
||||
@ -5539,7 +5616,17 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
|
||||
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
|
||||
}
|
||||
|
||||
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
|
||||
ReuseLoadInfo RLI;
|
||||
SDValue Bits;
|
||||
|
||||
if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
|
||||
Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
|
||||
false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
|
||||
RLI.Ranges);
|
||||
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
|
||||
} else
|
||||
Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
|
||||
|
||||
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
|
||||
|
||||
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
|
||||
@ -5560,23 +5647,36 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
|
||||
|
||||
SDValue Ld;
|
||||
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
|
||||
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
|
||||
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
|
||||
ReuseLoadInfo RLI;
|
||||
bool ReusingLoad;
|
||||
if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
|
||||
DAG))) {
|
||||
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
|
||||
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
|
||||
|
||||
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
|
||||
MachinePointerInfo::getFixedStack(FrameIdx),
|
||||
false, false, 0);
|
||||
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
|
||||
MachinePointerInfo::getFixedStack(FrameIdx),
|
||||
false, false, 0);
|
||||
|
||||
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
|
||||
"Expected an i32 store");
|
||||
|
||||
RLI.Ptr = FIdx;
|
||||
RLI.Chain = Store;
|
||||
RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
|
||||
RLI.Alignment = 4;
|
||||
}
|
||||
|
||||
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
|
||||
"Expected an i32 store");
|
||||
MachineMemOperand *MMO =
|
||||
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
|
||||
MachineMemOperand::MOLoad, 4, 4);
|
||||
SDValue Ops[] = { Store, FIdx };
|
||||
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
|
||||
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
|
||||
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
|
||||
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
|
||||
PPCISD::LFIWZX : PPCISD::LFIWAX,
|
||||
dl, DAG.getVTList(MVT::f64, MVT::Other),
|
||||
Ops, MVT::i32, MMO);
|
||||
if (ReusingLoad)
|
||||
spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
|
||||
} else {
|
||||
assert(Subtarget.isPPC64() &&
|
||||
"i32->FP without LFIWAX supported only on PPC64");
|
||||
@ -6489,7 +6589,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||
case ISD::FP_TO_UINT:
|
||||
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
|
||||
SDLoc(Op));
|
||||
SDLoc(Op));
|
||||
case ISD::UINT_TO_FP:
|
||||
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
|
||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||
|
@ -591,6 +591,28 @@ namespace llvm {
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
// Describes a memory location from which an int->fp conversion can load its
// input directly, together with the properties needed to build that load.
// Filled in by canReuseLoadAddress / LowerFP_TO_INTForReuse.
struct ReuseLoadInfo {
|
||||
// Address to load from.
SDValue Ptr;
|
||||
// Chain operand to give the new load.
SDValue Chain;
|
||||
// Chain result of the load being reused, consumed by spliceIntoChain; left
// default-constructed when no existing load is being reused.
SDValue ResChain;
|
||||
// Pointer info for the memory location.
MachinePointerInfo MPI;
|
||||
// Whether the new load is marked invariant.
bool IsInvariant;
|
||||
// Alignment passed to the new load; the constructor defaults it to 0.
unsigned Alignment;
|
||||
// Alias-analysis metadata forwarded to the new load.
AAMDNodes AAInfo;
|
||||
// Range metadata forwarded to the new load, or nullptr if there is none.
const MDNode *Ranges;
|
||||
|
||||
// Start with the defaults IsInvariant=false, Alignment=0, Ranges=nullptr; the
// remaining members use their own default constructors.
ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
|
||||
};
|
||||
|
||||
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
|
||||
SelectionDAG &DAG) const;
|
||||
void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
|
||||
SelectionDAG &DAG, SDLoc dl) const;
|
||||
|
||||
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
|
||||
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
|
||||
|
||||
|
@ -302,27 +302,6 @@ http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
float foo(float X) { return (int)(X); }
|
||||
|
||||
Currently produces:
|
||||
|
||||
_foo:
|
||||
fctiwz f0, f1
|
||||
stfd f0, -8(r1)
|
||||
lwz r2, -4(r1)
|
||||
extsw r2, r2
|
||||
std r2, -16(r1)
|
||||
lfd f0, -16(r1)
|
||||
fcfid f0, f0
|
||||
frsp f1, f0
|
||||
blr
|
||||
|
||||
We could use a target dag combine to turn the lwz/extsw into an lwa when the
|
||||
lwz has a single use. Since LWA is cracked anyway, this would be a codesize
|
||||
win only.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
We generate ugly code for this:
|
||||
|
||||
void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
|
||||
|
96
test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
Normal file
96
test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
Normal file
@ -0,0 +1,96 @@
|
||||
; RUN: llc -mcpu=a2 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define double @test1(i64* nocapture readonly %x) #0 {
|
||||
entry:
|
||||
%0 = load i64* %x, align 8
|
||||
%conv = sitofp i64 %0 to double
|
||||
ret double %conv
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: lfd [[REG1:[0-9]+]], 0(3)
|
||||
; CHECK: fcfid 1, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define double @test2(i32* nocapture readonly %x) #0 {
|
||||
entry:
|
||||
%0 = load i32* %x, align 4
|
||||
%conv = sitofp i32 %0 to double
|
||||
ret double %conv
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3
|
||||
; CHECK: fcfid 1, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define float @foo(float %X) #0 {
|
||||
entry:
|
||||
%conv = fptosi float %X to i32
|
||||
%conv1 = sitofp i32 %conv to float
|
||||
ret float %conv1
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
|
||||
; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
|
||||
; CHECK: stfiwx [[REG2]], 0, [[REG1]]
|
||||
; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
|
||||
; CHECK: fcfids 1, [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define double @food(double %X) #0 {
|
||||
entry:
|
||||
%conv = fptosi double %X to i32
|
||||
%conv1 = sitofp i32 %conv to double
|
||||
ret double %conv1
|
||||
|
||||
; CHECK-LABEL: @food
|
||||
; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
|
||||
; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
|
||||
; CHECK: stfiwx [[REG2]], 0, [[REG1]]
|
||||
; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
|
||||
; CHECK: fcfid 1, [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define float @foou(float %X) #0 {
|
||||
entry:
|
||||
%conv = fptoui float %X to i32
|
||||
%conv1 = uitofp i32 %conv to float
|
||||
ret float %conv1
|
||||
|
||||
; CHECK-LABEL: @foou
|
||||
; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
|
||||
; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
|
||||
; CHECK: stfiwx [[REG2]], 0, [[REG1]]
|
||||
; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
|
||||
; CHECK: fcfidus 1, [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
define double @fooud(double %X) #0 {
|
||||
entry:
|
||||
%conv = fptoui double %X to i32
|
||||
%conv1 = uitofp i32 %conv to double
|
||||
ret double %conv1
|
||||
|
||||
; CHECK-LABEL: @fooud
|
||||
; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
|
||||
; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
|
||||
; CHECK: stfiwx [[REG2]], 0, [[REG1]]
|
||||
; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
|
||||
; CHECK: fcfidu 1, [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readonly }
|
||||
|
Loading…
Reference in New Issue
Block a user