mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-14 15:39:06 +00:00
AMDGPU: Split R600 and SI store lowering
These were only sharing some somewhat incorrect logic for when to scalarize or split vectors.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260490 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9be04e4c36
commit
b49a0edca2
@ -671,11 +671,6 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
|
||||
// nothing here and let the illegal result integer be handled normally.
|
||||
return;
|
||||
case ISD::STORE: {
|
||||
if (SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG))
|
||||
Results.push_back(Lowered);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
return;
|
||||
}
|
||||
@ -1146,6 +1141,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
|
||||
return DAG.getMergeValues(Ops, SL);
|
||||
}
|
||||
|
||||
// FIXME: This isn't doing anything for SI. This should be used in a target
|
||||
// combine during type legalization.
|
||||
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
|
||||
SelectionDAG &DAG) const {
|
||||
StoreSDNode *Store = cast<StoreSDNode>(Op);
|
||||
@ -1290,64 +1287,6 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
|
||||
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
|
||||
return Result;
|
||||
|
||||
StoreSDNode *Store = cast<StoreSDNode>(Op);
|
||||
SDValue Chain = Store->getChain();
|
||||
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
Store->getValue().getValueType().isVector()) {
|
||||
return SplitVectorStore(Op, DAG);
|
||||
}
|
||||
|
||||
EVT MemVT = Store->getMemoryVT();
|
||||
if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
|
||||
MemVT.bitsLT(MVT::i32)) {
|
||||
unsigned Mask = 0;
|
||||
if (Store->getMemoryVT() == MVT::i8) {
|
||||
Mask = 0xff;
|
||||
} else if (Store->getMemoryVT() == MVT::i16) {
|
||||
Mask = 0xffff;
|
||||
}
|
||||
SDValue BasePtr = Store->getBasePtr();
|
||||
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
|
||||
DAG.getConstant(2, DL, MVT::i32));
|
||||
SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
|
||||
Chain, Ptr,
|
||||
DAG.getTargetConstant(0, DL, MVT::i32));
|
||||
|
||||
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
|
||||
DAG.getConstant(0x3, DL, MVT::i32));
|
||||
|
||||
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
|
||||
DAG.getConstant(3, DL, MVT::i32));
|
||||
|
||||
SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
|
||||
Store->getValue());
|
||||
|
||||
SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
|
||||
|
||||
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
|
||||
MaskedValue, ShiftAmt);
|
||||
|
||||
SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
|
||||
DAG.getConstant(Mask, DL, MVT::i32),
|
||||
ShiftAmt);
|
||||
DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
|
||||
DAG.getConstant(0xffffffff, DL, MVT::i32));
|
||||
Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
|
||||
|
||||
SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
|
||||
return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
|
||||
Chain, Value, Ptr,
|
||||
DAG.getTargetConstant(0, DL, MVT::i32));
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// This is a shortcut for integer division because we have fast i32<->f32
|
||||
// conversions, and fast f32 reciprocal instructions. The fractional part of a
|
||||
// float is enough to accurately represent up to a 24-bit integer.
|
||||
|
@ -28,7 +28,6 @@ class AMDGPUTargetLowering : public TargetLowering {
|
||||
protected:
|
||||
const AMDGPUSubtarget *Subtarget;
|
||||
|
||||
private:
|
||||
SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
|
||||
const SDValue &InitPtr,
|
||||
SDValue Chain,
|
||||
|
@ -1264,17 +1264,73 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
|
||||
}
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue Value = Op.getOperand(1);
|
||||
SDValue Ptr = Op.getOperand(2);
|
||||
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc DL(Store);
|
||||
|
||||
if (SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
|
||||
unsigned Mask = 0;
|
||||
if (Store->getMemoryVT() == MVT::i8) {
|
||||
Mask = 0xff;
|
||||
} else if (Store->getMemoryVT() == MVT::i16) {
|
||||
Mask = 0xffff;
|
||||
}
|
||||
|
||||
SDValue Chain = Store->getChain();
|
||||
SDValue BasePtr = Store->getBasePtr();
|
||||
EVT MemVT = Store->getMemoryVT();
|
||||
|
||||
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
|
||||
DAG.getConstant(2, DL, MVT::i32));
|
||||
SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
|
||||
Chain, Ptr,
|
||||
DAG.getTargetConstant(0, DL, MVT::i32));
|
||||
|
||||
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
|
||||
DAG.getConstant(0x3, DL, MVT::i32));
|
||||
|
||||
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
|
||||
DAG.getConstant(3, DL, MVT::i32));
|
||||
|
||||
SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
|
||||
Store->getValue());
|
||||
|
||||
SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
|
||||
|
||||
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
|
||||
MaskedValue, ShiftAmt);
|
||||
|
||||
SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
|
||||
DAG.getConstant(Mask, DL, MVT::i32),
|
||||
ShiftAmt);
|
||||
DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
|
||||
DAG.getConstant(0xffffffff, DL, MVT::i32));
|
||||
Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
|
||||
|
||||
SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
|
||||
return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
|
||||
Chain, Value, Ptr,
|
||||
DAG.getTargetConstant(0, DL, MVT::i32));
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
|
||||
return Result;
|
||||
|
||||
if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
|
||||
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
|
||||
unsigned AS = StoreNode->getAddressSpace();
|
||||
SDValue Value = StoreNode->getValue();
|
||||
EVT ValueVT = Value.getValueType();
|
||||
|
||||
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
ValueVT.isVector()) {
|
||||
return SplitVectorStore(Op, DAG);
|
||||
}
|
||||
|
||||
SDLoc DL(Op);
|
||||
SDValue Chain = StoreNode->getChain();
|
||||
SDValue Ptr = StoreNode->getBasePtr();
|
||||
|
||||
if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
|
||||
if (StoreNode->isTruncatingStore()) {
|
||||
EVT VT = Value.getValueType();
|
||||
assert(VT.bitsLE(MVT::i32));
|
||||
@ -1309,7 +1365,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
Op->getVTList(), Args, MemVT,
|
||||
StoreNode->getMemOperand());
|
||||
} else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
|
||||
Value.getValueType().bitsGE(MVT::i32)) {
|
||||
ValueVT.bitsGE(MVT::i32)) {
|
||||
// Convert pointer from byte address to dword address.
|
||||
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
|
||||
DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
|
||||
@ -1324,13 +1380,12 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
}
|
||||
|
||||
EVT ValueVT = Value.getValueType();
|
||||
|
||||
if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
|
||||
if (AS != AMDGPUAS::PRIVATE_ADDRESS)
|
||||
return SDValue();
|
||||
|
||||
if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
|
||||
return Ret;
|
||||
EVT MemVT = StoreNode->getMemoryVT();
|
||||
if (MemVT.bitsLT(MVT::i32))
|
||||
return lowerPrivateTruncStore(StoreNode, DAG);
|
||||
|
||||
// Lowering for indirect addressing
|
||||
const MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
@ -59,6 +59,8 @@ private:
|
||||
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
|
@ -1846,23 +1846,27 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
StoreSDNode *Store = cast<StoreSDNode>(Op);
|
||||
EVT VT = Store->getMemoryVT();
|
||||
|
||||
// These stores are legal.
|
||||
if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
if (VT.isVector() && VT.getVectorNumElements() > 4)
|
||||
return ScalarizeVectorStore(Op, DAG);
|
||||
return SDValue();
|
||||
if (VT == MVT::i1) {
|
||||
return DAG.getTruncStore(Store->getChain(), DL,
|
||||
DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
|
||||
Store->getBasePtr(), MVT::i1, Store->getMemOperand());
|
||||
}
|
||||
|
||||
if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
|
||||
return Ret;
|
||||
assert(Store->getValue().getValueType().getScalarType() == MVT::i32);
|
||||
|
||||
if (VT.isVector() && VT.getVectorNumElements() >= 8)
|
||||
return SplitVectorStore(Op, DAG);
|
||||
unsigned NElts = VT.getVectorNumElements();
|
||||
unsigned AS = Store->getAddressSpace();
|
||||
if (AS == AMDGPUAS::LOCAL_ADDRESS) {
|
||||
// If properly aligned, if we split we might be able to use ds_write_b64.
|
||||
return SplitVectorStore(Op, DAG);
|
||||
}
|
||||
|
||||
if (VT == MVT::i1)
|
||||
return DAG.getTruncStore(Store->getChain(), DL,
|
||||
DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
|
||||
Store->getBasePtr(), MVT::i1, Store->getMemOperand());
|
||||
if (AS == AMDGPUAS::PRIVATE_ADDRESS && NElts > 4)
|
||||
return ScalarizeVectorStore(Op, DAG);
|
||||
|
||||
// These stores are legal. private, global and flat.
|
||||
if (NElts >= 8)
|
||||
return SplitVectorStore(Op, DAG);
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user