AMDGPU: Split R600 and SI store lowering

The R600 and SI paths were only sharing some somewhat incorrect
logic for deciding when to scalarize or split vector stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260490 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault 2016-02-11 05:32:46 +00:00
parent 9be04e4c36
commit b49a0edca2
5 changed files with 90 additions and 91 deletions
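Background for the change below: "splitting" a vector store replaces it with two stores of half-width vectors, while "scalarizing" replaces it with one store per element. A minimal plain-C++ sketch of the distinction (helper names here are invented for illustration; the real code builds SelectionDAG nodes via SplitVectorStore and ScalarizeVectorStore):

    #include <array>
    #include <cstdint>
    #include <cstring>

    using Vec8 = std::array<uint32_t, 8>;

    // "Split": two half-width <4 x i32> stores, as SplitVectorStore produces.
    void splitStore(uint32_t *Ptr, const Vec8 &V) {
      std::memcpy(Ptr, V.data(), 4 * sizeof(uint32_t));         // low half
      std::memcpy(Ptr + 4, V.data() + 4, 4 * sizeof(uint32_t)); // high half
    }

    // "Scalarize": one 32-bit store per element, as ScalarizeVectorStore
    // produces.
    void scalarizeStore(uint32_t *Ptr, const Vec8 &V) {
      for (unsigned I = 0; I != 8; ++I)
        Ptr[I] = V[I];
    }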

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

@@ -671,11 +671,6 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
     // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
     // nothing here and let the illegal result integer be handled normally.
     return;
-  case ISD::STORE: {
-    if (SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG))
-      Results.push_back(Lowered);
-    return;
-  }
   default:
     return;
   }
@@ -1146,6 +1141,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
   return DAG.getMergeValues(Ops, SL);
 }
 
+// FIXME: This isn't doing anything for SI. This should be used in a target
+// combine during type legalization.
 SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
                                                SelectionDAG &DAG) const {
   StoreSDNode *Store = cast<StoreSDNode>(Op);
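A note on what MergeVectorStore is for: it tries to pack a store of a small-element vector into a single scalar integer store, since byte-wide stores are expensive. A rough plain-C++ model of the idea (function name invented; the real code emits the equivalent SelectionDAG nodes, and per the new FIXME above it currently has no effect for SI):

    #include <cstdint>
    #include <cstring>

    // Pack a <4 x i8> value into one i32 and do a single 32-bit store
    // instead of four byte stores (little-endian element order, matching
    // the in-memory layout).
    void mergedByteVectorStore(uint8_t *Ptr, const uint8_t Elts[4]) {
      uint32_t Packed = 0;
      for (unsigned I = 0; I != 4; ++I)
        Packed |= uint32_t(Elts[I]) << (8 * I);
      std::memcpy(Ptr, &Packed, sizeof(Packed)); // one dword store
    }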
@@ -1290,64 +1287,6 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
 }
 
-SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
-    return Result;
-
-  StoreSDNode *Store = cast<StoreSDNode>(Op);
-  SDValue Chain = Store->getChain();
-  if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
-       Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
-      Store->getValue().getValueType().isVector()) {
-    return SplitVectorStore(Op, DAG);
-  }
-
-  EVT MemVT = Store->getMemoryVT();
-  if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
-      MemVT.bitsLT(MVT::i32)) {
-    unsigned Mask = 0;
-    if (Store->getMemoryVT() == MVT::i8) {
-      Mask = 0xff;
-    } else if (Store->getMemoryVT() == MVT::i16) {
-      Mask = 0xffff;
-    }
-    SDValue BasePtr = Store->getBasePtr();
-    SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
-                              DAG.getConstant(2, DL, MVT::i32));
-    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
-                              Chain, Ptr,
-                              DAG.getTargetConstant(0, DL, MVT::i32));
-
-    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
-                                  DAG.getConstant(0x3, DL, MVT::i32));
-
-    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
-                                   DAG.getConstant(3, DL, MVT::i32));
-
-    SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
-                                    Store->getValue());
-
-    SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
-
-    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
-                                       MaskedValue, ShiftAmt);
-
-    SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
-                                  DAG.getConstant(Mask, DL, MVT::i32),
-                                  ShiftAmt);
-    DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
-                          DAG.getConstant(0xffffffff, DL, MVT::i32));
-    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
-
-    SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
-    return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
-                       Chain, Value, Ptr,
-                       DAG.getTargetConstant(0, DL, MVT::i32));
-  }
-
-  return SDValue();
-}
-
 // This is a shortcut for integer division because we have fast i32<->f32
 // conversions, and fast f32 reciprocal instructions. The fractional part of a
 // float is enough to accurately represent up to a 24-bit integer.
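The function deleted above (its sub-dword path reappears as R600's lowerPrivateTruncStore below) emulates a byte or short store to private memory as a read-modify-write of the containing 32-bit register. A plain-integer model of the same arithmetic (the Regs array and function name are invented stand-ins for AMDGPUISD::REGISTER_LOAD/REGISTER_STORE; a 16-bit value is assumed not to straddle a dword):

    #include <cstdint>

    void storeSubDword(uint32_t *Regs, uint32_t ByteAddr, uint32_t Value,
                       uint32_t Mask /* 0xff for i8, 0xffff for i16 */) {
      uint32_t Ptr = ByteAddr >> 2;           // dword index (ISD::SRL by 2)
      uint32_t Dst = Regs[Ptr];               // REGISTER_LOAD
      uint32_t ByteIdx = ByteAddr & 0x3;      // byte offset within the dword
      uint32_t ShiftAmt = ByteIdx << 3;       // bit offset = byte index * 8
      uint32_t Masked = Value & Mask;         // getZeroExtendInReg
      uint32_t DstMask = ~(Mask << ShiftAmt); // SHL, then XOR with 0xffffffff
      Dst = (Dst & DstMask) | (Masked << ShiftAmt);
      Regs[Ptr] = Dst;                        // REGISTER_STORE
    }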

lib/Target/AMDGPU/AMDGPUISelLowering.h

@@ -28,7 +28,6 @@ class AMDGPUTargetLowering : public TargetLowering {
 protected:
   const AMDGPUSubtarget *Subtarget;
-  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
 private:
   SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
                                    const SDValue &InitPtr,
                                    SDValue Chain,

lib/Target/AMDGPU/R600ISelLowering.cpp

@@ -1264,17 +1264,73 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
   }
 }
 
-SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
-  SDLoc DL(Op);
-  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
-  SDValue Chain = Op.getOperand(0);
-  SDValue Value = Op.getOperand(1);
-  SDValue Ptr = Op.getOperand(2);
+SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
+                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Store);
 
-  if (SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
+  unsigned Mask = 0;
+  if (Store->getMemoryVT() == MVT::i8) {
+    Mask = 0xff;
+  } else if (Store->getMemoryVT() == MVT::i16) {
+    Mask = 0xffff;
+  }
+
+  SDValue Chain = Store->getChain();
+  SDValue BasePtr = Store->getBasePtr();
+  EVT MemVT = Store->getMemoryVT();
+
+  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
+                            DAG.getConstant(2, DL, MVT::i32));
+  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                            Chain, Ptr,
+                            DAG.getTargetConstant(0, DL, MVT::i32));
+
+  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
+                                DAG.getConstant(0x3, DL, MVT::i32));
+
+  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                 DAG.getConstant(3, DL, MVT::i32));
+
+  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
+                                  Store->getValue());
+
+  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
+
+  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                     MaskedValue, ShiftAmt);
+
+  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                DAG.getConstant(Mask, DL, MVT::i32),
+                                ShiftAmt);
+  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
+                        DAG.getConstant(0xffffffff, DL, MVT::i32));
+  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+
+  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+                     Chain, Value, Ptr,
+                     DAG.getTargetConstant(0, DL, MVT::i32));
+}
+
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
     return Result;
 
-  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
+  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+  unsigned AS = StoreNode->getAddressSpace();
+  SDValue Value = StoreNode->getValue();
+  EVT ValueVT = Value.getValueType();
+
+  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+      ValueVT.isVector()) {
+    return SplitVectorStore(Op, DAG);
+  }
+
+  SDLoc DL(Op);
+  SDValue Chain = StoreNode->getChain();
+  SDValue Ptr = StoreNode->getBasePtr();
+
+  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
     if (StoreNode->isTruncatingStore()) {
       EVT VT = Value.getValueType();
       assert(VT.bitsLE(MVT::i32));
@@ -1309,7 +1365,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
                                      Op->getVTList(), Args, MemVT,
                                      StoreNode->getMemOperand());
     } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
-               Value.getValueType().bitsGE(MVT::i32)) {
+               ValueVT.bitsGE(MVT::i32)) {
       // Convert pointer from byte address to dword address.
       Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                         DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
@@ -1324,13 +1380,12 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
-  EVT ValueVT = Value.getValueType();
-
-  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
     return SDValue();
 
-  if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
-    return Ret;
+  EVT MemVT = StoreNode->getMemoryVT();
+  if (MemVT.bitsLT(MVT::i32))
+    return lowerPrivateTruncStore(StoreNode, DAG);
 
   // Lowering for indirect addressing
   const MachineFunction &MF = DAG.getMachineFunction();
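Taken together, the new R600TargetLowering::LowerSTORE is a chain of early-outs. A hypothetical summary function (the enum, flags, and names are invented for illustration; note the real GLOBAL path can also fall through to default legalization):

    // Invented names; condensed from the new R600TargetLowering::LowerSTORE.
    enum class R600StoreAction {
      MergedScalar,    // MergeVectorStore packed the value into a scalar store
      SplitVector,     // SplitVectorStore: vector store to LOCAL or PRIVATE
      GlobalLowering,  // GLOBAL path: STORE_MSKOR / DWORDADDR conversion
      LeaveAsIs,       // not PRIVATE: let default legalization handle it
      PrivateTrunc,    // lowerPrivateTruncStore: sub-dword PRIVATE store
      PrivateIndirect  // remaining PRIVATE stores: indirect addressing
    };

    R600StoreAction classifyR600Store(bool Merged, bool IsLocal,
                                      bool IsPrivate, bool IsGlobal,
                                      bool IsVector, bool SubDword) {
      if (Merged)
        return R600StoreAction::MergedScalar;
      if ((IsLocal || IsPrivate) && IsVector)
        return R600StoreAction::SplitVector;
      if (IsGlobal)
        return R600StoreAction::GlobalLowering;
      if (!IsPrivate)
        return R600StoreAction::LeaveAsIs;
      if (SubDword)
        return R600StoreAction::PrivateTrunc;
      return R600StoreAction::PrivateIndirect;
    }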

lib/Target/AMDGPU/R600ISelLowering.h

@@ -59,6 +59,8 @@ private:
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;

lib/Target/AMDGPU/SIISelLowering.cpp

@@ -1846,23 +1846,27 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   StoreSDNode *Store = cast<StoreSDNode>(Op);
   EVT VT = Store->getMemoryVT();
 
-  // These stores are legal.
-  if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
-    if (VT.isVector() && VT.getVectorNumElements() > 4)
-      return ScalarizeVectorStore(Op, DAG);
-    return SDValue();
-  }
+  if (VT == MVT::i1) {
+    return DAG.getTruncStore(Store->getChain(), DL,
+      DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
+      Store->getBasePtr(), MVT::i1, Store->getMemOperand());
+  }
 
-  if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
-    return Ret;
+  assert(Store->getValue().getValueType().getScalarType() == MVT::i32);
 
-  if (VT.isVector() && VT.getVectorNumElements() >= 8)
-    return SplitVectorStore(Op, DAG);
+  unsigned NElts = VT.getVectorNumElements();
+  unsigned AS = Store->getAddressSpace();
+  if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+    // If properly aligned, if we split we might be able to use ds_write_b64.
+    return SplitVectorStore(Op, DAG);
+  }
 
-  if (VT == MVT::i1)
-    return DAG.getTruncStore(Store->getChain(), DL,
-                        DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
-                        Store->getBasePtr(), MVT::i1, Store->getMemOperand());
+  if (AS == AMDGPUAS::PRIVATE_ADDRESS && NElts > 4)
+    return ScalarizeVectorStore(Op, DAG);
+
+  // These stores are legal. private, global and flat.
+  if (NElts >= 8)
+    return SplitVectorStore(Op, DAG);
 
   return SDValue();
 }
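The SI side now makes its scalarize/split decision locally instead of deferring to the shared base-class code. A condensed restatement of the new order of checks (function and parameter names invented for illustration):

    // Invented names; condensed from the new SITargetLowering::LowerSTORE.
    enum class SIStoreAction { TruncToI32, Split, Scalarize, Legal };

    SIStoreAction classifySIStore(bool IsI1, bool IsLocal, bool IsPrivate,
                                  unsigned NElts) {
      if (IsI1)
        return SIStoreAction::TruncToI32; // store i1 as a truncated i32
      if (IsLocal)
        return SIStoreAction::Split;      // splitting may enable ds_write_b64
      if (IsPrivate && NElts > 4)
        return SIStoreAction::Scalarize;  // wide private stores: one per element
      if (NElts >= 8)
        return SIStoreAction::Split;      // halve wide global/flat vector stores
      return SIStoreAction::Legal;        // already legal as-is
    }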