R600/SI: Add preliminary support for flat address space
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217777 91177308-0d34-0410-b5e6-96231b3b80d8
parent 035f02cb23
commit d189a0407d
@@ -81,6 +81,11 @@ def FeatureCFALUBug : SubtargetFeature<"cfalubug",
         "true",
         "GPU has CF_ALU bug">;
 
+def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
+        "FlatAddressSpace",
+        "true",
+        "Support flat address space">;
+
 class SubtargetFeatureFetchLimit <string Value> :
                                  SubtargetFeature <"fetch"#Value,
         "TexVTXClauseSize",
@@ -135,7 +140,7 @@ def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
 
 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
         [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
-        FeatureWavefrontSize64]>;
+        FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
 
 //===----------------------------------------------------------------------===//
 
 def AMDGPUInstrInfo : InstrInfo {
@@ -240,6 +240,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   unsigned MaxSGPR = 0;
   unsigned MaxVGPR = 0;
   bool VCCUsed = false;
+  bool FlatUsed = false;
   const SIRegisterInfo *RI = static_cast<const SIRegisterInfo *>(
       TM.getSubtargetImpl()->getRegisterInfo());
 
@@ -262,6 +263,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
           reg == AMDGPU::VCC_HI) {
         VCCUsed = true;
         continue;
+      } else if (reg == AMDGPU::FLAT_SCR ||
+                 reg == AMDGPU::FLAT_SCR_LO ||
+                 reg == AMDGPU::FLAT_SCR_HI) {
+        FlatUsed = true;
+        continue;
       }
 
       switch (reg) {
@@ -322,6 +328,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   if (VCCUsed)
     MaxSGPR += 2;
 
+  if (FlatUsed)
+    MaxSGPR += 2;
+
   // We found the maximum register index. They start at 0, so add one to get the
   // number of registers.
   ProgInfo.NumVGPR = MaxVGPR + 1;
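For context, a standalone sketch of the SGPR accounting this hunk extends (a hedged model, not the AsmPrinter code itself): VCC and the new FLAT_SCR are each an SGPR pair appended past the highest explicitly used index, and the final count is one more than the maximum index.

```cpp
// Model of the accounting above (assumption: indices are zero-based and
// each implicitly used special register occupies an SGPR pair).
unsigned countSGPRs(unsigned MaxSGPR, bool VCCUsed, bool FlatUsed) {
  if (VCCUsed)
    MaxSGPR += 2;     // vcc_lo / vcc_hi
  if (FlatUsed)
    MaxSGPR += 2;     // flat_scratch_lo / flat_scratch_hi
  return MaxSGPR + 1; // highest index -> register count
}
```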
@@ -340,6 +349,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
   const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
   ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
 
+  ProgInfo.FlatUsed = FlatUsed;
+  ProgInfo.VCCUsed = VCCUsed;
   ProgInfo.CodeLen = CodeSize;
 }
 
@@ -402,6 +413,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
 
     OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
     OutStreamer.EmitIntValue(S_00B860_WAVESIZE(ScratchBlocks), 4);
+
+    // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
+    // 0" comment but I don't see a corresponding field in the register spec.
   } else {
     OutStreamer.EmitIntValue(RsrcReg, 4);
     OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
@@ -33,6 +33,8 @@ private:
         DebugMode(0),
         IEEEMode(0),
         ScratchSize(0),
+        FlatUsed(false),
+        VCCUsed(false),
         CodeLen(0) {}
 
     // Fields set in PGM_RSRC1 pm4 packet.
@@ -46,7 +48,10 @@ private:
     uint32_t IEEEMode;
     uint32_t ScratchSize;
 
+    bool FlatUsed;
+
     // Bonus information for debugging.
+    bool VCCUsed;
     uint64_t CodeLen;
   };
 
@@ -65,6 +65,7 @@ private:
   static bool checkPrivateAddress(const MachineMemOperand *Op);
 
   static bool isGlobalStore(const StoreSDNode *N);
+  static bool isFlatStore(const StoreSDNode *N);
   static bool isPrivateStore(const StoreSDNode *N);
   static bool isLocalStore(const StoreSDNode *N);
   static bool isRegionStore(const StoreSDNode *N);
@@ -72,6 +73,7 @@ private:
   bool isCPLoad(const LoadSDNode *N) const;
   bool isConstantLoad(const LoadSDNode *N, int cbID) const;
   bool isGlobalLoad(const LoadSDNode *N) const;
+  bool isFlatLoad(const LoadSDNode *N) const;
   bool isParamLoad(const LoadSDNode *N) const;
   bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
@@ -104,6 +106,7 @@ private:
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
                          SDValue &TFE) const;
+  SDNode *SelectAddrSpaceCast(SDNode *N);
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp, SDValue &Omod) const;
@@ -484,6 +487,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case AMDGPUISD::DIV_SCALE: {
     return SelectDIV_SCALE(N);
   }
+  case ISD::ADDRSPACECAST:
+    return SelectAddrSpaceCast(N);
   }
   return SelectCode(N);
 }
@@ -522,6 +527,10 @@ bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 }
 
+bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
 }
@@ -553,6 +562,10 @@ bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 }
 
+bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
+  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
+}
+
 bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
 }
@@ -582,10 +595,11 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
   const Value *MemVal = N->getMemOperand()->getValue();
   if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
       !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
       !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
       !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
       !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
-      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)){
+      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
     return true;
   }
   return false;
@@ -1005,6 +1019,66 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   return false;
 }
 
+// FIXME: This is incorrect and only enough to be able to compile.
+SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
+  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
+  SDLoc DL(N);
+
+  assert(Subtarget.hasFlatAddressSpace() &&
+         "addrspacecast only supported with flat address space!");
+
+  assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+          ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
+         "Cannot cast address space to / from constant address!");
+
+  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
+          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
+         "Can only cast to / from flat address space!");
+
+  // The flat instructions read the address as the index of the VGPR holding the
+  // address, so casting should just be reinterpreting the base VGPR, so just
+  // insert trunc / bitcast / zext.
+
+  SDValue Src = ASC->getOperand(0);
+  EVT DestVT = ASC->getValueType(0);
+  EVT SrcVT = Src.getValueType();
+
+  unsigned SrcSize = SrcVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+
+  if (SrcSize > DestSize) {
+    assert(SrcSize == 64 && DestSize == 32);
+    return CurDAG->getMachineNode(
+      TargetOpcode::EXTRACT_SUBREG,
+      DL,
+      DestVT,
+      Src,
+      CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32));
+  }
+
+
+  if (DestSize > SrcSize) {
+    assert(SrcSize == 32 && DestSize == 64);
+
+    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
+
+    const SDValue Ops[] = {
+      RC,
+      Src,
+      CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
+                                     CurDAG->getConstant(0, MVT::i32)), 0),
+      CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+    };
+
+    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                  SDLoc(N), N->getValueType(0), Ops);
+  }
+
+  assert(SrcSize == 64 && DestSize == 64);
+  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
 
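A hedged standalone model (not the SelectionDAG code above) of the width dispatch this function performs: flat pointers are 64 bits, so the only combinations the asserts allow are 64→32, 32→64, and 64→64.

```cpp
#include <cassert>
#include <cstdint>

// Value-level model: 64 -> 32 keeps the low half (sub0), 32 -> 64
// zero-extends with a zero high half (the S_MOV_B32 0 + REG_SEQUENCE),
// and 64 -> 64 is a plain bitcast.
uint64_t modelAddrSpaceCast(uint64_t Src, unsigned SrcSize, unsigned DestSize) {
  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    return Src & 0xffffffffu;   // truncate to sub0
  }
  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);
    return Src & 0xffffffffu;   // high 32 bits become 0
  }
  return Src;                   // same width: reinterpret in place
}
```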
@@ -95,6 +95,7 @@ protected:
                                       MachineInstr *MI,
                                       const SmallVectorImpl<unsigned> &Ops,
                                       MachineInstr *LoadMI) const override;
+public:
   /// \returns the smallest register index that will be accessed by an indirect
   /// read or write or -1 if indirect addressing is not used by this program.
   int getIndirectIndexBegin(const MachineFunction &MF) const;
@@ -103,7 +104,6 @@ protected:
   /// read or write or -1 if indirect addressing is not used by this program.
   int getIndirectIndexEnd(const MachineFunction &MF) const;
 
-public:
   bool canFoldMemoryOperand(const MachineInstr *MI,
                             const SmallVectorImpl<unsigned> &Ops) const override;
   bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -195,6 +195,14 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
 def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
   return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
@@ -223,6 +231,14 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
 def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
   return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
 }]>;
@@ -248,6 +264,11 @@ def az_extloadi32_global : PatFrag<(ops node:$ptr),
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
+def az_extloadi32_flat : PatFrag<(ops node:$ptr),
+                                 (az_extloadi32 node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
 def az_extloadi32_constant : PatFrag<(ops node:$ptr),
                                      (az_extloadi32 node:$ptr), [{
   return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
@@ -263,6 +284,16 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
   return isGlobalStore(dyn_cast<StoreSDNode>(N));
 }]>;
 
+def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
+                                (truncstorei8 node:$val, node:$ptr), [{
+  return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
+                                 (truncstorei16 node:$val, node:$ptr), [{
+  return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
 def local_store : PatFrag<(ops node:$val, node:$ptr),
                           (store node:$val, node:$ptr), [{
   return isLocalStore(dyn_cast<StoreSDNode>(N));
@@ -318,6 +349,7 @@ def mskor_global : PatFrag<(ops node:$val, node:$ptr),
   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
+
 def atomic_cmp_swap_32_local :
   PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
           (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
@@ -334,6 +366,20 @@ def atomic_cmp_swap_64_local :
   AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
+def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return isFlatLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def flat_store : PatFrag<(ops node:$val, node:$ptr),
+                         (store node:$val, node:$ptr), [{
+  return isFlatStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
+                         (AMDGPUstore_mskor node:$val, node:$ptr), [{
+  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Misc Pattern Fragments
 //===----------------------------------------------------------------------===//
@@ -12,7 +12,9 @@ void AMDGPUMachineFunction::anchor() {}
 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
   MachineFunctionInfo(),
   ShaderType(ShaderType::COMPUTE),
-  LDSSize(0) {
+  LDSSize(0),
+  ScratchSize(0),
+  IsKernel(true) {
   AttributeSet Set = MF.getFunction()->getAttributes();
   Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
                                  ShaderTypeAttribute);
@@ -33,6 +33,9 @@ public:
   unsigned getShaderType() const {
     return ShaderType;
   }
+
+  unsigned ScratchSize;
+  bool IsKernel;
 };
 
 }
@@ -77,14 +77,15 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
     DumpCode(false), R600ALUInst(false), HasVertexCache(false),
     TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
     FP64Denormals(false), FP32Denormals(false), CaymanISA(false),
-    EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
+    FlatAddressSpace(false), EnableIRStructurizer(true),
+    EnablePromoteAlloca(false), EnableIfCvt(true),
     WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
     DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
     FrameLowering(TargetFrameLowering::StackGrowsUp,
                   64 * 16, // Maximum stack alignment (long16)
                   0),
     InstrItins(getInstrItineraryForCPU(GPU)) {
 
   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     InstrInfo.reset(new R600InstrInfo(*this));
     TLInfo.reset(new R600TargetLowering(TM));
@@ -56,6 +56,7 @@ private:
   bool FP64Denormals;
   bool FP32Denormals;
   bool CaymanISA;
+  bool FlatAddressSpace;
   bool EnableIRStructurizer;
   bool EnablePromoteAlloca;
   bool EnableIfCvt;
@@ -124,6 +125,10 @@ public:
     return FP64Denormals;
   }
 
+  bool hasFlatAddressSpace() const {
+    return FlatAddressSpace;
+  }
+
   bool hasBFE() const {
     return (getGeneration() >= EVERGREEN);
   }
@@ -98,6 +98,27 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
   case AMDGPU::M0:
     O << "m0";
     return;
+  case AMDGPU::FLAT_SCR:
+    O << "flat_scratch";
+    return;
+  case AMDGPU::VCC_LO:
+    O << "vcc_lo";
+    return;
+  case AMDGPU::VCC_HI:
+    O << "vcc_hi";
+    return;
+  case AMDGPU::EXEC_LO:
+    O << "exec_lo";
+    return;
+  case AMDGPU::EXEC_HI:
+    O << "exec_hi";
+    return;
+  case AMDGPU::FLAT_SCR_LO:
+    O << "flat_scratch_lo";
+    return;
+  case AMDGPU::FLAT_SCR_HI:
+    O << "flat_scratch_hi";
+    return;
   default:
     break;
   }
@@ -22,7 +22,8 @@ enum {
   VOPC = 1 << 8,
   SALU = 1 << 9,
   MUBUF = 1 << 10,
-  MTBUF = 1 << 11
+  MTBUF = 1 << 11,
+  FLAT = 1 << 12
 };
 }
 
@@ -26,6 +26,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
   field bits<1> SALU = 0;
   field bits<1> MUBUF = 0;
   field bits<1> MTBUF = 0;
+  field bits<1> FLAT = 0;
 
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = VM_CNT;
@@ -40,6 +41,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
   let TSFlags{9} = SALU;
   let TSFlags{10} = MUBUF;
   let TSFlags{11} = MTBUF;
+  let TSFlags{12} = FLAT;
 }
 
 class Enc32 {
@@ -425,8 +427,27 @@ class MIMGe <bits<7> op> : Enc64 {
   let Inst{57-53} = SSAMP{6-2};
 }
 
+class FLATe <bits<7> op> : Enc64 {
+  bits<8> addr;
+  bits<8> data;
+  bits<8> vdst;
+  bits<1> slc;
+  bits<1> glc;
+  bits<1> tfe;
+
+  // 15-0 is reserved.
+  let Inst{16} = glc;
+  let Inst{17} = slc;
+  let Inst{24-18} = op;
+  let Inst{31-26} = 0x37; // Encoding.
+  let Inst{39-32} = addr;
+  let Inst{47-40} = data;
+  // 54-48 is reserved.
+  let Inst{55} = tfe;
+  let Inst{63-56} = vdst;
+}
+
 class EXPe : Enc64 {
   bits<4> EN;
   bits<6> TGT;
   bits<1> COMPR;
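For reference, a C++ sketch of an encoder for the FLATe layout defined above, packing each field at the bit positions the TableGen class assigns (a hypothetical helper, not part of the patch):

```cpp
#include <cstdint>

// Packs a 64-bit FLAT encoding: glc at bit 16, slc at 17, op at 24-18,
// the fixed 0x37 encoding marker at 31-26, addr at 39-32, data at 47-40,
// tfe at 55, and vdst at 63-56; bits 15-0 and 54-48 stay reserved (zero).
uint64_t encodeFLAT(uint8_t op, uint8_t addr, uint8_t data, uint8_t vdst,
                    bool glc, bool slc, bool tfe) {
  uint64_t Inst = 0;
  Inst |= uint64_t(glc) << 16;
  Inst |= uint64_t(slc) << 17;
  Inst |= (uint64_t(op) & 0x7f) << 18;
  Inst |= uint64_t(0x37) << 26;
  Inst |= uint64_t(addr) << 32;
  Inst |= uint64_t(data) << 40;
  Inst |= uint64_t(tfe) << 55;
  Inst |= uint64_t(vdst) << 56;
  return Inst;
}
```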
@@ -533,6 +554,21 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
   let UseNamedOperandTable = 1;
 }
 
+class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+    InstSI<outs, ins, asm, pattern>, FLATe <op> {
+  let FLAT = 1;
+  // Internally, FLAT instructions are executed as both an LDS and a
+  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
+  // and are not considered done until both have been decremented.
+  let VM_CNT = 1;
+  let LGKM_CNT = 1;
+
+  let Uses = [EXEC, FLAT_SCR]; // M0
+
+  let UseNamedOperandTable = 1;
+  let hasSideEffects = 0;
+}
+
 class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
     InstSI <outs, ins, asm, pattern>, MIMGe <op> {
 
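A toy C++ model of the completion rule in the comment above (an assumption-labeled sketch, not the hardware scoreboard): a FLAT access bumps both counters when issued and only counts as done once both have drained.

```cpp
// Simplistic waitcnt model: FLAT traffic goes down both the buffer (VM_CNT)
// and LDS (LGKM_CNT) paths, so completion requires both counters at zero.
struct WaitCnt {
  unsigned VMCnt = 0;
  unsigned LGKMCnt = 0;
  void issueFlat() { ++VMCnt; ++LGKMCnt; }
  bool flatDone() const { return VMCnt == 0 && LGKMCnt == 0; }
};
```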
@@ -638,6 +638,10 @@ bool SIInstrInfo::isMTBUF(uint16_t Opcode) const {
   return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
 }
 
+bool SIInstrInfo::isFLAT(uint16_t Opcode) const {
+  return get(Opcode).TSFlags & SIInstrFlags::FLAT;
+}
+
 bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
   return get(Opcode).TSFlags & SIInstrFlags::VOP1;
 }
@@ -843,6 +847,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
       if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
         ++ConstantBusCount;
 
+      // FLAT_SCR is just an SGPR pair.
+      if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
+        ++ConstantBusCount;
+
       // SGPRs use the constant bus
       if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
           (!MO.isImplicit() &&
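A hypothetical standalone model of the rule the check above extends: a VALU instruction gets a single constant-bus read, and FLAT_SCR now counts against it exactly like M0, VCC, an explicitly used EXEC, or any other scalar operand.

```cpp
#include <vector>

enum class OpKind { VGPR, SGPR, M0, VCC, EXEC, FLAT_SCR };

// Toy verifier: every explicitly used scalar operand consumes the one
// constant-bus slot available per VALU instruction.
bool fitsConstantBus(const std::vector<OpKind> &ExplicitOps) {
  unsigned Uses = 0;
  for (OpKind K : ExplicitOps)
    if (K != OpKind::VGPR)
      ++Uses;
  return Uses <= 1;
}
```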
@@ -108,6 +108,7 @@ public:
   bool isSMRD(uint16_t Opcode) const;
   bool isMUBUF(uint16_t Opcode) const;
   bool isMTBUF(uint16_t Opcode) const;
+  bool isFLAT(uint16_t Opcode) const;
   bool isVOP1(uint16_t Opcode) const;
   bool isVOP2(uint16_t Opcode) const;
   bool isVOP3(uint16_t Opcode) const;
@@ -209,6 +209,7 @@ def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
 def SIOperand {
   int ZERO = 0x80;
   int VCC = 0x6A;
+  int FLAT_SCR = 0x68;
 }
 
 def SRCMODS {
@@ -1063,6 +1064,30 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
   }
 }
 
+class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
+    FLAT <op, (outs regClass:$data),
+              (ins VReg_64:$addr),
+          asm#" $data, $addr, [M0, FLAT_SCRATCH]", []> {
+  let glc = 0;
+  let slc = 0;
+  let tfe = 0;
+  let mayLoad = 1;
+}
+
+class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+    FLAT <op, (outs), (ins vdataClass:$data, VReg_64:$addr),
+          name#" $data, $addr, [M0, FLAT_SCRATCH]",
+          []> {
+
+  let mayLoad = 0;
+  let mayStore = 1;
+
+  // Encoding
+  let glc = 0;
+  let slc = 0;
+  let tfe = 0;
+}
+
 class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
   op,
   (outs regClass:$dst),
@@ -31,6 +31,7 @@ def isSI : Predicate<"Subtarget.getGeneration() "
 
 def isCI : Predicate<"Subtarget.getGeneration() "
                      ">= AMDGPUSubtarget::SEA_ISLANDS">;
+def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
 
 def isCFDepth0 : Predicate<"isCFDepth0()">;
 
@@ -1043,6 +1044,80 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "IMAGE_SAMPLE_C_CD_CL_O"
 //def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
 //def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
 
+//===----------------------------------------------------------------------===//
+// Flat Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasFlatAddressSpace] in {
+def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x00000008, "FLAT_LOAD_UBYTE", VReg_32>;
+def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x00000009, "FLAT_LOAD_SBYTE", VReg_32>;
+def FLAT_LOAD_USHORT : FLAT_Load_Helper <0x0000000a, "FLAT_LOAD_USHORT", VReg_32>;
+def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0x0000000b, "FLAT_LOAD_SSHORT", VReg_32>;
+def FLAT_LOAD_DWORD : FLAT_Load_Helper <0x0000000c, "FLAT_LOAD_DWORD", VReg_32>;
+def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0x0000000d, "FLAT_LOAD_DWORDX2", VReg_64>;
+def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0x0000000e, "FLAT_LOAD_DWORDX4", VReg_128>;
+def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0x00000010, "FLAT_LOAD_DWORDX3", VReg_96>;
+
+def FLAT_STORE_BYTE : FLAT_Store_Helper <
+  0x00000018, "FLAT_STORE_BYTE", VReg_32
+>;
+
+def FLAT_STORE_SHORT : FLAT_Store_Helper <
+  0x0000001a, "FLAT_STORE_SHORT", VReg_32
+>;
+
+def FLAT_STORE_DWORD : FLAT_Store_Helper <
+  0x0000001c, "FLAT_STORE_DWORD", VReg_32
+>;
+
+def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
+  0x0000001d, "FLAT_STORE_DWORDX2", VReg_64
+>;
+
+def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
+  0x0000001e, "FLAT_STORE_DWORDX4", VReg_128
+>;
+
+def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
+  0x0000001e, "FLAT_STORE_DWORDX3", VReg_96
+>;
+
+//def FLAT_ATOMIC_SWAP : FLAT_ <0x00000030, "FLAT_ATOMIC_SWAP", []>;
+//def FLAT_ATOMIC_CMPSWAP : FLAT_ <0x00000031, "FLAT_ATOMIC_CMPSWAP", []>;
+//def FLAT_ATOMIC_ADD : FLAT_ <0x00000032, "FLAT_ATOMIC_ADD", []>;
+//def FLAT_ATOMIC_SUB : FLAT_ <0x00000033, "FLAT_ATOMIC_SUB", []>;
+//def FLAT_ATOMIC_RSUB : FLAT_ <0x00000034, "FLAT_ATOMIC_RSUB", []>;
+//def FLAT_ATOMIC_SMIN : FLAT_ <0x00000035, "FLAT_ATOMIC_SMIN", []>;
+//def FLAT_ATOMIC_UMIN : FLAT_ <0x00000036, "FLAT_ATOMIC_UMIN", []>;
+//def FLAT_ATOMIC_SMAX : FLAT_ <0x00000037, "FLAT_ATOMIC_SMAX", []>;
+//def FLAT_ATOMIC_UMAX : FLAT_ <0x00000038, "FLAT_ATOMIC_UMAX", []>;
+//def FLAT_ATOMIC_AND : FLAT_ <0x00000039, "FLAT_ATOMIC_AND", []>;
+//def FLAT_ATOMIC_OR : FLAT_ <0x0000003a, "FLAT_ATOMIC_OR", []>;
+//def FLAT_ATOMIC_XOR : FLAT_ <0x0000003b, "FLAT_ATOMIC_XOR", []>;
+//def FLAT_ATOMIC_INC : FLAT_ <0x0000003c, "FLAT_ATOMIC_INC", []>;
+//def FLAT_ATOMIC_DEC : FLAT_ <0x0000003d, "FLAT_ATOMIC_DEC", []>;
+//def FLAT_ATOMIC_FCMPSWAP : FLAT_ <0x0000003e, "FLAT_ATOMIC_FCMPSWAP", []>;
+//def FLAT_ATOMIC_FMIN : FLAT_ <0x0000003f, "FLAT_ATOMIC_FMIN", []>;
+//def FLAT_ATOMIC_FMAX : FLAT_ <0x00000040, "FLAT_ATOMIC_FMAX", []>;
+//def FLAT_ATOMIC_SWAP_X2 : FLAT_X2 <0x00000050, "FLAT_ATOMIC_SWAP_X2", []>;
+//def FLAT_ATOMIC_CMPSWAP_X2 : FLAT_X2 <0x00000051, "FLAT_ATOMIC_CMPSWAP_X2", []>;
+//def FLAT_ATOMIC_ADD_X2 : FLAT_X2 <0x00000052, "FLAT_ATOMIC_ADD_X2", []>;
+//def FLAT_ATOMIC_SUB_X2 : FLAT_X2 <0x00000053, "FLAT_ATOMIC_SUB_X2", []>;
+//def FLAT_ATOMIC_RSUB_X2 : FLAT_X2 <0x00000054, "FLAT_ATOMIC_RSUB_X2", []>;
+//def FLAT_ATOMIC_SMIN_X2 : FLAT_X2 <0x00000055, "FLAT_ATOMIC_SMIN_X2", []>;
+//def FLAT_ATOMIC_UMIN_X2 : FLAT_X2 <0x00000056, "FLAT_ATOMIC_UMIN_X2", []>;
+//def FLAT_ATOMIC_SMAX_X2 : FLAT_X2 <0x00000057, "FLAT_ATOMIC_SMAX_X2", []>;
+//def FLAT_ATOMIC_UMAX_X2 : FLAT_X2 <0x00000058, "FLAT_ATOMIC_UMAX_X2", []>;
+//def FLAT_ATOMIC_AND_X2 : FLAT_X2 <0x00000059, "FLAT_ATOMIC_AND_X2", []>;
+//def FLAT_ATOMIC_OR_X2 : FLAT_X2 <0x0000005a, "FLAT_ATOMIC_OR_X2", []>;
+//def FLAT_ATOMIC_XOR_X2 : FLAT_X2 <0x0000005b, "FLAT_ATOMIC_XOR_X2", []>;
+//def FLAT_ATOMIC_INC_X2 : FLAT_X2 <0x0000005c, "FLAT_ATOMIC_INC_X2", []>;
+//def FLAT_ATOMIC_DEC_X2 : FLAT_X2 <0x0000005d, "FLAT_ATOMIC_DEC_X2", []>;
+//def FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_X2 <0x0000005e, "FLAT_ATOMIC_FCMPSWAP_X2", []>;
+//def FLAT_ATOMIC_FMIN_X2 : FLAT_X2 <0x0000005f, "FLAT_ATOMIC_FMIN_X2", []>;
+//def FLAT_ATOMIC_FMAX_X2 : FLAT_X2 <0x00000060, "FLAT_ATOMIC_FMAX_X2", []>;
+
+} // End HasFlatAddressSpace predicate
 //===----------------------------------------------------------------------===//
 // VOP1 Instructions
 //===----------------------------------------------------------------------===//
@@ -2822,6 +2897,37 @@ defm V_MAD_I64_I32 : VOP3Inst <0x00000177, "V_MAD_I64_I32",
 
 } // End iSCI
 
+//===----------------------------------------------------------------------===//
+// Flat Patterns
+//===----------------------------------------------------------------------===//
+
+class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
+                        PatFrag flat_ld> :
+  Pat <(vt (flat_ld i64:$ptr)),
+       (Instr_ADDR64 $ptr)
+>;
+
+def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
+def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
+
+class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
+  Pat <(st vt:$value, i64:$ptr),
+        (Instr $value, $ptr)
+>;
+
+def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
+def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
+def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
+def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
+
 /********** ====================== **********/
 /********** Indirect adressing **********/
@@ -52,6 +52,7 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -451,6 +452,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
   bool HaveKill = false;
   bool NeedM0 = false;
   bool NeedWQM = false;
+  bool NeedFlat = false;
   unsigned Depth = 0;
 
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -467,6 +469,12 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
         NeedWQM = true;
       }
 
+      // Flat uses m0 in case it needs to access LDS.
+      if (TII->isFLAT(MI.getOpcode())) {
+        NeedM0 = true;
+        NeedFlat = true;
+      }
+
       switch (MI.getOpcode()) {
         default: break;
         case AMDGPU::SI_IF:
@@ -532,7 +540,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
         case AMDGPU::V_INTERP_MOV_F32:
           NeedWQM = true;
           break;
-
       }
     }
   }
@@ -550,5 +557,42 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
             AMDGPU::EXEC).addReg(AMDGPU::EXEC);
   }
 
+  // FIXME: This seems inappropriate to do here.
+  if (NeedFlat && MFI->IsKernel) {
+    // Insert the prologue initializing the SGPRs pointing to the scratch space
+    // for flat accesses.
+    const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+    // TODO: What to use with function calls?
+
+    // FIXME: This is reporting stack size that is used in a scratch buffer
+    // rather than registers as well.
+    uint64_t StackSizeBytes = FrameInfo->getStackSize();
+
+    int IndirectBegin
+      = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF);
+    // Convert register index to 256-byte unit.
+    uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256);
+
+    assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff &&
+           "Stack limits should be smaller than 16-bits");
+
+    // Initialize the flat scratch register pair.
+    // TODO: Can we use one s_mov_b64 here?
+
+    // Offset is in units of 256-bytes.
+    MachineBasicBlock &MBB = MF.front();
+    DebugLoc NoDL;
+    MachineBasicBlock::iterator Start = MBB.getFirstNonPHI();
+    const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32);
+
+    BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO)
+      .addImm(StackOffset);
+
+    // Documentation says size is "per-thread scratch size in bytes"
+    BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI)
+      .addImm(StackSizeBytes);
+  }
+
   return true;
 }
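A worked sketch of the arithmetic this prologue materializes, as a hypothetical standalone helper (the values are assumptions for illustration): flat_scratch_lo holds the scratch offset in 256-byte units, flat_scratch_hi the per-thread scratch size in bytes, and both must fit an S_MOVK_I32 16-bit immediate.

```cpp
#include <cassert>
#include <cstdint>

// With IndirectBegin = 16, the offset is 4 * 16 / 256 = 0 256-byte units;
// a 40-byte frame then puts 0 in flat_scratch_lo and 40 in flat_scratch_hi,
// matching the immediates the new test checks for.
void flatScratchInit(int IndirectBegin, uint64_t StackSizeBytes,
                     uint64_t &Lo, uint64_t &Hi) {
  Lo = IndirectBegin < 0 ? 0 : (4 * uint64_t(IndirectBegin)) / 256;
  Hi = StackSizeBytes;
  assert(Lo < 0xffff && Hi < 0xffff && "must fit a 16-bit immediate");
}
```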
@@ -33,6 +33,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(AMDGPU::EXEC);
   Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+  Reserved.set(AMDGPU::FLAT_SCR);
   return Reserved;
 }
 
@@ -246,6 +247,28 @@ unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
     default: llvm_unreachable("Invalid SubIdx for VCC");
     }
     break;
+
+  case AMDGPU::FLAT_SCR:
+    switch (Channel) {
+    case 0:
+      return AMDGPU::FLAT_SCR_LO;
+    case 1:
+      return AMDGPU::FLAT_SCR_HI;
+    default:
+      llvm_unreachable("Invalid SubIdx for FLAT_SCR");
+    }
+    break;
+
+  case AMDGPU::EXEC:
+    switch (Channel) {
+    case 0:
+      return AMDGPU::EXEC_LO;
+    case 1:
+      return AMDGPU::EXEC_HI;
+    default:
+      llvm_unreachable("Invalid SubIdx for EXEC");
+    }
+    break;
   }
 
   unsigned Index = getHWRegIndex(Reg);
@@ -39,6 +39,16 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
 def SCC : SIReg<"SCC", 253>;
 def M0 : SIReg <"M0", 124>;
 
+def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
+def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
+
+// Pair to indicate location of scratch space for flat accesses.
+def FLAT_SCR : RegisterWithSubRegs <"FLAT_SCR", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = 104;
+}
+
 // SGPR registers
 foreach Index = 0-101 in {
   def SGPR#Index : SIReg <"SGPR"#Index, Index>;
@@ -167,13 +177,13 @@ def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
-  (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI)
+  (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
 >;
 
 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
 
 def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
-  (add SGPR_64, VCCReg, EXECReg)
+  (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
 >;
 
 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
test/CodeGen/R600/flat-address-space.ll (new file, 182 lines)
@@ -0,0 +1,182 @@
+; RUN: llc -O0 -march=r600 -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
+; RUN: llc -O0 -march=r600 -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
+
+; Disable optimizations in case there are optimizations added that
+; specialize away generic pointer accesses.
+
+
+; CHECK-LABEL: @branch_use_flat_i32:
+; CHECK: FLAT_STORE_DWORD {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
+; CHECK: S_ENDPGM
+define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
+entry:
+  %cmp = icmp ne i32 %c, 0
+  br i1 %cmp, label %local, label %global
+
+local:
+  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
+  br label %end
+
+global:
+  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+  br label %end
+
+end:
+  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
+  store i32 %x, i32 addrspace(4)* %fptr, align 4
+;  %val = load i32 addrspace(4)* %fptr, align 4
+;  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+
+
+; These testcases might become useless when there are optimizations to
+; remove generic pointers.
+
+; CHECK-LABEL: @store_flat_i32:
+; CHECK: V_MOV_B32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
+; CHECK: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: V_MOV_B32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: FLAT_STORE_DWORD v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
+  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+  store i32 %x, i32 addrspace(4)* %fptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: @store_flat_i64:
+; CHECK: FLAT_STORE_DWORDX2
+define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
+  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
+  store i64 %x, i64 addrspace(4)* %fptr, align 8
+  ret void
+}
+
+; CHECK-LABEL: @store_flat_v4i32:
+; CHECK: FLAT_STORE_DWORDX4
+define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
+  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
+  store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
+  ret void
+}
+
+; CHECK-LABEL: @store_flat_trunc_i16:
+; CHECK: FLAT_STORE_SHORT
+define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
+  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+  %y = trunc i32 %x to i16
+  store i16 %y, i16 addrspace(4)* %fptr, align 2
+  ret void
+}
+
+; CHECK-LABEL: @store_flat_trunc_i8:
+; CHECK: FLAT_STORE_BYTE
+define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
+  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+  %y = trunc i32 %x to i8
+  store i8 %y, i8 addrspace(4)* %fptr, align 2
+  ret void
+}
+
+
+
+; CHECK-LABEL @load_flat_i32:
+; CHECK: FLAT_LOAD_DWORD
+define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+  %fload = load i32 addrspace(4)* %fptr, align 4
+  store i32 %fload, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @load_flat_i64:
+; CHECK: FLAT_LOAD_DWORDX2
+define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
+  %fload = load i64 addrspace(4)* %fptr, align 4
+  store i64 %fload, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; CHECK-LABEL @load_flat_v4i32:
+; CHECK: FLAT_LOAD_DWORDX4
+define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
+  %fload = load <4 x i32> addrspace(4)* %fptr, align 4
+  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; CHECK-LABEL @sextload_flat_i8:
+; CHECK: FLAT_LOAD_SBYTE
+define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+  %fload = load i8 addrspace(4)* %fptr, align 4
+  %ext = sext i8 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @zextload_flat_i8:
+; CHECK: FLAT_LOAD_UBYTE
+define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+  %fload = load i8 addrspace(4)* %fptr, align 4
+  %ext = zext i8 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @sextload_flat_i16:
+; CHECK: FLAT_LOAD_SSHORT
+define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+  %fload = load i16 addrspace(4)* %fptr, align 4
+  %ext = sext i16 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @zextload_flat_i16:
+; CHECK: FLAT_LOAD_USHORT
+define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+  %fload = load i16 addrspace(4)* %fptr, align 4
+  %ext = zext i16 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+
+
+; TODO: This should not be zero when registers are used for small
+; scratch allocations again.
+
+; Check for prologue initializing special SGPRs pointing to scratch.
+; CHECK-LABEL: @store_flat_scratch:
+; CHECK: S_MOVK_I32 flat_scratch_lo, 0
+; CHECK-NO-PROMOTE: S_MOVK_I32 flat_scratch_hi, 40
+; CHECK-PROMOTE: S_MOVK_I32 flat_scratch_hi, 0
+; CHECK: FLAT_STORE_DWORD
+; CHECK: S_BARRIER
+; CHECK: FLAT_LOAD_DWORD
+define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
+  %alloca = alloca i32, i32 9, align 4
+  %x = call i32 @llvm.r600.read.tidig.x() #3
+  %pptr = getelementptr i32* %alloca, i32 %x
+  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
+  store i32 %x, i32 addrspace(4)* %fptr
+  ; Dummy call
+  call void @llvm.AMDGPU.barrier.local() #1
+  %reload = load i32 addrspace(4)* %fptr, align 4
+  store i32 %reload, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+declare void @llvm.AMDGPU.barrier.local() #1
+declare i32 @llvm.r600.read.tidig.x() #3
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind noduplicate }
+attributes #3 = { nounwind readnone }