mirror of
https://github.com/RPCS3/llvm.git
synced 2025-03-05 00:59:19 +00:00
R600: Add support for i8 and i16 local memory loads
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189225 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c52565157d
commit
a01cdea9c6
@ -96,6 +96,10 @@ def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
|||||||
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
|
||||||
|
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
|
def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
|
||||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
}]>;
|
}]>;
|
||||||
@ -108,8 +112,12 @@ def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
|
|||||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
|
def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
|
||||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
|
||||||
|
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
||||||
@ -132,6 +140,14 @@ def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
|
|||||||
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
|
||||||
|
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
|
||||||
|
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
||||||
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||||
}]>;
|
}]>;
|
||||||
|
@ -104,7 +104,21 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
|||||||
static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
|
static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
|
||||||
|
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
default:
|
||||||
|
if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) {
|
||||||
|
MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
|
||||||
|
TII->get(MI->getOpcode()),
|
||||||
|
AMDGPU::OQAP);
|
||||||
|
for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
|
||||||
|
NewMI.addOperand(MI->getOperand(i));
|
||||||
|
}
|
||||||
|
TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
|
||||||
|
MI->getOperand(0).getReg(),
|
||||||
|
AMDGPU::OQAP);
|
||||||
|
} else {
|
||||||
|
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case AMDGPU::CLAMP_R600: {
|
case AMDGPU::CLAMP_R600: {
|
||||||
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
|
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
|
||||||
AMDGPU::MOV,
|
AMDGPU::MOV,
|
||||||
@ -140,19 +154,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case AMDGPU::LDS_READ_RET: {
|
|
||||||
MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
|
|
||||||
TII->get(MI->getOpcode()),
|
|
||||||
AMDGPU::OQAP);
|
|
||||||
for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
|
|
||||||
NewMI.addOperand(MI->getOperand(i));
|
|
||||||
}
|
|
||||||
TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
|
|
||||||
MI->getOperand(0).getReg(),
|
|
||||||
AMDGPU::OQAP);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case AMDGPU::MOV_IMM_F32:
|
case AMDGPU::MOV_IMM_F32:
|
||||||
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
|
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
|
||||||
MI->getOperand(1).getFPImm()->getValueAPF()
|
MI->getOperand(1).getFPImm()->getValueAPF()
|
||||||
|
@ -1682,6 +1682,18 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE",
|
|||||||
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
|
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
|
||||||
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
|
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
|
||||||
>;
|
>;
|
||||||
|
def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
|
||||||
|
[(set i32:$dst, (sextloadi8_local i32:$src0))]
|
||||||
|
>;
|
||||||
|
def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
|
||||||
|
[(set i32:$dst, (az_extloadi8_local i32:$src0))]
|
||||||
|
>;
|
||||||
|
def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
|
||||||
|
[(set i32:$dst, (sextloadi16_local i32:$src0))]
|
||||||
|
>;
|
||||||
|
def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
|
||||||
|
[(set i32:$dst, (az_extloadi16_local i32:$src0))]
|
||||||
|
>;
|
||||||
|
|
||||||
// TRUNC is used for the FLT_TO_INT instructions to work around a
|
// TRUNC is used for the FLT_TO_INT instructions to work around a
|
||||||
// perceived problem where the rounding modes are applied differently
|
// perceived problem where the rounding modes are applied differently
|
||||||
|
@ -395,6 +395,10 @@ def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
|
|||||||
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
|
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
|
||||||
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
|
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
|
||||||
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
|
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
|
||||||
|
def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
|
||||||
|
def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
|
||||||
|
def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
|
||||||
|
def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
|
||||||
|
|
||||||
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
|
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
|
||||||
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
|
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
|
||||||
@ -1747,6 +1751,16 @@ def : Pat <
|
|||||||
/********** Load/Store Patterns **********/
|
/********** Load/Store Patterns **********/
|
||||||
/********** ======================= **********/
|
/********** ======================= **********/
|
||||||
|
|
||||||
|
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
|
||||||
|
(frag i32:$src0),
|
||||||
|
(vt (inst 0, $src0, $src0, $src0, 0, 0))
|
||||||
|
>;
|
||||||
|
|
||||||
|
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
|
||||||
|
def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
|
||||||
|
def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
|
||||||
|
def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
|
||||||
|
def : DSReadPat <DS_READ_B32, i32, local_load>;
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(local_load i32:$src0),
|
(local_load i32:$src0),
|
||||||
(i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
|
(i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
|
||||||
|
@ -438,3 +438,81 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;===------------------------------------------------------------------------===;
|
||||||
|
; LOCAL ADDRESS SPACE
|
||||||
|
;===------------------------------------------------------------------------===;
|
||||||
|
|
||||||
|
; Load an i8 value from the local address space.
|
||||||
|
; R600-CHECK: @load_i8_local
|
||||||
|
; R600-CHECK: LDS_UBYTE_READ_RET
|
||||||
|
; SI-CHECK: @load_i8_local
|
||||||
|
; SI-CHECK: DS_READ_U8
|
||||||
|
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
|
||||||
|
%1 = load i8 addrspace(3)* %in
|
||||||
|
%2 = zext i8 %1 to i32
|
||||||
|
store i32 %2, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; R600-CHECK: @load_i8_sext_local
|
||||||
|
; R600-CHECK: LDS_UBYTE_READ_RET
|
||||||
|
; R600-CHECK: ASHR
|
||||||
|
; SI-CHECK: @load_i8_sext_local
|
||||||
|
; SI-CHECK: DS_READ_I8
|
||||||
|
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load i8 addrspace(3)* %in
|
||||||
|
%1 = sext i8 %0 to i32
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Load an i16 value from the local address space.
|
||||||
|
; R600-CHECK: @load_i16_local
|
||||||
|
; R600-CHECK: LDS_USHORT_READ_RET
|
||||||
|
; SI-CHECK: @load_i16_local
|
||||||
|
; SI-CHECK: DS_READ_U16
|
||||||
|
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load i16 addrspace(3)* %in
|
||||||
|
%1 = zext i16 %0 to i32
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; R600-CHECK: @load_i16_sext_local
|
||||||
|
; R600-CHECK: LDS_USHORT_READ_RET
|
||||||
|
; R600-CHECK: ASHR
|
||||||
|
; SI-CHECK: @load_i16_sext_local
|
||||||
|
; SI-CHECK: DS_READ_I16
|
||||||
|
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load i16 addrspace(3)* %in
|
||||||
|
%1 = sext i16 %0 to i32
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Load an i32 value from the local address space.
|
||||||
|
; R600-CHECK: @load_i32_local
|
||||||
|
; R600-CHECK: LDS_READ_RET
|
||||||
|
; SI-CHECK: @load_i32_local
|
||||||
|
; SI-CHECK: DS_READ_B32
|
||||||
|
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load i32 addrspace(3)* %in
|
||||||
|
store i32 %0, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Load an f32 value from the local address space.
|
||||||
|
; R600-CHECK: @load_f32_local
|
||||||
|
; R600-CHECK: LDS_READ_RET
|
||||||
|
; SI-CHECK: @load_f32_local
|
||||||
|
; SI-CHECK: DS_READ_B32
|
||||||
|
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load float addrspace(3)* %in
|
||||||
|
store float %0, float addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user