mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-03 13:51:39 +00:00
R600/SI: Add compute support for CI v2
v2:
  - Fix LDS size calculation

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193621 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f54a8409f9
commit
54328c772c
@ -87,6 +87,8 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
|
||||
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
|
||||
[Feature64BitPtr, FeatureFP64]>;
|
||||
|
||||
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
|
||||
[Feature64BitPtr, FeatureFP64]>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
|
@ -167,6 +167,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
unsigned MaxSGPR = 0;
|
||||
unsigned MaxVGPR = 0;
|
||||
bool VCCUsed = false;
|
||||
@ -267,13 +268,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
|
||||
OutStreamer.EmitIntValue(RsrcReg, 4);
|
||||
OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
|
||||
|
||||
unsigned LDSAlignShift;
|
||||
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
|
||||
// LDS is allocated in 64 dword blocks
|
||||
LDSAlignShift = 8;
|
||||
} else {
|
||||
// LDS is allocated in 128 dword blocks
|
||||
LDSAlignShift = 9;
|
||||
}
|
||||
unsigned LDSBlocks =
|
||||
RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
|
||||
|
||||
if (MFI->ShaderType == ShaderType::COMPUTE) {
|
||||
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
|
||||
OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
|
||||
OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
|
||||
}
|
||||
if (MFI->ShaderType == ShaderType::PIXEL) {
|
||||
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
|
||||
OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
|
||||
OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
|
||||
OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
|
||||
OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ def CC_AMDGPU_Kernel : CallingConv<[
|
||||
]>;
|
||||
|
||||
def CC_AMDGPU : CallingConv<[
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() == "
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() >= "
|
||||
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
|
||||
"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
|
||||
@ -57,5 +57,5 @@ def CC_AMDGPU : CallingConv<[
|
||||
"State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
|
||||
".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
|
||||
".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
|
||||
]>;
|
||||
|
@ -33,7 +33,8 @@ public:
|
||||
R700,
|
||||
EVERGREEN,
|
||||
NORTHERN_ISLANDS,
|
||||
SOUTHERN_ISLANDS
|
||||
SOUTHERN_ISLANDS,
|
||||
SEA_ISLANDS
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -48,6 +48,6 @@ def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"bonaire", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"kabini", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"kaveri", SI_Itin, [FeatureSouthernIslands]>;
|
||||
def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>;
|
||||
def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>;
|
||||
def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>;
|
||||
|
@ -23,7 +23,7 @@ def InterpSlot : Operand<i32> {
|
||||
}
|
||||
|
||||
def isSI : Predicate<"Subtarget.getGeneration() "
|
||||
"== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
|
||||
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
|
||||
|
||||
def WAIT_FLAG : InstFlag<"printWaitFlag">;
|
||||
|
||||
|
@ -1,16 +1,20 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s
|
||||
|
||||
@local_memory.local_mem = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 4
|
||||
@local_memory.local_mem = internal addrspace(3) unnamed_addr global [128 x i32] zeroinitializer, align 4
|
||||
|
||||
; EG-CHECK: @local_memory
|
||||
; SI-CHECK: @local_memory
|
||||
; EG-CHECK-LABEL: @local_memory
|
||||
; SI-CHECK-LABEL: @local_memory
|
||||
; CI-CHECK-LABEL: @local_memory
|
||||
|
||||
; Check that the LDS size is emitted correctly
|
||||
; EG-CHECK: .long 166120
|
||||
; EG-CHECK-NEXT: .long 16
|
||||
; EG-CHECK-NEXT: .long 128
|
||||
; SI-CHECK: .long 47180
|
||||
; SI-CHECK-NEXT: .long 32768
|
||||
; SI-CHECK-NEXT: .long 65536
|
||||
; CI-CHECK: .long 47180
|
||||
; CI-CHECK-NEXT: .long 32768
|
||||
|
||||
; EG-CHECK: LDS_WRITE
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
@ -27,13 +31,13 @@
|
||||
define void @local_memory(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%y.i = call i32 @llvm.r600.read.tidig.x() #0
|
||||
%arrayidx = getelementptr inbounds [16 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
|
||||
%arrayidx = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
|
||||
store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
|
||||
%add = add nsw i32 %y.i, 1
|
||||
%cmp = icmp eq i32 %add, 16
|
||||
%.add = select i1 %cmp, i32 0, i32 %add
|
||||
call void @llvm.AMDGPU.barrier.local()
|
||||
%arrayidx1 = getelementptr inbounds [16 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
|
||||
%arrayidx1 = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
|
||||
%0 = load i32 addrspace(3)* %arrayidx1, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i32 %y.i
|
||||
store i32 %0, i32 addrspace(1)* %arrayidx2, align 4
|
||||
|
Loading…
x
Reference in New Issue
Block a user