mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-02 18:31:54 +00:00
[AMDGPU] Add a new pass to insert s_waitcnt instructions. The pass is off by default and is enabled with the -enable-si-insert-waitcnts option, for testing.
Based on comments in https://reviews.llvm.org/D31161. llvm-svn: 300023
This commit is contained in:
parent
046cd27262
commit
fd367b26fb
@ -48,6 +48,7 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
|
||||
FunctionPass *createSIFixSGPRCopiesPass();
|
||||
FunctionPass *createSIDebuggerInsertNopsPass();
|
||||
FunctionPass *createSIInsertWaitsPass();
|
||||
FunctionPass *createSIInsertWaitcntsPass();
|
||||
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
|
||||
|
||||
ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
|
||||
@ -125,6 +126,9 @@ extern char &SIDebuggerInsertNopsID;
|
||||
void initializeSIInsertWaitsPass(PassRegistry&);
|
||||
extern char &SIInsertWaitsID;
|
||||
|
||||
void initializeSIInsertWaitcntsPass(PassRegistry&);
|
||||
extern char &SIInsertWaitcntsID;
|
||||
|
||||
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
|
||||
extern char &AMDGPUUnifyDivergentExitNodesID;
|
||||
|
||||
|
@ -112,6 +112,12 @@ static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
|
||||
cl::desc("Enable AMDGPU Alias Analysis"),
|
||||
cl::init(true));
|
||||
|
||||
// Option to enable new waitcnt insertion pass.
|
||||
static cl::opt<bool> EnableSIInsertWaitcntsPass(
|
||||
"enable-si-insert-waitcnts",
|
||||
cl::desc("Use new waitcnt insertion pass"),
|
||||
cl::init(false));
|
||||
|
||||
extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||
// Register the target
|
||||
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
|
||||
@ -134,6 +140,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||
initializeAMDGPUUnifyMetadataPass(*PR);
|
||||
initializeSIAnnotateControlFlowPass(*PR);
|
||||
initializeSIInsertWaitsPass(*PR);
|
||||
initializeSIInsertWaitcntsPass(*PR);
|
||||
initializeSIWholeQuadModePass(*PR);
|
||||
initializeSILowerControlFlowPass(*PR);
|
||||
initializeSIInsertSkipsPass(*PR);
|
||||
@ -810,7 +817,10 @@ void GCNPassConfig::addPreEmitPass() {
|
||||
// cases.
|
||||
addPass(&PostRAHazardRecognizerID);
|
||||
|
||||
addPass(createSIInsertWaitsPass());
|
||||
if (EnableSIInsertWaitcntsPass)
|
||||
addPass(createSIInsertWaitcntsPass());
|
||||
else
|
||||
addPass(createSIInsertWaitsPass());
|
||||
addPass(createSIShrinkInstructionsPass());
|
||||
addPass(&SIInsertSkipsPassID);
|
||||
addPass(createSIDebuggerInsertNopsPass());
|
||||
|
@ -82,6 +82,7 @@ add_llvm_target(AMDGPUCodeGen
|
||||
SIFrameLowering.cpp
|
||||
SIInsertSkips.cpp
|
||||
SIInsertWaits.cpp
|
||||
SIInsertWaitcnts.cpp
|
||||
SIInstrInfo.cpp
|
||||
SIISelLowering.cpp
|
||||
SILoadStoreOptimizer.cpp
|
||||
|
@ -253,6 +253,8 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
|
||||
[(set i32:$vdst,
|
||||
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
|
||||
|
||||
let LGKM_CNT = 0;
|
||||
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
let isConvergent = 1;
|
||||
|
1863
lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Normal file
1863
lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -4,7 +4,6 @@ declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0
|
||||
|
||||
; FUNC-LABEL: {{^}}ds_bpermute:
|
||||
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; CHECK: s_waitcnt lgkmcnt
|
||||
define amdgpu_kernel void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
|
||||
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
|
||||
store i32 %bpermute, i32 addrspace(1)* %out, align 4
|
||||
@ -13,7 +12,6 @@ define amdgpu_kernel void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %
|
||||
|
||||
; CHECK-LABEL: {{^}}ds_bpermute_imm_offset:
|
||||
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
|
||||
; CHECK: s_waitcnt lgkmcnt
|
||||
define amdgpu_kernel void @ds_bpermute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
|
||||
%index = add i32 %base_index, 4
|
||||
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0
|
||||
@ -23,7 +21,6 @@ define amdgpu_kernel void @ds_bpermute_imm_offset(i32 addrspace(1)* %out, i32 %b
|
||||
|
||||
; CHECK-LABEL: {{^}}ds_bpermute_imm_index:
|
||||
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:64
|
||||
; CHECK: s_waitcnt lgkmcnt
|
||||
define amdgpu_kernel void @ds_bpermute_imm_index(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
|
||||
%bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 64, i32 %src) #0
|
||||
store i32 %bpermute, i32 addrspace(1)* %out, align 4
|
||||
|
@ -4,7 +4,6 @@ declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0
|
||||
|
||||
; CHECK-LABEL: {{^}}ds_permute:
|
||||
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; CHECK: s_waitcnt lgkmcnt
|
||||
define amdgpu_kernel void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
|
||||
%bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
|
||||
store i32 %bpermute, i32 addrspace(1)* %out, align 4
|
||||
@ -13,7 +12,6 @@ define amdgpu_kernel void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %s
|
||||
|
||||
; CHECK-LABEL: {{^}}ds_permute_imm_offset:
|
||||
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
|
||||
; CHECK: s_waitcnt lgkmcnt
|
||||
define amdgpu_kernel void @ds_permute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
|
||||
%index = add i32 %base_index, 4
|
||||
%bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
|
||||
|
Loading…
x
Reference in New Issue
Block a user