[AMDGPU] Add dynamic stack bit info to kernel-resource-usage Rpass output

In code object v5 (https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata) the AMDGPU backend added the .uses_dynamic_stack bit to the kernel metadata to identify kernels whose stack usage cannot be determined at compile time (mainly because of indirect function calls and recursion). This patch adds this information to the output of the kernel-resource-usage remarks.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D156040

Author:    Corbin Robeck <corbin.robeck@amd.com>
This commit is contained in:
Corbin Robeck 2023-07-25 12:17:18 -07:00 committed by Corbin Robeck
parent 45f9f3f710
commit 7a4968b5a3
3 changed files with 67 additions and 7 deletions

View File

@ -1,11 +1,12 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx908 -Rpass-analysis=kernel-resource-usage -S -O0 -verify %s -o /dev/null
// expected-remark@+9 {{Function Name: foo}}
// expected-remark@+8 {{ SGPRs: 13}}
// expected-remark@+7 {{ VGPRs: 10}}
// expected-remark@+6 {{ AGPRs: 12}}
// expected-remark@+5 {{ ScratchSize [bytes/lane]: 0}}
// expected-remark@+10 {{Function Name: foo}}
// expected-remark@+9 {{ SGPRs: 13}}
// expected-remark@+8 {{ VGPRs: 10}}
// expected-remark@+7 {{ AGPRs: 12}}
// expected-remark@+6 {{ ScratchSize [bytes/lane]: 0}}
// expected-remark@+5 {{ Dynamic Stack: False}}
// expected-remark@+4 {{ Occupancy [waves/SIMD]: 10}}
// expected-remark@+3 {{ SGPRs Spill: 0}}
// expected-remark@+2 {{ VGPRs Spill: 0}}

View File

@ -1293,6 +1293,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks(
EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
CurrentProgramInfo.ScratchSize);
StringRef DynamicStackStr =
CurrentProgramInfo.DynamicCallStack ? "True" : "False";
EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
CurrentProgramInfo.Occupancy);
EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=STDERR %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
; RUN: FileCheck -check-prefix=REMARK %s < %t
; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
@ -6,6 +6,7 @@
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:27:0: Dynamic Stack: False
; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5
; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0
@ -55,7 +56,16 @@
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: '
; REMARK-NEXT: - ScratchSize: '0'
; REMARK-NEXT: ...
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: DynamicStack
; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
; REMARK-NEXT: Function: test_kernel
; REMARK-NEXT: Args:
; REMARK-NEXT: - String: ' Dynamic Stack: '
; REMARK-NEXT: - DynamicStack: 'False'
; REMARK-NEXT: ...
; REMARK-NEXT: --- !Analysis
; REMARK-NEXT: Pass: kernel-resource-usage
; REMARK-NEXT: Name: Occupancy
@ -108,6 +118,7 @@ define amdgpu_kernel void @test_kernel() !dbg !3 {
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:42:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:42:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:42:0: Dynamic Stack: False
; STDERR-NEXT: remark: foo.cl:42:0: Occupancy [waves/SIMD]: 0
; STDERR-NEXT: remark: foo.cl:42:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:42:0: VGPRs Spill: 0
@ -124,6 +135,7 @@ define void @test_func() !dbg !6 {
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:8:0: Dynamic Stack: False
; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 8
; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0
@ -137,6 +149,7 @@ define amdgpu_kernel void @empty_kernel() !dbg !7 {
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs: 0
; STDERR-NEXT: remark: foo.cl:52:0: AGPRs: 0
; STDERR-NEXT: remark: foo.cl:52:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:52:0: Dynamic Stack: False
; STDERR-NEXT: remark: foo.cl:52:0: Occupancy [waves/SIMD]: 0
; STDERR-NEXT: remark: foo.cl:52:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:52:0: VGPRs Spill: 0
@ -144,8 +157,48 @@ define void @empty_func() !dbg !8 {
ret void
}
; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39
; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32
; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10
; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0
; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True
; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8
; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
; Kernel whose only work is a call through a function pointer loaded from the
; external global @gv.fptr0. The check lines preceding this definition expect
; the remark "Dynamic Stack: True" for it.
define amdgpu_kernel void @test_indirect_call() !dbg !9 {
; Load the callee address from the addrspace(4) global.
%fptr = load ptr, ptr addrspace(4) @gv.fptr0
; Indirect call: the callee is not named in this function.
call void %fptr()
ret void
}
; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39
; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32
; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10
; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 64
; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True
; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8
; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0
; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg)
; Kernel that has both a fixed-size addrspace(5) alloca and a call through a
; function pointer loaded from @gv.fptr0. The check lines preceding this
; definition expect a non-zero ScratchSize together with "Dynamic Stack: True".
define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
; Fixed-size stack object; the memset below writes 40 zero bytes into it.
%alloca = alloca <10 x i64>, align 16, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false)
; Load the callee address and call through it (callee not named here).
%fptr = load ptr, ptr addrspace(4) @gv.fptr0
call void %fptr()
ret void
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}
; Keep the module's existing flag !2 in the list and append !11
; (amdgpu_code_object_version) so the earlier flag is not dropped.
!llvm.module.flags = !{!2, !11}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!1 = !DIFile(filename: "foo.cl", directory: "/tmp")
@ -156,3 +209,6 @@ define void @empty_func() !dbg !8 {
!6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!11 = !{i32 1, !"amdgpu_code_object_version", i32 500}