From 0e1e60b73a980dfa60f7628998c0a98f2b8b87ba Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Apr 2017 19:38:10 +0000 Subject: [PATCH] AMDGPU: Don't emit amd_kernel_code_t for callable functions This is inserted directly in the text section. The relocation for the function ends up resolving to the beginning of the amd_kernel_code_t header rather than the actual function entry point. Also skip some of the comments for initialization that only make sense for kernels. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300736 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 15 ++++++++++++++- test/CodeGen/AMDGPU/hsa-func.ll | 14 +++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 7ee4bcb86fe..a81bcb56dfd 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -144,6 +144,10 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( } void AMDGPUAsmPrinter::EmitFunctionBodyStart() { + const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>(); + if (!MFI->isEntryFunction()) + return; + const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; amd_kernel_code_t KernelCode; @@ -222,13 +226,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->SwitchSection(CommentSection); if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - OutStreamer->emitRawComment(" Kernel info:", false); + if (MFI->isEntryFunction()) { + OutStreamer->emitRawComment(" Kernel info:", false); + } else { + OutStreamer->emitRawComment(" Function info:", false); + } + OutStreamer->emitRawComment(" codeLenInByte = " + Twine(getFunctionCodeSize(MF)), false); OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), false); OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), false); + OutStreamer->emitRawComment(" 
FloatMode: " + Twine(KernelInfo.FloatMode), false); OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode), @@ -238,6 +248,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) + " bytes/workgroup (compile time only)", false); + if (!MFI->isEntryFunction()) + return false; + OutStreamer->emitRawComment(" SGPRBlocks: " + Twine(KernelInfo.SGPRBlocks), false); OutStreamer->emitRawComment(" VGPRBlocks: " + diff --git a/test/CodeGen/AMDGPU/hsa-func.ll b/test/CodeGen/AMDGPU/hsa-func.ll index 147cf9bbe6c..d96b796d449 100644 --- a/test/CodeGen/AMDGPU/hsa-func.ll +++ b/test/CodeGen/AMDGPU/hsa-func.ll @@ -27,7 +27,7 @@ ; ELF: Symbol { ; ELF: Name: simple -; ELF: Size: 292 +; ELF: Size: 44 ; ELF: Type: Function (0x2) ; ELF: } @@ -40,11 +40,10 @@ ; HSA: .globl simple ; HSA: .p2align 2 ; HSA: {{^}}simple: -; HSA: .amd_kernel_code_t -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: .end_amd_kernel_code_t -; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 +; HSA-NOT: amd_kernel_code_t + +; FIXME: Check this isn't a kernarg load when calling convention implemented. +; XHSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 ; Make sure we are setting the ATC bit: ; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000 @@ -55,7 +54,8 @@ ; HSA: .Lfunc_end0: ; HSA: .size simple, .Lfunc_end0-simple - +; HSA: ; Function info: +; HSA-NOT: COMPUTE_PGM_RSRC2 define void @simple(i32 addrspace(1)* %out) { entry: store i32 0, i32 addrspace(1)* %out