AMDGPU : Add trap handler support.

Differential Revision: http://reviews.llvm.org/D26010

llvm-svn: 294692
This commit is contained in:
Wei Ding 2017-02-10 02:15:29 +00:00
parent 99ec2d0f0b
commit 3609e1230f
13 changed files with 210 additions and 27 deletions

View File

@ -261,6 +261,45 @@ VOP_SDWA examples:
For full list of supported instructions, refer to "Vector ALU instructions".
Trap Handler ABI
--------------------------
The Trap Handler suppored is implemented differently based on the host OS. OS
is obtained from the appropriate element of the target triple HSA OS:
.. code-block:: c++
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
TrapHandlerAbiHsa = 1
};
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For HSA OS, a trap handler is always enabled and that the following S_TRAP immediate
operand codes are supported:
.. code-block:: c++
enum TrapCode {
TrapCodeBreakPoint = 0,
TrapCodeLLVMTrap = 1,
TrapCodeLLVMDebugTrap = 2,
TrapCodeHSADebugTrap = 3
};
- 0: Used for debugger breakpoint. If debugger is not installed causes dispatch
to be terminated and its associated queue put into the error state.
- 1: Used for llvm.trap..queue_ptr is in SGPR0-1. Causes dispatch to be
terminated and its associated queue put into the error state.
- 2: Used for llvm.debugtrap. queue_ptr is in SGPR0-1. If debugger not installed
handled same as llvm.trap.
- 3: Used for HSA DEBUGTRAP. queue_ptr is in SGPR0-1, the user code is in VGPR0.
Graphics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For Graphics, S_ENDPGM is generated for llvm.trap. S_NOP is generated for
llvm.debugtrap together with a warning that there is no trap handler installed.
HSA Code Object Directives
--------------------------

View File

@ -67,6 +67,12 @@ def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"Support unaligned global loads and stores"
>;
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
"TrapHandler",
"true",
"Trap handler support"
>;
def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"UnalignedScratchAccess",
"true",

View File

@ -191,7 +191,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
{ "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
{ "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
{ "llvm.trap", "amdgpu-queue-ptr" }
{ "llvm.trap", "amdgpu-queue-ptr" },
{ "llvm.debugtrap", "amdgpu-queue-ptr" }
};
// TODO: We should not add the attributes if the known compile time workgroup

View File

@ -243,6 +243,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
false);
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
false);
@ -634,6 +637,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |
S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |

View File

@ -44,7 +44,7 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,";
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
FullFS += FS;
@ -94,6 +94,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
UnalignedBufferAccess(false),
EnableXNACK(false),
TrapHandler(false),
DebuggerInsertNops(false),
DebuggerReserveRegs(false),
DebuggerEmitPrologue(false),

View File

@ -66,6 +66,22 @@ public:
ISAVersion8_1_0,
};
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
TrapHandlerAbiHsa = 1
};
enum TrapCode {
TrapCodeBreakPoint = 0,
TrapCodeLLVMTrap = 1,
TrapCodeLLVMDebugTrap = 2,
TrapCodeHSADebugTrap = 3
};
enum TrapRegValues {
TrapCodeLLVMTrapRegValue = 1
};
protected:
// Basic subtarget description.
Triple TargetTriple;
@ -88,6 +104,7 @@ protected:
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool EnableXNACK;
bool TrapHandler;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
bool DebuggerEmitPrologue;
@ -256,6 +273,10 @@ public:
return CaymanISA;
}
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@ -309,6 +330,10 @@ public:
return UnalignedScratchAccess;
}
bool isTrapHandlerEnabled() const {
return TrapHandler;
}
bool isXNACKEnabled() const {
return EnableXNACK;
}

View File

@ -300,6 +300,9 @@ enum DstUnused {
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
#define S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6)
#define G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1)
#define C_00B84C_TRAP_HANDLER 0xFFFFFFBF
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
@ -387,7 +390,6 @@ enum DstUnused {
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
} // End namespace llvm
#endif

View File

@ -276,6 +276,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@ -1779,24 +1780,46 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
switch (MI.getOpcode()) {
case AMDGPU::S_TRAP_PSEUDO: {
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
.addImm(1);
case AMDGPU::S_TRAP_PSEUDO: {
const DebugLoc &DL = MI.getDebugLoc();
const int TrapType = MI.getOperand(0).getImm();
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
assert(UserSGPR != AMDGPU::NoRegister);
if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
Subtarget->isTrapHandlerEnabled()) {
if (!BB->isLiveIn(UserSGPR))
BB->addLiveIn(UserSGPR);
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
assert(UserSGPR != AMDGPU::NoRegister);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
.addReg(UserSGPR);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)).addImm(0x1)
.addReg(AMDGPU::VGPR0, RegState::Implicit)
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
if (!BB->isLiveIn(UserSGPR))
BB->addLiveIn(UserSGPR);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
.addReg(UserSGPR);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
.addImm(TrapType)
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
} else {
switch (TrapType) {
case SISubtarget::TrapCodeLLVMTrap:
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
break;
case SISubtarget::TrapCodeLLVMDebugTrap: {
DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
"debugtrap handler not supported",
DL,
DS_Warning);
LLVMContext &C = MF->getFunction()->getContext();
C.diagnose(NoTrap);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP))
.addImm(0);
break;
}
default:
llvm_unreachable("unsupported trap handler type!");
}
}
MI.eraseFromParent();
return BB;

View File

@ -631,6 +631,11 @@ def DSTOMOD {
int NONE = 0;
}
def TRAPTYPE {
int LLVM_TRAP = 1;
int LLVM_DEBUG_TRAP = 2;
}
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.

View File

@ -111,8 +111,7 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
def S_TRAP_PSEUDO : VPseudoInstSI <(outs), (ins),
[(trap)]> {
def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
let hasSideEffects = 1;
let SALU = 1;
let usesCustomInserter = 1;
@ -390,6 +389,15 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
} // End SubtargetPredicate = isGCN
let Predicates = [isGCN] in {
def : Pat<
(trap),
(S_TRAP_PSEUDO TRAPTYPE.LLVM_TRAP)
>;
def : Pat<
(debugtrap),
(S_TRAP_PSEUDO TRAPTYPE.LLVM_DEBUG_TRAP)
>;
def : Pat<
(int_amdgcn_else i64:$src, bb:$target),

View File

@ -87,7 +87,7 @@ COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
// TODO: enable_trap_handler
COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),

View File

@ -6,7 +6,7 @@
; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}|
; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}}
; VI-NOT: and
; VI-NOT: _and
; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
%fabs = call half @llvm.fabs.f16(half %x)
@ -22,7 +22,7 @@ define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}}
; CI: v_cvt_f16_f32_e32
; VI-NOT: and
; VI-NOT: _and
; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}|
; VI-NOT: [[MUL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]

View File

@ -1,11 +1,80 @@
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s
; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s
; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s
; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
; enable trap handler feature
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s
; disable trap handler feature
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s
declare void @llvm.trap() #0
declare void @llvm.debugtrap() #0
; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
; MESA-TRAP-NEXT: .long 208
; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
; NOMESA-TRAP-NEXT: .long 144
; GCN-LABEL: {{^}}hsa_trap:
; HSA-TRAP: enable_trap_handler = 1
; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
; HSA-TRAP: s_trap 1
; for llvm.trap in hsa path without ABI, direct generate s_endpgm instruction without any warning information
; NO-HSA-TRAP: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm
; NO-HSA-TRAP: COMPUTE_PGM_RSRC2:TRAP_HANDLER: 0
; TRAP-BIT: enable_trap_handler = 1
; NO-TRAP-BIT: enable_trap_handler = 0
; NO-MESA-TRAP: s_endpgm
define void @hsa_trap() {
call void @llvm.trap()
ret void
}
; MESA-TRAP: .section .AMDGPU.config
; MESA-TRAP: .long 47180
; MESA-TRAP-NEXT: .long 208
; NOMESA-TRAP: .section .AMDGPU.config
; NOMESA-TRAP: .long 47180
; NOMESA-TRAP-NEXT: .long 144
; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (): debugtrap handler not supported
; GCN-LABEL: {{^}}hsa_debugtrap:
; HSA-TRAP: enable_trap_handler = 1
; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
; HSA-TRAP: s_trap 2
; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction
; NO-HSA-TRAP: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm
; TRAP-BIT: enable_trap_handler = 1
; NO-TRAP-BIT: enable_trap_handler = 0
; NO-MESA-TRAP: s_endpgm
define void @hsa_debugtrap() {
call void @llvm.debugtrap()
ret void
}
; For non-HSA path
; GCN-LABEL: {{^}}trap:
; GCN: v_mov_b32_e32 v0, 1
; GCN: s_mov_b64 s[0:1], s[4:5]
; GCN: s_trap 1
; TRAP-BIT: enable_trap_handler = 1
; NO-TRAP-BIT: enable_trap_handler = 0
; NO-HSA-TRAP: s_endpgm
; NO-MESA-TRAP: s_endpgm
define void @trap() {
call void @llvm.trap()
ret void