AMDGPU: Move trap lowering to DAG

Fixes traps in any block besides the entry block,
and fixes depending on a live-in physical register
by using a virtual register copy.

Also happens to stop emitting a nop in the case
debug trap is not supported.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301206 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2017-04-24 17:49:13 +00:00
parent c180879c54
commit 666020a37d
7 changed files with 89 additions and 62 deletions

View File

@ -82,9 +82,8 @@ handler as follows:
=============== ============= ===============================================
Usage Code Sequence Description
=============== ============= ===============================================
llvm.trap s_endpgm Causes wavefront to be terminated.
llvm.debugtrap s_nop No operation. Compiler warning generated that
there is no trap handler installed.
llvm.trap s_endpgm Causes wavefront to be terminated.
llvm.debugtrap Nothing. Compiler warning generated that there is no trap handler installed.
=============== ============= ===============================================
Assembler

View File

@ -3437,6 +3437,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ELSE)
NODE_NAME_CASE(LOOP)
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_FLAG)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)

View File

@ -244,6 +244,7 @@ enum NodeType : unsigned {
// Function call.
CALL,
TRAP,
// Masked control flow nodes.
IF,

View File

@ -78,6 +78,11 @@ def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
// SelectionDAG node definition for AMDGPUISD::TRAP.
// The type profile declares no results and a variable operand count (-1,
// matching SDNPVariadic); operand 0 is constrained to i16. The node is
// chained, has side effects, and accepts an incoming glue operand.
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>,
[SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue]
>;
def AMDGPUconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
SDTCisVT<0, iPTR>]>

View File

@ -287,8 +287,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::TRAP, MVT::Other, Custom);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@ -1948,50 +1948,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
switch (MI.getOpcode()) {
case AMDGPU::S_TRAP_PSEUDO: {
const DebugLoc &DL = MI.getDebugLoc();
const int TrapType = MI.getOperand(0).getImm();
if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
Subtarget->isTrapHandlerEnabled()) {
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
assert(UserSGPR != AMDGPU::NoRegister);
if (!BB->isLiveIn(UserSGPR))
BB->addLiveIn(UserSGPR);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
.addReg(UserSGPR);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
.addImm(TrapType)
.addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
} else {
switch (TrapType) {
case SISubtarget::TrapIDLLVMTrap:
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
break;
case SISubtarget::TrapIDLLVMDebugTrap: {
DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
"debugtrap handler not supported",
DL,
DS_Warning);
LLVMContext &C = MF->getFunction()->getContext();
C.diagnose(NoTrap);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP))
.addImm(0);
break;
}
default:
llvm_unreachable("unsupported trap handler type!");
}
}
MI.eraseFromParent();
return BB;
}
case AMDGPU::SI_INIT_M0:
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
@ -2163,6 +2119,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
case ISD::TRAP:
case ISD::DEBUGTRAP:
return lowerTRAP(Op, DAG);
}
return SDValue();
}
@ -2431,6 +2391,57 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
}
/// Custom lowering for ISD::TRAP and ISD::DEBUGTRAP.
///
/// When the subtarget uses the HSA trap handler ABI and the trap handler is
/// enabled, the queue pointer user SGPR is read through a live-in register,
/// copied into SGPR0_SGPR1, and an AMDGPUISD::TRAP node is produced carrying
/// the trap ID, that register, and the glue from the copy.
///
/// Otherwise llvm.trap becomes AMDGPUISD::ENDPGM, while llvm.debugtrap only
/// reports a warning diagnostic and yields the incoming chain unchanged.
///
/// \param Op  The ISD::TRAP or ISD::DEBUGTRAP node being lowered.
/// \param DAG The SelectionDAG in which replacement nodes are created.
/// \returns The chain value that replaces \p Op.
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc SL(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Chain = Op.getOperand(0);

  // Select the trap ID that corresponds to the incoming opcode.
  unsigned TrapID = SISubtarget::TrapIDLLVMTrap;
  if (Op.getOpcode() == ISD::DEBUGTRAP)
    TrapID = SISubtarget::TrapIDLLVMDebugTrap;

  const bool UseHsaTrapHandler =
      Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
      Subtarget->isTrapHandlerEnabled();

  if (UseHsaTrapHandler) {
    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    unsigned QueuePtrSGPR = MFI->getQueuePtrUserSGPR();
    assert(QueuePtrSGPR != AMDGPU::NoRegister);

    // Read the queue pointer through a live-in register rather than naming
    // the physical register directly.
    SDValue QueuePtr = CreateLiveInRegister(
        DAG, &AMDGPU::SReg_64RegClass, QueuePtrSGPR, MVT::i64);

    SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);

    // The copy produces glue so it stays attached to the trap node.
    SDValue CopyToSGPR01 =
        DAG.getCopyToReg(Chain, SL, SGPR01, QueuePtr, SDValue());
    SDValue TrapIDNode = DAG.getTargetConstant(TrapID, SL, MVT::i16);
    SDValue Glue = CopyToSGPR01.getValue(1);

    SDValue Ops[] = {CopyToSGPR01, TrapIDNode, SGPR01, Glue};
    return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
  }

  // No usable trap handler.
  switch (TrapID) {
  case SISubtarget::TrapIDLLVMTrap:
    return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
  case SISubtarget::TrapIDLLVMDebugTrap:
    break;
  default:
    llvm_unreachable("unsupported trap handler type!");
  }

  // debugtrap without a handler: warn and emit nothing.
  DiagnosticInfoUnsupported Unsupported(*MF.getFunction(),
                                        "debugtrap handler not supported",
                                        Op.getDebugLoc(),
                                        DS_Warning);
  MF.getFunction()->getContext().diagnose(Unsupported);
  return Chain;
}
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.

View File

@ -111,12 +111,6 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
// Scalar pseudo instruction for traps: no outputs, one i16 immediate operand
// ($simm16). It is marked as having side effects and as a SALU instruction,
// and is expanded through the custom inserter hook.
def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
let hasSideEffects = 1;
let SALU = 1;
let usesCustomInserter = 1;
}
let usesCustomInserter = 1, SALU = 1 in {
def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
@ -400,13 +394,8 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
let Predicates = [isGCN] in {
def : Pat<
(trap),
(S_TRAP_PSEUDO TRAPID.LLVM_TRAP)
>;
def : Pat<
(debugtrap),
(S_TRAP_PSEUDO TRAPID.LLVM_DEBUG_TRAP)
(AMDGPUtrap timm:$trapid),
(S_TRAP $trapid)
>;
def : Pat<

View File

@ -80,4 +80,25 @@ define amdgpu_kernel void @trap() {
ret void
}
; GCN-LABEL: {{^}}non_entry_trap:
; TRAP-BIT: enable_trap_handler = 1
; NO-TRAP-BIT: enable_trap_handler = 0
; HSA: BB{{[0-9]}}_{{[0-9]+}}: ; %trap
; HSA-TRAP: s_mov_b64 s[0:1], s[4:5]
; HSA-TRAP-NEXT: s_trap 2
; Kernel whose llvm.trap call lives in a block other than the entry block.
; The trap block is reached only when the volatile load from %arg0 yields a
; value different from -1.
define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr #1 {
entry:
; Load the value that selects between the normal return and the trap.
%tmp29 = load volatile i32, i32 addrspace(1)* %arg0
%cmp = icmp eq i32 %tmp29, -1
; Equal to -1 returns normally; anything else branches to the trap block.
br i1 %cmp, label %ret, label %trap
trap:
call void @llvm.trap()
unreachable
ret:
ret void
}
attributes #0 = { nounwind noreturn }