AMDGPU/SI: Implement sendmsghalt intrinsic

v2: expose using amdgcn prefix

Differential Revision: https://reviews.llvm.org/D23511

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290977 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jan Vesely 2017-01-04 18:06:55 +00:00
parent 5f405a9704
commit bf64cb107c
11 changed files with 230 additions and 45 deletions

View File

@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id :
// Instruction Intrinsics
//===----------------------------------------------------------------------===//
// s_sendmsg / s_sendmsghalt intrinsics. Both return nothing and take two i32
// operands: the first parameter is the s_sendmsg immediate (i16), the second
// one is copied to m0.
// Bound to the builtin name __builtin_amdgcn_s_sendmsg via GCCBuiltin.
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
// s_sendmsghalt counterpart with the same operand layout, bound to the builtin
// name __builtin_amdgcn_s_sendmsghalt.
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;

View File

@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(SENDMSGHALT)
NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)

View File

@ -313,6 +313,7 @@ enum NodeType : unsigned {
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
SENDMSG,
SENDMSGHALT,
INTERP_MOV,
INTERP_P1,
INTERP_P2,

View File

@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
// Selection DAG node for AMDGPUISD::SENDMSGHALT: no results and a single
// integer operand; the node carries a chain and accepts incoming glue.
def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue]>;

View File

@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::SI_sendmsg: {
case AMDGPUIntrinsic::SI_sendmsg:
case Intrinsic::amdgcn_s_sendmsg: {
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
// Lower llvm.amdgcn.s.sendmsghalt to an AMDGPUISD::SENDMSGHALT node.
// Operand 1 is the intrinsic ID, so operand 2 is the intrinsic's first
// argument (the message immediate) and operand 3 its second argument.
case Intrinsic::amdgcn_s_sendmsghalt: {
// Copy the second intrinsic argument into m0; the returned value replaces
// the incoming chain.
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
// NOTE(review): result 1 of the copy is presumably its glue output, used to
// keep the m0 write attached to the message node -- confirm in copyToM0.
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
case AMDGPUIntrinsic::SI_tbuffer_store: {
SDValue Ops[] = {
Chain,

View File

@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG
// or S_SENDMSGHALT.
if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
LastInstWritesM0 = false;
return;
@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
ST->needWaitcntBeforeBarrier()) ||
I->getOpcode() == AMDGPU::S_SENDMSG)
I->getOpcode() == AMDGPU::S_SENDMSG ||
I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
else
Required = handleOperands(*I);

View File

@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in {
// s_sendmsg (SOPP opcode 0x10). Takes a SendMsgImm operand in simm16 and is
// selected from an AMDGPUsendmsg node whose operand is an i32 immediate.
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
[(AMDGPUsendmsg (i32 imm:$simm16))]
>;
// s_sendmsghalt (SOPP opcode 0x11). Takes a SendMsgImm operand in simm16 and
// is selected from an AMDGPUsendmsghalt node whose operand is an i32 immediate.
// Defined inside the Uses = [EXEC, M0] block, like S_SENDMSG.
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
[(AMDGPUsendmsghalt (i32 imm:$simm16))]
>;
} // End Uses = [EXEC, M0]
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;

View File

@ -0,0 +1,41 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
; llvm.amdgcn.s.sendmsg with a non-constant second argument: the inreg argument
; (s0) must be moved into m0, immediately followed by the message for
; immediate 3 and then s_endpgm. The tonga run additionally expects an
; "s_nop 0" directly after the m0 write.
; GCN-LABEL: {{^}}main:
; GCN: s_mov_b32 m0, s0
; VI-NEXT: s_nop 0
; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GCN-NEXT: s_endpgm
define amdgpu_gs void @main(i32 inreg %a) #0 {
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %a)
ret void
}
; Same shape as the s.sendmsg test, but for llvm.amdgcn.s.sendmsghalt with
; immediate 1: s0 is moved into m0, the tonga run expects an "s_nop 0" next,
; then s_sendmsghalt with MSG_INTERRUPT and s_endpgm.
; GCN-LABEL: {{^}}main_halt:
; GCN: s_mov_b32 m0, s0
; VI-NEXT: s_nop 0
; GCN-NEXT: s_sendmsghalt sendmsg(MSG_INTERRUPT)
; GCN-NEXT: s_endpgm
define void @main_halt(i32 inreg %a) #0 {
call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %a)
ret void
}
; The legacy llvm.SI.sendmsg intrinsic must produce the same sequence as
; llvm.amdgcn.s.sendmsg for immediate 3 and a non-constant m0 value.
; GCN-LABEL: {{^}}legacy:
; GCN: s_mov_b32 m0, s0
; VI-NEXT: s_nop 0
; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GCN-NEXT: s_endpgm
define amdgpu_gs void @legacy(i32 inreg %a) #0 {
call void @llvm.SI.sendmsg(i32 3, i32 %a)
ret void
}
declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
declare void @llvm.SI.sendmsg(i32, i32) #0
attributes #0 = { nounwind }

View File

@ -0,0 +1,161 @@
;RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; s.sendmsg with immediate 1 and a constant 0 for m0: expects m0 to be set to 0,
; no further m0 write before the message, and the immediate printed as
; MSG_INTERRUPT.
; CHECK-LABEL: {{^}}test_interrupt:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT)
define void @test_interrupt() {
body:
call void @llvm.amdgcn.s.sendmsg(i32 1, i32 0);
ret void
}
; s.sendmsg with immediate 34 (0x22): expected to print as
; MSG_GS, GS_OP_EMIT, stream 0, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_emit:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
define void @test_gs_emit() {
body:
call void @llvm.amdgcn.s.sendmsg(i32 34, i32 0);
ret void
}
; s.sendmsg with immediate 274 (0x112): expected to print as
; MSG_GS, GS_OP_CUT, stream 1, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_cut:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1)
define void @test_gs_cut() {
body:
call void @llvm.amdgcn.s.sendmsg(i32 274, i32 0);
ret void
}
; s.sendmsg with immediate 562 (0x232): expected to print as
; MSG_GS, GS_OP_EMIT_CUT, stream 2, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_emit_cut:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
define void @test_gs_emit_cut() {
body:
call void @llvm.amdgcn.s.sendmsg(i32 562, i32 0)
ret void
}
; s.sendmsg with immediate 3: expected to print as MSG_GS_DONE, GS_OP_NOP,
; after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_done:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
define void @test_gs_done() {
body:
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
ret void
}
; s.sendmsghalt with immediate 1 and a constant 0 for m0: expects m0 to be set
; to 0, no further m0 write before the message, and s_sendmsghalt printed with
; MSG_INTERRUPT.
; CHECK-LABEL: {{^}}test_interrupt_halt:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsghalt sendmsg(MSG_INTERRUPT)
define void @test_interrupt_halt() {
body:
call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 0)
ret void
}
; s.sendmsghalt with immediate 34 (0x22): expected to print as
; MSG_GS, GS_OP_EMIT, stream 0, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_emit_halt:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT, 0)
define void @test_gs_emit_halt() {
body:
call void @llvm.amdgcn.s.sendmsghalt(i32 34, i32 0)
ret void
}
; s.sendmsghalt with immediate 274 (0x112): expected to print as
; MSG_GS, GS_OP_CUT, stream 1, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_cut_halt:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_CUT, 1)
define void @test_gs_cut_halt() {
body:
call void @llvm.amdgcn.s.sendmsghalt(i32 274, i32 0)
ret void
}
; s.sendmsghalt with immediate 562 (0x232): expected to print as
; MSG_GS, GS_OP_EMIT_CUT, stream 2, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_emit_cut_halt:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
define void @test_gs_emit_cut_halt() {
body:
call void @llvm.amdgcn.s.sendmsghalt(i32 562, i32 0)
ret void
}
; s.sendmsghalt with immediate 3: expected to print as MSG_GS_DONE, GS_OP_NOP,
; after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_gs_done_halt:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsghalt sendmsg(MSG_GS_DONE, GS_OP_NOP)
define void @test_gs_done_halt() {
body:
call void @llvm.amdgcn.s.sendmsghalt(i32 3, i32 0)
ret void
}
; Legacy
; Legacy llvm.SI.sendmsg with immediate 1: same expectations as the
; llvm.amdgcn.s.sendmsg interrupt test (m0 set to 0 once, then MSG_INTERRUPT).
; CHECK-LABEL: {{^}}test_legacy_interrupt:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT)
define void @test_legacy_interrupt() {
body:
call void @llvm.SI.sendmsg(i32 1, i32 0)
ret void
}
; Legacy llvm.SI.sendmsg with immediate 34 (0x22): expected to print as
; MSG_GS, GS_OP_EMIT, stream 0, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_legacy_gs_emit:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
define void @test_legacy_gs_emit() {
body:
call void @llvm.SI.sendmsg(i32 34, i32 0)
ret void
}
; Legacy llvm.SI.sendmsg with immediate 274 (0x112): expected to print as
; MSG_GS, GS_OP_CUT, stream 1, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_legacy_gs_cut:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1)
define void @test_legacy_gs_cut() {
body:
call void @llvm.SI.sendmsg(i32 274, i32 0)
ret void
}
; Legacy llvm.SI.sendmsg with immediate 562 (0x232): expected to print as
; MSG_GS, GS_OP_EMIT_CUT, stream 2, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_legacy_gs_emit_cut:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
define void @test_legacy_gs_emit_cut() {
body:
call void @llvm.SI.sendmsg(i32 562, i32 0)
ret void
}
; Legacy llvm.SI.sendmsg with immediate 3: expected to print as
; MSG_GS_DONE, GS_OP_NOP, after a single write of 0 to m0.
; CHECK-LABEL: {{^}}test_legacy_gs_done:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
define void @test_legacy_gs_done() {
body:
call void @llvm.SI.sendmsg(i32 3, i32 0)
ret void
}
; Function Attrs: nounwind
declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
declare void @llvm.SI.sendmsg(i32, i32) #0
attributes #0 = { nounwind }

View File

@ -1,17 +0,0 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
; GCN-LABEL: {{^}}main:
; GCN: s_mov_b32 m0, s0
; VI-NEXT: s_nop 0
; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
; GCN-NEXT: s_endpgm
define amdgpu_gs void @main(i32 inreg %a) #0 {
call void @llvm.SI.sendmsg(i32 3, i32 %a)
ret void
}
declare void @llvm.SI.sendmsg(i32, i32) #0
attributes #0 = { nounwind }

View File

@ -1,24 +0,0 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}main:
; CHECK: s_mov_b32 m0, 0
; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1)
; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
define void @main() {
main_body:
call void @llvm.SI.sendmsg(i32 34, i32 0);
call void @llvm.SI.sendmsg(i32 274, i32 0);
call void @llvm.SI.sendmsg(i32 562, i32 0);
call void @llvm.SI.sendmsg(i32 3, i32 0);
ret void
}
; Function Attrs: nounwind
declare void @llvm.SI.sendmsg(i32, i32) #0
attributes #0 = { nounwind }