diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 84f26cccd7d..cb03b908e61 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6081,6 +6081,32 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
          cast<ConstantSDNode>(Offset)->getSExtValue();
 }
 
+/// Map the calling convention of \p MF to the shader-type value that is
+/// folded into the ds_ordered_count offset field: 1 for AMDGPU_PS, 2 for
+/// AMDGPU_VS, 3 for AMDGPU_GS and 0 for everything else. AMDGPU_HS, AMDGPU_LS
+/// and AMDGPU_ES are rejected with a fatal error.
+static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
+  switch (MF.getFunction().getCallingConv()) {
+  case CallingConv::AMDGPU_PS:
+    return 1;
+  case CallingConv::AMDGPU_VS:
+    return 2;
+  case CallingConv::AMDGPU_GS:
+    return 3;
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_ES:
+    report_fatal_error("ds_ordered_count unsupported for this calling conv");
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  default:
+    // Assume other calling conventions are various compute callable functions
+    return 0;
+  }
+}
+
 SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                  SelectionDAG &DAG) const {
   unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -6096,8 +6122,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     unsigned IndexOperand = M->getConstantOperandVal(7);
     unsigned WaveRelease = M->getConstantOperandVal(8);
     unsigned WaveDone = M->getConstantOperandVal(9);
-    unsigned ShaderType;
-    unsigned Instruction;
 
     unsigned OrderedCountIndex = IndexOperand & 0x3f;
     IndexOperand &= ~0x3f;
@@ -6116,36 +6140,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (IndexOperand)
       report_fatal_error("ds_ordered_count: bad index operand");
 
-    switch (IntrID) {
-    case Intrinsic::amdgcn_ds_ordered_add:
-      Instruction = 0;
-      break;
-    case Intrinsic::amdgcn_ds_ordered_swap:
-      Instruction = 1;
-      break;
-    }
-
     if (WaveDone && !WaveRelease)
       report_fatal_error("ds_ordered_count: wave_done requires wave_release");
 
-    switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
-    case CallingConv::AMDGPU_CS:
-    case CallingConv::AMDGPU_KERNEL:
-      ShaderType = 0;
-      break;
-    case CallingConv::AMDGPU_PS:
-      ShaderType = 1;
-      break;
-    case CallingConv::AMDGPU_VS:
-      ShaderType = 2;
-      break;
-    case CallingConv::AMDGPU_GS:
-      ShaderType = 3;
-      break;
-    default:
-      report_fatal_error("ds_ordered_count unsupported for this calling conv");
-    }
-
+    unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
+    unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
     unsigned Offset0 = OrderedCountIndex << 2;
     unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
                        (Instruction << 4);
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
index ad489debc46..4009c5a6344 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@@ -57,6 +57,42 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
   ret float %r
 }
 
+; FUNC-LABEL: {{^}}ds_ordered_add_default_cc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_default_cc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_fastcc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define fastcc float @ds_ordered_add_fastcc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_func:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_func() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
 ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0