AMDGPU: Don't error on ds.ordered intrinsic in function

These should be assumed to be called from a compute context. Also don't use a 2 entry switch over constants.
2025-04-03 08:11:52 +00:00 · 2020-01-05 14:16:22 -05:00 · 2020-01-05 14:16:22 -05:00 · 2c13fd38f3
commit 2c13fd38f3
parent 54ede9be55
2 changed files with 60 additions and 29 deletions
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@ -6081,6 +6081,28 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
         cast<ConstantSDNode>(Offset)->getSExtValue();
 }

+static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
+  switch (MF.getFunction().getCallingConv()) {
+  case CallingConv::AMDGPU_PS:
+    return 1;
+  case CallingConv::AMDGPU_VS:
+    return 2;
+  case CallingConv::AMDGPU_GS:
+    return 3;
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_ES:
+    report_fatal_error("ds_ordered_count unsupported for this calling conv");
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  default:
+    // Assume other calling conventions are various compute callable functions
+    return 0;
+  }
+}
+
 SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@ -6096,8 +6118,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
    unsigned IndexOperand = M->getConstantOperandVal(7);
    unsigned WaveRelease = M->getConstantOperandVal(8);
    unsigned WaveDone = M->getConstantOperandVal(9);
-    unsigned ShaderType;
-    unsigned Instruction;

    unsigned OrderedCountIndex = IndexOperand & 0x3f;
    IndexOperand &= ~0x3f;
@ -6116,36 +6136,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
    if (IndexOperand)
      report_fatal_error("ds_ordered_count: bad index operand");

-    switch (IntrID) {
-    case Intrinsic::amdgcn_ds_ordered_add:
-      Instruction = 0;
-      break;
-    case Intrinsic::amdgcn_ds_ordered_swap:
-      Instruction = 1;
-      break;
-    }
-
    if (WaveDone && !WaveRelease)
      report_fatal_error("ds_ordered_count: wave_done requires wave_release");

-    switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
-    case CallingConv::AMDGPU_CS:
-    case CallingConv::AMDGPU_KERNEL:
-      ShaderType = 0;
-      break;
-    case CallingConv::AMDGPU_PS:
-      ShaderType = 1;
-      break;
-    case CallingConv::AMDGPU_VS:
-      ShaderType = 2;
-      break;
-    case CallingConv::AMDGPU_GS:
-      ShaderType = 3;
-      break;
-    default:
-      report_fatal_error("ds_ordered_count unsupported for this calling conv");
-    }
-
+    unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
+    unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
    unsigned Offset0 = OrderedCountIndex << 2;
    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
                       (Instruction << 4);
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@ -57,6 +57,42 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
  ret float %r
 }

+; FUNC-LABEL: {{^}}ds_ordered_add_default_cc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_default_cc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_fastcc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define fastcc float @ds_ordered_add_fastcc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_func:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_func() {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
 ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0