AMDGPU: Don't error on ds.ordered intrinsic in function

These should be assumed to be called from a compute context. Also don't use a 2 entry switch over constants.
2025-04-02 15:51:54 +00:00 · 2020-01-05 14:16:22 -05:00 · 2020-01-05 14:16:22 -05:00 · 2c13fd38f3
commit 2c13fd38f3
parent 54ede9be55
2 changed files with 60 additions and 29 deletions
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@ -6081,6 +6081,28 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
         cast<ConstantSDNode>(Offset)->getSExtValue();
 }
 static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
  switch (MF.getFunction().getCallingConv()) {
  case CallingConv::AMDGPU_PS:
    return 1;
  case CallingConv::AMDGPU_VS:
    return 2;
  case CallingConv::AMDGPU_GS:
    return 3;
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_ES:
    report_fatal_error("ds_ordered_count unsupported for this calling conv");
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  default:
    // Assume other calling conventions are various compute callable functions
    return 0;
  }
 }
 SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@ -6096,8 +6118,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
    unsigned IndexOperand = M->getConstantOperandVal(7);
    unsigned WaveRelease = M->getConstantOperandVal(8);
    unsigned WaveDone = M->getConstantOperandVal(9);
    unsigned ShaderType;
    unsigned Instruction;
    unsigned OrderedCountIndex = IndexOperand & 0x3f;
    IndexOperand &= ~0x3f;
@ -6116,36 +6136,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
    if (IndexOperand)
      report_fatal_error("ds_ordered_count: bad index operand");
    switch (IntrID) {
    case Intrinsic::amdgcn_ds_ordered_add:
      Instruction = 0;
      break;
    case Intrinsic::amdgcn_ds_ordered_swap:
      Instruction = 1;
      break;
    }
    if (WaveDone && !WaveRelease)
      report_fatal_error("ds_ordered_count: wave_done requires wave_release");
-    switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
+    unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
-    case CallingConv::AMDGPU_CS:
+    unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
    case CallingConv::AMDGPU_KERNEL:
      ShaderType = 0;
      break;
    case CallingConv::AMDGPU_PS:
      ShaderType = 1;
      break;
    case CallingConv::AMDGPU_VS:
      ShaderType = 2;
      break;
    case CallingConv::AMDGPU_GS:
      ShaderType = 3;
      break;
    default:
      report_fatal_error("ds_ordered_count unsupported for this calling conv");
    }
    unsigned Offset0 = OrderedCountIndex << 2;
    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
                       (Instruction << 4);
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@ -57,6 +57,42 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
  ret float %r
 }
 ; FUNC-LABEL: {{^}}ds_ordered_add_default_cc:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, 0{{$}}
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define float @ds_ordered_add_default_cc() {
  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
  %r = bitcast i32 %val to float
  ret float %r
 }
 ; FUNC-LABEL: {{^}}ds_ordered_add_fastcc:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, 0{{$}}
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define fastcc float @ds_ordered_add_fastcc() {
  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
  %r = bitcast i32 %val to float
  ret float %r
 }
 ; FUNC-LABEL: {{^}}ds_ordered_add_func:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, 0{{$}}
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define float @ds_ordered_add_func() {
  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
  %r = bitcast i32 %val to float
  ret float %r
 }
 ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0