Add the llvm.nvvm.setmaxnreg.{inc,dec}.sync.aligned.u32 intrinsics.

  * IntrinsicsNVVM.td: declares both intrinsics. Each takes a single i32
    operand that is marked as an immediate argument (ImmArg on argument 0).
  * Verifier.cpp: rejects a reg_count that is not a multiple of 8 or that
    lies outside [24, 256].
  * NVPTXInstrInfo.td: adds the hasSM90a predicate
    (Subtarget->getFullSmVersion() == 901).
  * NVPTXIntrinsics.td: selects both intrinsics to
    "setmaxnreg.<action>.sync.aligned.u32 $reg_count;", gated on hasSM90a
    and hasPTX<80>.
  * Tests: a CodeGen test for the emitted PTX and a Verifier test for the two
    diagnostics.

NOTE(review): leading whitespace on unchanged context lines was reconstructed;
confirm against the tree, or apply with --ignore-whitespace.

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6fd8e80013ce..cf50f2a59f60 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4710,4 +4710,14 @@ def int_nvvm_is_explicit_cluster
       [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
       "llvm.nvvm.is_explicit_cluster">;
 
+// Setmaxnreg inc/dec intrinsics
+def int_nvvm_setmaxnreg_inc_sync_aligned_u32
+  : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
+    [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
+    "llvm.nvvm.setmaxnreg.inc.sync.aligned.u32">;
+def int_nvvm_setmaxnreg_dec_sync_aligned_u32
+  : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
+    [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
+    "llvm.nvvm.setmaxnreg.dec.sync.aligned.u32">;
+
 } // let TargetPrefix = "nvvm"
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index aeaca21a99cc..b6ad85b2d46e 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -96,6 +96,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
@@ -6031,6 +6032,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           "Value for inactive lanes must be a VGPR function argument", &Call);
     break;
   }
+  case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
+  case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
+    Value *V = Call.getArgOperand(0);
+    unsigned RegCount = cast<ConstantInt>(V)->getZExtValue();
+    Check(RegCount % 8 == 0,
+          "reg_count argument to nvvm.setmaxnreg must be in multiples of 8");
+    Check((RegCount >= 24 && RegCount <= 256),
+          "reg_count argument to nvvm.setmaxnreg must be within [24, 256]");
+    break;
+  }
   case Intrinsic::experimental_convergence_entry:
     LLVM_FALLTHROUGH;
   case Intrinsic::experimental_convergence_anchor:
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 13665985f52e..e1cced327544 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -164,6 +164,9 @@ def True : Predicate<"true">;
 class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
 class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
 
+// Explicit records for arch-accelerated SM versions
+def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
+
 // non-sync shfl instructions are not available on sm_70+ in PTX6.4+
 def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
                         "&& Subtarget->getPTXVersion() >= 64)">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 85eae44f349a..6b062a7f3912 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6727,3 +6727,16 @@ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
               "mov.pred\t$d, %is_explicit_cluster;",
               [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
     Requires<[hasSM<90>, hasPTX<78>]>;
+
+// setmaxnreg inc/dec intrinsics
+let isConvergent = true in {
+multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
+  def : NVPTXInst<(outs), (ins i32imm:$reg_count),
+          "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
+          [(Intr timm:$reg_count)]>,
+    Requires<[hasSM90a, hasPTX<80>]>;
+}
+
+defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
+defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
+} // isConvergent
diff --git a/llvm/test/CodeGen/NVPTX/setmaxnreg.ll b/llvm/test/CodeGen/NVPTX/setmaxnreg.ll
new file mode 100644
index 000000000000..9025e11fd42e
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/setmaxnreg.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80| FileCheck --check-prefixes=CHECK %s
+; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80| %ptxas-verify -arch=sm_90a %}
+
+declare void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 %reg_count)
+declare void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 %reg_count)
+
+; CHECK-LABEL: test_set_maxn_reg
+define void @test_set_maxn_reg() {
+  ; CHECK: setmaxnreg.inc.sync.aligned.u32 96;
+  call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 96)
+
+  ; CHECK: setmaxnreg.dec.sync.aligned.u32 64;
+  call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 64)
+
+  ret void
+}
diff --git a/llvm/test/Verifier/NVPTX/lit.local.cfg b/llvm/test/Verifier/NVPTX/lit.local.cfg
new file mode 100644
index 000000000000..0d37b86e1c8e
--- /dev/null
+++ b/llvm/test/Verifier/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "NVPTX" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Verifier/NVPTX/setmaxnreg.ll b/llvm/test/Verifier/NVPTX/setmaxnreg.ll
new file mode 100644
index 000000000000..8999e4ffa667
--- /dev/null
+++ b/llvm/test/Verifier/NVPTX/setmaxnreg.ll
@@ -0,0 +1,14 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 %reg_count)
+declare void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 %reg_count)
+
+define void @test_set_maxn_reg() {
+  ; CHECK: reg_count argument to nvvm.setmaxnreg must be in multiples of 8
+  call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 95)
+
+  ; CHECK: reg_count argument to nvvm.setmaxnreg must be within [24, 256]
+  call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 16)
+
+  ret void
+}