mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-15 07:18:33 +00:00
[NVPTX] Auto-upgrade some NVPTX intrinsics to LLVM target-generic code.
Summary: Specifically, we upgrade llvm.nvvm.: * brev{32,64} * clz.{i,ll} * popc.{i,ll} * abs.{i,ll} * {min,max}.{i,ll,u,ull} * h2f These either map directly to an existing LLVM target-generic intrinsic or map to a simple LLVM target-generic idiom. In all cases, we check that the code we generate is lowered to PTX as we expect. These builtins don't need to be backfilled in clang: They're not accessible to user code from nvcc. Reviewers: tra Subscribers: majnemer, cfe-commits, llvm-commits, jholewinski Differential Revision: https://reviews.llvm.org/D28793 llvm-svn: 292694
This commit is contained in:
parent
077f8fb168
commit
46624a822d
@ -64,24 +64,10 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
|
||||
|
||||
// MISC
|
||||
|
||||
BUILTIN(__nvvm_clz_i, "ii", "")
|
||||
BUILTIN(__nvvm_clz_ll, "iLLi", "")
|
||||
BUILTIN(__nvvm_popc_i, "ii", "")
|
||||
BUILTIN(__nvvm_popc_ll, "iLLi", "")
|
||||
BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
|
||||
|
||||
// Min Max
|
||||
|
||||
BUILTIN(__nvvm_min_i, "iii", "")
|
||||
BUILTIN(__nvvm_min_ui, "UiUiUi", "")
|
||||
BUILTIN(__nvvm_min_ll, "LLiLLiLLi", "")
|
||||
BUILTIN(__nvvm_min_ull, "ULLiULLiULLi", "")
|
||||
|
||||
BUILTIN(__nvvm_max_i, "iii", "")
|
||||
BUILTIN(__nvvm_max_ui, "UiUiUi", "")
|
||||
BUILTIN(__nvvm_max_ll, "LLiLLiLLi", "")
|
||||
BUILTIN(__nvvm_max_ull, "ULLiULLiULLi", "")
|
||||
|
||||
BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
|
||||
BUILTIN(__nvvm_fmax_f, "fff", "")
|
||||
BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
|
||||
@ -133,11 +119,6 @@ BUILTIN(__nvvm_div_rz_d, "ddd", "")
|
||||
BUILTIN(__nvvm_div_rm_d, "ddd", "")
|
||||
BUILTIN(__nvvm_div_rp_d, "ddd", "")
|
||||
|
||||
// Brev
|
||||
|
||||
BUILTIN(__nvvm_brev32, "UiUi", "")
|
||||
BUILTIN(__nvvm_brev64, "ULLiULLi", "")
|
||||
|
||||
// Sad
|
||||
|
||||
BUILTIN(__nvvm_sad_i, "iiii", "")
|
||||
@ -155,9 +136,6 @@ BUILTIN(__nvvm_ceil_d, "dd", "")
|
||||
|
||||
// Abs
|
||||
|
||||
BUILTIN(__nvvm_abs_i, "ii", "")
|
||||
BUILTIN(__nvvm_abs_ll, "LLiLLi", "")
|
||||
|
||||
BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
|
||||
BUILTIN(__nvvm_fabs_f, "ff", "")
|
||||
BUILTIN(__nvvm_fabs_d, "dd", "")
|
||||
@ -385,8 +363,6 @@ BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
|
||||
BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
|
||||
BUILTIN(__nvvm_f2h_rn, "Usf", "")
|
||||
|
||||
BUILTIN(__nvvm_h2f, "fUs", "")
|
||||
|
||||
// Bitcast
|
||||
|
||||
BUILTIN(__nvvm_bitcast_f2i, "if", "")
|
||||
|
@ -11,6 +11,27 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// The following intrinsics were once defined here, but are now auto-upgraded
|
||||
// to target-generic LLVM intrinsics.
|
||||
//
|
||||
// * llvm.nvvm.brev32 --> llvm.bitreverse.i32
|
||||
// * llvm.nvvm.brev64 --> llvm.bitreverse.i64
|
||||
// * llvm.nvvm.clz.i --> llvm.ctlz.i32
|
||||
// * llvm.nvvm.clz.ll --> trunc i64 llvm.ctlz.i64(x) to i32
|
||||
// * llvm.nvvm.popc.i --> llvm.ctpop.i32
|
||||
// * llvm.nvvm.popc.ll --> trunc i64 llvm.ctpop.i64(x) to i32
|
||||
// * llvm.nvvm.abs.i --> select(x >= 0, x, -x)
|
||||
// * llvm.nvvm.abs.ll --> ibid.
|
||||
// * llvm.nvvm.max.i --> select(x sge y, x, y)
|
||||
// * llvm.nvvm.max.ll --> ibid.
|
||||
// * llvm.nvvm.max.ui --> select(x uge y, x, y)
|
||||
// * llvm.nvvm.max.ull --> ibid.
|
||||
// * llvm.nvvm.min.i --> select(x sle y, x, y)
|
||||
// * llvm.nvvm.min.ll --> ibid.
|
||||
// * llvm.nvvm.min.ui --> select(x ule y, x, y)
|
||||
// * llvm.nvvm.min.ull --> ibid.
|
||||
// * llvm.nvvm.h2f --> llvm.convert.from.fp16.f32
|
||||
|
||||
def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
||||
|
||||
//
|
||||
@ -18,16 +39,6 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
||||
//
|
||||
|
||||
let TargetPrefix = "nvvm" in {
|
||||
def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
@ -36,34 +47,6 @@ let TargetPrefix = "nvvm" in {
|
||||
// Min Max
|
||||
//
|
||||
|
||||
def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
@ -200,15 +183,6 @@ let TargetPrefix = "nvvm" in {
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
|
||||
[IntrNoMem, Commutative]>;
|
||||
|
||||
//
|
||||
// Brev
|
||||
//
|
||||
|
||||
def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Sad
|
||||
//
|
||||
@ -242,16 +216,10 @@ let TargetPrefix = "nvvm" in {
|
||||
// Abs
|
||||
//
|
||||
|
||||
def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
|
||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
@ -700,9 +668,6 @@ let TargetPrefix = "nvvm" in {
|
||||
def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
|
||||
Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Bitcast
|
||||
//
|
||||
|
@ -14,6 +14,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/IR/AutoUpgrade.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
@ -204,7 +205,38 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'n': {
|
||||
if (Name.startswith("nvvm.")) {
|
||||
Name = Name.substr(5);
|
||||
|
||||
// The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
|
||||
Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
|
||||
.Cases("brev32", "brev64", Intrinsic::bitreverse)
|
||||
.Case("clz.i", Intrinsic::ctlz)
|
||||
.Case("popc.i", Intrinsic::ctpop)
|
||||
.Default(Intrinsic::not_intrinsic);
|
||||
if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
|
||||
{F->getReturnType()});
|
||||
return true;
|
||||
}
|
||||
|
||||
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
|
||||
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
|
||||
//
|
||||
// TODO: We could add lohi.i2d.
|
||||
bool Expand = StringSwitch<bool>(Name)
|
||||
.Cases("abs.i", "abs.ll", true)
|
||||
.Cases("clz.ll", "popc.ll", "h2f", true)
|
||||
.Cases("max.i", "max.ll", "max.ui", "max.ull", true)
|
||||
.Cases("min.i", "min.ll", "min.ui", "min.ull", true)
|
||||
.Default(false);
|
||||
if (Expand) {
|
||||
NewFn = nullptr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
case 'o':
|
||||
// We only need to change the name to match the mangling including the
|
||||
// address space.
|
||||
@ -753,6 +785,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
bool IsX86 = Name.startswith("x86.");
|
||||
if (IsX86)
|
||||
Name = Name.substr(4);
|
||||
bool IsNVVM = Name.startswith("nvvm.");
|
||||
if (IsNVVM)
|
||||
Name = Name.substr(5);
|
||||
|
||||
if (IsX86 && Name.startswith("sse4a.movnt.")) {
|
||||
Module *M = F->getParent();
|
||||
@ -1727,6 +1762,50 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
{ CI->getArgOperand(0), CI->getArgOperand(1) });
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
|
||||
CI->getArgOperand(2));
|
||||
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
|
||||
Value *Arg = CI->getArgOperand(0);
|
||||
Value *Neg = Builder.CreateNeg(Arg, "neg");
|
||||
Value *Cmp = Builder.CreateICmpSGE(
|
||||
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
|
||||
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
|
||||
} else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
|
||||
Name == "max.ui" || Name == "max.ull")) {
|
||||
Value *Arg0 = CI->getArgOperand(0);
|
||||
Value *Arg1 = CI->getArgOperand(1);
|
||||
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
|
||||
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
|
||||
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
|
||||
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
|
||||
} else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
|
||||
Name == "min.ui" || Name == "min.ull")) {
|
||||
Value *Arg0 = CI->getArgOperand(0);
|
||||
Value *Arg1 = CI->getArgOperand(1);
|
||||
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
|
||||
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
|
||||
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
|
||||
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
|
||||
} else if (IsNVVM && Name == "clz.ll") {
|
||||
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
|
||||
Value *Arg = CI->getArgOperand(0);
|
||||
Value *Ctlz = Builder.CreateCall(
|
||||
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
|
||||
{Arg->getType()}),
|
||||
{Arg, Builder.getFalse()}, "ctlz");
|
||||
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
|
||||
} else if (IsNVVM && Name == "popc.ll") {
|
||||
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
|
||||
// i64.
|
||||
Value *Arg = CI->getArgOperand(0);
|
||||
Value *Popc = Builder.CreateCall(
|
||||
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
|
||||
{Arg->getType()}),
|
||||
Arg, "ctpop");
|
||||
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
|
||||
} else if (IsNVVM && Name == "h2f") {
|
||||
Rep = Builder.CreateCall(Intrinsic::getDeclaration(
|
||||
F->getParent(), Intrinsic::convert_from_fp16,
|
||||
{Builder.getFloatTy()}),
|
||||
CI->getArgOperand(0), "h2f");
|
||||
} else {
|
||||
llvm_unreachable("Unknown function for CallInst upgrade.");
|
||||
}
|
||||
@ -1786,11 +1865,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
|
||||
case Intrinsic::ctpop: {
|
||||
case Intrinsic::ctpop:
|
||||
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
|
||||
case Intrinsic::convert_from_fp16:
|
||||
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_xop_vfrcz_ss:
|
||||
case Intrinsic::x86_xop_vfrcz_sd:
|
||||
|
@ -187,16 +187,6 @@ class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
|
||||
// MISC
|
||||
//
|
||||
|
||||
def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
|
||||
int_nvvm_clz_i>;
|
||||
def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
|
||||
int_nvvm_clz_ll>;
|
||||
|
||||
def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
|
||||
int_nvvm_popc_i>;
|
||||
def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
|
||||
int_nvvm_popc_ll>;
|
||||
|
||||
def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
|
||||
Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
|
||||
|
||||
@ -204,26 +194,6 @@ def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
|
||||
// Min Max
|
||||
//
|
||||
|
||||
def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
|
||||
Int32Regs, Int32Regs, int_nvvm_min_i>;
|
||||
def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
|
||||
Int32Regs, Int32Regs, int_nvvm_min_ui>;
|
||||
|
||||
def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
|
||||
Int64Regs, Int64Regs, int_nvvm_min_ll>;
|
||||
def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
|
||||
Int64Regs, Int64Regs, int_nvvm_min_ull>;
|
||||
|
||||
def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
|
||||
Int32Regs, Int32Regs, int_nvvm_max_i>;
|
||||
def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
|
||||
Int32Regs, Int32Regs, int_nvvm_max_ui>;
|
||||
|
||||
def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
|
||||
Int64Regs, Int64Regs, int_nvvm_max_ll>;
|
||||
def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
|
||||
Int64Regs, Int64Regs, int_nvvm_max_ull>;
|
||||
|
||||
def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
|
||||
Float32Regs, Float32Regs, int_nvvm_fmin_f>;
|
||||
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
|
||||
@ -239,6 +209,7 @@ def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
|
||||
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
|
||||
Float64Regs, Float64Regs, int_nvvm_fmax_d>;
|
||||
|
||||
|
||||
//
|
||||
// Multiplication
|
||||
//
|
||||
@ -320,15 +291,6 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
|
||||
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
|
||||
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
|
||||
|
||||
//
|
||||
// Brev
|
||||
//
|
||||
|
||||
def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
|
||||
int_nvvm_brev32>;
|
||||
def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
|
||||
int_nvvm_brev64>;
|
||||
|
||||
//
|
||||
// Sad
|
||||
//
|
||||
@ -360,11 +322,6 @@ def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
|
||||
// Abs
|
||||
//
|
||||
|
||||
def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
|
||||
int_nvvm_abs_i>;
|
||||
def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
|
||||
int_nvvm_abs_ll>;
|
||||
|
||||
def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
|
||||
Float32Regs, int_nvvm_fabs_ftz_f>;
|
||||
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
|
||||
@ -810,9 +767,6 @@ def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
|
||||
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
|
||||
(BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
|
||||
|
||||
def : Pat<(int_nvvm_h2f Int16Regs:$a),
|
||||
(CVT_f32_f16 (BITCONVERT_16_I2F Int16Regs:$a), CvtNONE)>;
|
||||
|
||||
//
|
||||
// Bitcast
|
||||
//
|
||||
|
102
llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
Normal file
102
llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
Normal file
@ -0,0 +1,102 @@
|
||||
; Test to make sure NVVM intrinsics are automatically upgraded.
|
||||
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||
; RUN: verify-uselistorder %s
|
||||
|
||||
declare i32 @llvm.nvvm.brev32(i32)
|
||||
declare i64 @llvm.nvvm.brev64(i64)
|
||||
declare i32 @llvm.nvvm.clz.i(i32)
|
||||
declare i32 @llvm.nvvm.clz.ll(i64)
|
||||
declare i32 @llvm.nvvm.popc.i(i32)
|
||||
declare i32 @llvm.nvvm.popc.ll(i64)
|
||||
declare float @llvm.nvvm.h2f(i16)
|
||||
|
||||
declare i32 @llvm.nvvm.abs.i(i32)
|
||||
declare i64 @llvm.nvvm.abs.ll(i64)
|
||||
|
||||
declare i32 @llvm.nvvm.max.i(i32, i32)
|
||||
declare i64 @llvm.nvvm.max.ll(i64, i64)
|
||||
declare i32 @llvm.nvvm.max.ui(i32, i32)
|
||||
declare i64 @llvm.nvvm.max.ull(i64, i64)
|
||||
declare i32 @llvm.nvvm.min.i(i32, i32)
|
||||
declare i64 @llvm.nvvm.min.ll(i64, i64)
|
||||
declare i32 @llvm.nvvm.min.ui(i32, i32)
|
||||
declare i64 @llvm.nvvm.min.ull(i64, i64)
|
||||
|
||||
; CHECK-LABEL: @simple_upgrade
|
||||
define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
|
||||
; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
|
||||
%r1 = call i32 @llvm.nvvm.brev32(i32 %a)
|
||||
|
||||
; CHECK: call i64 @llvm.bitreverse.i64(i64 %b)
|
||||
%r2 = call i64 @llvm.nvvm.brev64(i64 %b)
|
||||
|
||||
; CHECK: call i32 @llvm.ctlz.i32(i32 %a, i1 false)
|
||||
%r3 = call i32 @llvm.nvvm.clz.i(i32 %a)
|
||||
|
||||
; CHECK: [[clz:%[a-zA-Z0-9.]+]] = call i64 @llvm.ctlz.i64(i64 %b, i1 false)
|
||||
; CHECK: trunc i64 [[clz]] to i32
|
||||
%r4 = call i32 @llvm.nvvm.clz.ll(i64 %b)
|
||||
|
||||
; CHECK: call i32 @llvm.ctpop.i32(i32 %a)
|
||||
%r5 = call i32 @llvm.nvvm.popc.i(i32 %a)
|
||||
|
||||
; CHECK: [[popc:%[a-zA-Z0-9.]+]] = call i64 @llvm.ctpop.i64(i64 %b)
|
||||
; CHECK: trunc i64 [[popc]] to i32
|
||||
%r6 = call i32 @llvm.nvvm.popc.ll(i64 %b)
|
||||
|
||||
; CHECK: call float @llvm.convert.from.fp16.f32(i16 %c)
|
||||
%r7 = call float @llvm.nvvm.h2f(i16 %c)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @abs
|
||||
define void @abs(i32 %a, i64 %b) {
|
||||
; CHECK-DAG: [[negi:%[a-zA-Z0-9.]+]] = sub i32 0, %a
|
||||
; CHECK-DAG: [[cmpi:%[a-zA-Z0-9.]+]] = icmp sge i32 %a, 0
|
||||
; CHECK: select i1 [[cmpi]], i32 %a, i32 [[negi]]
|
||||
%r1 = call i32 @llvm.nvvm.abs.i(i32 %a)
|
||||
|
||||
; CHECK-DAG: [[negll:%[a-zA-Z0-9.]+]] = sub i64 0, %b
|
||||
; CHECK-DAG: [[cmpll:%[a-zA-Z0-9.]+]] = icmp sge i64 %b, 0
|
||||
; CHECK: select i1 [[cmpll]], i64 %b, i64 [[negll]]
|
||||
%r2 = call i64 @llvm.nvvm.abs.ll(i64 %b)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @min_max
|
||||
define void @min_max(i32 %a1, i32 %a2, i64 %b1, i64 %b2) {
|
||||
; CHECK: [[maxi:%[a-zA-Z0-9.]+]] = icmp sge i32 %a1, %a2
|
||||
; CHECK: select i1 [[maxi]], i32 %a1, i32 %a2
|
||||
%r1 = call i32 @llvm.nvvm.max.i(i32 %a1, i32 %a2)
|
||||
|
||||
; CHECK: [[maxll:%[a-zA-Z0-9.]+]] = icmp sge i64 %b1, %b2
|
||||
; CHECK: select i1 [[maxll]], i64 %b1, i64 %b2
|
||||
%r2 = call i64 @llvm.nvvm.max.ll(i64 %b1, i64 %b2)
|
||||
|
||||
; CHECK: [[maxui:%[a-zA-Z0-9.]+]] = icmp uge i32 %a1, %a2
|
||||
; CHECK: select i1 [[maxui]], i32 %a1, i32 %a2
|
||||
%r3 = call i32 @llvm.nvvm.max.ui(i32 %a1, i32 %a2)
|
||||
|
||||
; CHECK: [[maxull:%[a-zA-Z0-9.]+]] = icmp uge i64 %b1, %b2
|
||||
; CHECK: select i1 [[maxull]], i64 %b1, i64 %b2
|
||||
%r4 = call i64 @llvm.nvvm.max.ull(i64 %b1, i64 %b2)
|
||||
|
||||
; CHECK: [[mini:%[a-zA-Z0-9.]+]] = icmp sle i32 %a1, %a2
|
||||
; CHECK: select i1 [[mini]], i32 %a1, i32 %a2
|
||||
%r5 = call i32 @llvm.nvvm.min.i(i32 %a1, i32 %a2)
|
||||
|
||||
; CHECK: [[minll:%[a-zA-Z0-9.]+]] = icmp sle i64 %b1, %b2
|
||||
; CHECK: select i1 [[minll]], i64 %b1, i64 %b2
|
||||
%r6 = call i64 @llvm.nvvm.min.ll(i64 %b1, i64 %b2)
|
||||
|
||||
; CHECK: [[minui:%[a-zA-Z0-9.]+]] = icmp ule i32 %a1, %a2
|
||||
; CHECK: select i1 [[minui]], i32 %a1, i32 %a2
|
||||
%r7 = call i32 @llvm.nvvm.min.ui(i32 %a1, i32 %a2)
|
||||
|
||||
; CHECK: [[minull:%[a-zA-Z0-9.]+]] = icmp ule i64 %b1, %b2
|
||||
; CHECK: select i1 [[minull]], i64 %b1, i64 %b2
|
||||
%r8 = call i64 @llvm.nvvm.min.ull(i64 %b1, i64 %b2)
|
||||
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user